# Crime Detection Model Generator

In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import joblib

In [15]:
# Load and preprocess data
df = pd.read_csv('crime_data.csv')
df.columns = df.columns.str.strip()

df["Date"] = pd.to_datetime(df["Date"], errors="coerce", dayfirst=False)
df['hour'] = pd.to_datetime(df['Time'], format='%H:%M', errors='coerce').dt.hour.fillna(0).astype(int)
df['month'] = df['Date'].dt.month
df['day'] = df['Date'].dt.day
df['weekday'] = df['Date'].dt.weekday

In [16]:
# Encode categorical variables
le_location = LabelEncoder()
df['Location_encoded'] = le_location.fit_transform(df['Locality_Name'])
joblib.dump(le_location, 'location_encoder.pkl')

le_crime = LabelEncoder()
df['Crime_encoded'] = le_crime.fit_transform(df['Crime_Type'])
joblib.dump(le_crime, 'crime_encoder.pkl')

['crime_encoder.pkl']

In [17]:
# Feature selection and scaling
X = df[['Location_encoded', 'Crime_encoded', 'month', 'day', 'weekday', 'hour']].values
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, 'scaler.pkl')
X_scaled = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

In [18]:
# Build Conv1D + BiLSTM Autoencoder
input_layer = tf.keras.Input(shape=(6, 1))
x = tf.keras.layers.Conv1D(32, kernel_size=2, padding='same', activation='relu')(input_layer)
x = tf.keras.layers.MaxPooling1D(pool_size=2, padding='same')(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True))(x)
x = tf.keras.layers.UpSampling1D(size=2)(x)
x = tf.keras.layers.Conv1D(1, kernel_size=2, padding='same', activation='sigmoid')(x)

autoencoder = tf.keras.Model(inputs=input_layer, outputs=x)
from tensorflow.keras.losses import MeanSquaredError
autoencoder.compile(optimizer='adam', loss=MeanSquaredError())

In [19]:
# Train the model
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)
autoencoder.fit(X_scaled, X_scaled, epochs=30, batch_size=32, verbose=1, callbacks=[early_stop])

Epoch 1/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 9ms/step - loss: 0.0340
Epoch 2/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - loss: 0.0013
Epoch 3/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 16ms/step - loss: 0.0011
Epoch 4/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 9ms/step - loss: 9.0618e-04
Epoch 5/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 7.6850e-04
Epoch 6/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 6.5123e-04
Epoch 7/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 5.3424e-04
Epoch 8/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 4.6325e-04
Epoch 9/30
[1m1255/1255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 4.0226e-04
Epoch 10/30
[1m1255/1255[0m [32m━━━

<keras.src.callbacks.history.History at 0x27fb8e86950>

In [20]:
# Save model
autoencoder.save('conv_bilstm_autoencoder.h5')



In [21]:
# Predict and calculate reconstruction error
reconstructed = autoencoder.predict(X_scaled, verbose=0)
mse = np.mean(np.power(X_scaled - reconstructed, 2), axis=(1, 2))
df['reconstruction_error'] = mse

In [22]:
# Identify anomalies
threshold = np.percentile(mse, 95)
anomalies = df[df['reconstruction_error'] > threshold]
print("\nTop anomalies based on reconstruction error:")
print(anomalies[['Locality_Name', 'Crime_Type', 'Date', 'Time', 'reconstruction_error']].head())


Top anomalies based on reconstruction error:
    Locality_Name           Crime_Type                Date              Time  \
16     gachibowli         DRUG OFFENSE 2020-01-01 16:00:00  02-01-2020 02:57   
52     gachibowli  PUBLIC INTOXICATION 2020-01-03 04:00:00  03-01-2020 22:01   
59     Shamshabad          SHOPLIFTING 2020-01-03 11:00:00  04-01-2020 05:33   
74       Ameerpet           CYBERCRIME 2020-01-04 02:00:00  05-01-2020 01:45   
120      Ameerpet     VEHICLE - STOLEN 2020-01-06 00:00:00  06-01-2020 12:16   

     reconstruction_error  
16               0.000351  
52               0.000283  
59               0.000245  
74               0.000320  
120              0.000484  
