In [1]:
import numpy as np
import pickle
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler,LabelEncoder
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.cluster import KMeans

In [None]:
# Load and preprocess the crime data
crime_data = pd.read_excel("crime_data_2001_2014.xlsx")
crime_data=crime_data.rename(columns={'KIDNAPPING & ABDUCTION':'KIDNAPPING'})
crime_data=crime_data.rename(columns={'DOWRY DEATHS':'DOWRY_DEATHS'})
crime_data=crime_data.rename(columns={'OTHER CRIMES':'OTHER_CRIMES'})
crime_data=crime_data.rename(columns={'TOTAL IPC CRIMES':'TOTAL_CRIMES'})
crime_data

In [63]:
# Split the data into training and testing sets
train_size = int(len(crime_data) * 0.9)
test_size = len(crime_data) - train_size
train_data,test_data=crime_data[:train_size],crime_data[:test_size]

In [64]:
#Split train data into input and output variables
#Divide input into categorical and numerical features for scaling
x_train_s=train_data.iloc[:,0].values
x_train_d=train_data.iloc[:,1].values
x_train_n=train_data.iloc[:,2].values
y_train=train_data.iloc[:,3:].values

In [65]:
#Split test data into input and output variables
#Divide input into categorical and numerical features for scaling
x_test_s=test_data.iloc[:,0].values
x_test_d=test_data.iloc[:,1].values
x_test_n=test_data.iloc[:,2].values
y_test=test_data.iloc[:,3:].values


In [66]:
print(x_train_s.ndim)
print(x_train_d.ndim)
print(x_train_n.ndim)

1
1
1


In [67]:
#Label coding for state categorical features
state_encoder= LabelEncoder()
state_encoder.fit(x_train_s)
x_train_s=state_encoder.transform(x_train_s)
x_test_s=state_encoder.transform(x_test_s)

In [68]:
print(x_train_s.ndim)
print(x_train_d.ndim)
print(x_train_n.ndim)

1
1
1


In [52]:
x_train_d

array(['ADILABAD', 'ANANTAPUR', 'CHITTOOR', ..., 'NAYAGARH', 'NOWRANGPUR',
       'NUAPADA'], dtype=object)

In [69]:
#Label coding for district categorical features
district_encoder= LabelEncoder()
district_encoder.fit(x_train_d)
x_train_d=district_encoder.transform(x_train_d)
x_test_d=district_encoder.transform(x_test_d)

In [70]:
print(x_train_s.ndim)
print(x_train_d.ndim)
print(x_train_n.ndim)

1
1
1


In [54]:
district_encoder.transform(['CHENNAI'])

array([146])

In [55]:
# Save the stateEncoder object to a file
with open('state_encoder.pkl', 'wb') as file:
    pickle.dump(state_encoder, file)

In [56]:
# Save the stateEncoder object to a file
with open('district_encoder.pkl', 'wb') as file:
    pickle.dump(district_encoder, file)

In [57]:
# Reshape the 1D array into a 2D array with a single column
x_train_n= np.array(x_train_n).reshape(-1, 1)
x_test_n= np.array(x_test_n).reshape(-1, 1)
x_train_s= np.array(x_train_s).reshape(-1, 1)
x_test_s= np.array(x_test_s).reshape(-1, 1)
x_train_d= np.array(x_train_d).reshape(-1, 1)
x_test_d= np.array(x_test_d).reshape(-1, 1)

In [58]:
#Normalize the numerical feature in input using minmaxscaler
num_scaler = MinMaxScaler(feature_range=(0, 1))
num_scaler.fit(x_train_n)
x_train_n=num_scaler.transform(x_train_n)
x_test_n=num_scaler.transform(x_test_n)

In [59]:
# Save the scaler parameters to a numpy file
np.save('input_scaler.npy', [num_scaler.min_, num_scaler.scale_])

In [60]:
print(x_train_s.ndim)
print(x_train_d.ndim)
print(x_train_n.ndim)

2
2
2


In [61]:
x_train=np.concatenate((x_train_s,x_train_d,x_train_n), axis=1)
x_test=np.concatenate((x_test_s,x_test_d,x_test_n), axis=1)

In [27]:
#Normalize the numerical feature in output using minmaxscaler
result_scaler = MinMaxScaler(feature_range=(0, 1))
result_scaler.fit(y_train)
y_train=result_scaler.transform(y_train)
y_test=result_scaler.transform(y_test)

In [28]:
# Save the scaler parameters to a numpy file
np.save('scaler_params.npy', [result_scaler.min_, result_scaler.scale_])

In [29]:
# Reshape the input data to fit the LSTM model
x_train = np.reshape(x_train, (x_train.shape[0],1,x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0],1,x_test.shape[1]))

(1061, 3)

In [30]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(x_train.shape[1],x_train.shape[2])))
model.add(Dense(7))
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
y_train

array([[0.01034372, 0.01153403, 0.00518135, ..., 0.00689061, 0.0194925 ,
        0.01772298],
       [0.01773209, 0.00530565, 0.00596981, ..., 0.00301464, 0.01807713,
        0.01759925],
       [0.01015098, 0.00622837, 0.00664564, ..., 0.00602929, 0.02596359,
        0.02482241],
       ...,
       [0.01143591, 0.00692042, 0.00619509, ..., 0.00559862, 0.00558223,
        0.00648506],
       [0.00276261, 0.01545559, 0.0079973 , ..., 0.        , 0.00700892,
        0.00657892],
       [0.00404754, 0.00553633, 0.00551926, ..., 0.00301464, 0.00446691,
        0.0046078 ]])

In [None]:
# Train the model
model.fit(x_train, y_train, epochs=50, batch_size=32, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f15d5c44ee0>

In [None]:
# Make predictions
train_predict = model.predict(x_train)
test_predict = model.predict(x_test)



In [None]:
# Reshape the predicted outputs
train_predict = train_predict.reshape(-1, 7)
test_predict = test_predict.reshape(-1, 7)

In [None]:
y_test

array([[0.01034372, 0.01153403, 0.00518135, ..., 0.00689061, 0.0194925 ,
        0.01772298],
       [0.01773209, 0.00530565, 0.00596981, ..., 0.00301464, 0.01807713,
        0.01759925],
       [0.01015098, 0.00622837, 0.00664564, ..., 0.00602929, 0.02596359,
        0.02482241],
       ...,
       [0.00546097, 0.016609  , 0.00146429, ..., 0.0047373 , 0.02108338,
        0.02019754],
       [0.00636042, 0.01937716, 0.00202748, ..., 0.00387597, 0.01418203,
        0.01606331],
       [0.00359782, 0.00645905, 0.00225276, ..., 0.00215332, 0.00579736,
        0.00571282]])

In [None]:
# Inverse transform the predictions
train_predict =result_scaler.inverse_transform(train_predict)
y_train =result_scaler.inverse_transform(y_train)
test_predict = result_scaler.inverse_transform(test_predict)
y_test = result_scaler.inverse_transform(y_test)

In [None]:
print(y_test)

[[1.610e+02 5.000e+01 4.600e+01 ... 1.600e+01 3.443e+03 4.154e+03]
 [2.760e+02 2.300e+01 5.300e+01 ... 7.000e+00 3.193e+03 4.125e+03]
 [1.580e+02 2.700e+01 5.900e+01 ... 1.400e+01 4.586e+03 5.818e+03]
 ...
 [8.500e+01 7.200e+01 1.300e+01 ... 1.100e+01 3.724e+03 4.734e+03]
 [9.900e+01 8.400e+01 1.800e+01 ... 9.000e+00 2.505e+03 3.765e+03]
 [5.600e+01 2.800e+01 2.000e+01 ... 5.000e+00 1.024e+03 1.339e+03]]


In [None]:
print(int(test_predict[0][0]))

238


In [None]:
# Evaluate the model
train_score = np.sqrt(np.mean((train_predict - y_train) ** 2))
test_score = np.sqrt(np.mean((test_predict - y_test) ** 2))
print(f'Train Score: {train_score:.2f} RMSE')
print(f'Test Score: {test_score:.2f} RMSE')

Train Score: 8912.03 RMSE
Test Score: 7633.61 RMSE


In [None]:
model.save('crime_predict.h5')

In [None]:
# Perform clustering on crime hotspots
kmeans = KMeans(n_clusters=5)  # Adjust the number of clusters as needed
kmeans.fit(predictions)

In [None]:
# Retrieve cluster labels for each prediction
cluster_labels = kmeans.labels_

In [None]:
# Print the cluster labels and corresponding crime counts
for label, count in zip(cluster_labels, predictions):
    print(f"Cluster: {label}, Crime Count: {count}")
