In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [27]:
# Read the 2023 crime records into a DataFrame.
# NOTE(review): the file name suggests rows with missing coordinates were
# already removed upstream - confirm against whatever produced this CSV.
df = pd.read_csv(filepath_or_buffer='no_long_lat_nulls_2023.csv')

# Preview the first five rows as a sanity check on the columns.
df.head(n=5)

Unnamed: 0,Month,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category
0,2023-01,Avon and Somerset Constabulary,-2.491616,51.424619,On or near Julius Place,E01014399,Bath and North East Somerset 001A,Anti-social behaviour,Not documented
1,2023-01,Avon and Somerset Constabulary,-2.491146,51.425008,On or near Maximus Gardens,E01014399,Bath and North East Somerset 001A,Drugs,Unable to prosecute suspect
2,2023-01,Avon and Somerset Constabulary,-2.491616,51.424619,On or near Julius Place,E01014399,Bath and North East Somerset 001A,Drugs,Unable to prosecute suspect
3,2023-01,Avon and Somerset Constabulary,-2.509285,51.409716,On or near Barnard Walk,E01014399,Bath and North East Somerset 001A,Vehicle crime,Status update unavailable
4,2023-01,Avon and Somerset Constabulary,-2.509285,51.409716,On or near Barnard Walk,E01014399,Bath and North East Somerset 001A,Violence and sexual offences,Offender given a caution


# Preprocessing


Encode the month as a number (1-12) instead of a 'YYYY-MM' string so that it can be used directly as a numeric feature.


In [28]:
# Replace the 'YYYY-MM' strings in `Month` with the month number (1-12).
#
# The column is overwritten in place, so this cell must be safe to run twice:
# without the guard, a second run would hand the integers 1-12 to
# pd.to_datetime, which reads bare numbers as offsets from the Unix epoch and
# would silently turn every row into month 1.
if not pd.api.types.is_integer_dtype(df['Month']):
    # An explicit format makes unexpected values fail loudly instead of being guessed at.
    df['Month'] = pd.to_datetime(df['Month'], format='%Y-%m').dt.month


In [29]:
# Show the first five rows again to confirm `Month` now holds month numbers.
df.head(n=5)

Unnamed: 0,Month,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category
0,1,Avon and Somerset Constabulary,-2.491616,51.424619,On or near Julius Place,E01014399,Bath and North East Somerset 001A,Anti-social behaviour,Not documented
1,1,Avon and Somerset Constabulary,-2.491146,51.425008,On or near Maximus Gardens,E01014399,Bath and North East Somerset 001A,Drugs,Unable to prosecute suspect
2,1,Avon and Somerset Constabulary,-2.491616,51.424619,On or near Julius Place,E01014399,Bath and North East Somerset 001A,Drugs,Unable to prosecute suspect
3,1,Avon and Somerset Constabulary,-2.509285,51.409716,On or near Barnard Walk,E01014399,Bath and North East Somerset 001A,Vehicle crime,Status update unavailable
4,1,Avon and Somerset Constabulary,-2.509285,51.409716,On or near Barnard Walk,E01014399,Bath and North East Somerset 001A,Violence and sexual offences,Offender given a caution


In [30]:
# Count missing values per column (isna is the same method as isnull).
df.isna().sum()

Month                        0
Falls within                 0
Longitude                    0
Latitude                     0
Location                     0
LSOA code                93404
LSOA name                93404
Crime type                   0
Last outcome category        0
dtype: int64

In [31]:
# Target: the crime category. Predictors: the coordinates of each record.
y = df['Crime type']
X = df[['Latitude', 'Longitude']]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# K-Nearest Neighbours (KNN) algorithm

In [32]:

# Number of neighbours that vote on each prediction.
k = 5

# fit() returns the fitted estimator, so construction and training can be chained.
knn_classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Label the held-out rows.
y_pred = knn_classifier.predict(X_test)

# Per-class precision / recall / F1 on the test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

                              precision    recall  f1-score   support

       Anti-social behaviour       0.31      0.45      0.36    186968
               Bicycle theft       0.10      0.09      0.09     13074
                    Burglary       0.13      0.10      0.11     50062
   Criminal damage and arson       0.14      0.11      0.12     92926
                       Drugs       0.14      0.07      0.10     32182
                 Other crime       0.11      0.04      0.06     21292
                 Other theft       0.25      0.19      0.21     93941
       Possession of weapons       0.09      0.01      0.02     10502
                Public order       0.14      0.08      0.10     91380
                     Robbery       0.08      0.02      0.03     14797
                 Shoplifting       0.49      0.60      0.54     80969
       Theft from the person       0.28      0.17      0.21     23208
               Vehicle crime       0.23      0.15      0.18     72872
Violence and sexual

In [33]:

# Same experiment with a neighbourhood of ten points.
k = 10

# Build the estimator and train it in one expression (fit() returns the estimator).
knn_classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict on the test split, then report per-class metrics.
y_pred = knn_classifier.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

                              precision    recall  f1-score   support

       Anti-social behaviour       0.34      0.43      0.38    186968
               Bicycle theft       0.18      0.07      0.10     13074
                    Burglary       0.16      0.07      0.10     50062
   Criminal damage and arson       0.16      0.08      0.10     92926
                       Drugs       0.18      0.06      0.09     32182
                 Other crime       0.19      0.03      0.05     21292
                 Other theft       0.28      0.19      0.22     93941
       Possession of weapons       0.22      0.01      0.03     10502
                Public order       0.15      0.06      0.09     91380
                     Robbery       0.09      0.01      0.03     14797
                 Shoplifting       0.48      0.66      0.56     80969
       Theft from the person       0.32      0.21      0.26     23208
               Vehicle crime       0.26      0.14      0.19     72872
Violence and sexual

In [34]:
# Same experiment with a neighbourhood of twenty points.
k = 20

# Construct and train the classifier (fit() returns the fitted estimator).
knn_classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict on the test split, then report per-class metrics.
y_pred = knn_classifier.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

                              precision    recall  f1-score   support

       Anti-social behaviour       0.35      0.39      0.37    186968
               Bicycle theft       0.24      0.06      0.10     13074
                    Burglary       0.18      0.04      0.06     50062
   Criminal damage and arson       0.19      0.04      0.07     92926
                       Drugs       0.22      0.04      0.07     32182
                 Other crime       0.32      0.02      0.05     21292
                 Other theft       0.32      0.17      0.22     93941
       Possession of weapons       0.45      0.01      0.03     10502
                Public order       0.17      0.04      0.06     91380
                     Robbery       0.11      0.01      0.01     14797
                 Shoplifting       0.47      0.69      0.56     80969
       Theft from the person       0.36      0.25      0.30     23208
               Vehicle crime       0.29      0.13      0.18     72872
Violence and sexual

In [36]:
# Same experiment with a much larger neighbourhood of eighty points.
k = 80

# Construct and train the classifier (fit() returns the fitted estimator).
knn_classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict on the test split, then report per-class metrics.
y_pred = knn_classifier.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

                              precision    recall  f1-score   support

       Anti-social behaviour       0.34      0.28      0.31    186968
               Bicycle theft       0.24      0.03      0.06     13074
                    Burglary       0.21      0.01      0.01     50062
   Criminal damage and arson       0.26      0.01      0.02     92926
                       Drugs       0.27      0.02      0.04     32182
                 Other crime       0.38      0.02      0.04     21292
                 Other theft       0.34      0.13      0.19     93941
       Possession of weapons       0.56      0.01      0.02     10502
                Public order       0.24      0.01      0.02     91380
                     Robbery       0.18      0.00      0.00     14797
                 Shoplifting       0.41      0.60      0.49     80969
       Theft from the person       0.36      0.28      0.32     23208
               Vehicle crime       0.31      0.08      0.12     72872
Violence and sexual

Decision Trees

In [35]:
from sklearn.tree import DecisionTreeClassifier

# Initialize the Decision Tree classifier.
# - class_weight='balanced' reweights classes inversely to their frequency,
#   so rare crime types are not drowned out by the common ones.
# - random_state=42 makes the fitted tree repeatable: scikit-learn shuffles the
#   candidate features at every split, so an unseeded tree can differ between
#   runs. 42 matches the seed used for the train/test split.
decision_tree_classifier = DecisionTreeClassifier(
    class_weight='balanced',
    random_state=42,
)

# Train the Decision Tree classifier
decision_tree_classifier.fit(X_train, y_train)

# Predictions on the testing set
y_pred_decision_tree = decision_tree_classifier.predict(X_test)

# Evaluate the model
print("Decision Tree Classifier:")
print(classification_report(y_test, y_pred_decision_tree))

Decision Tree Classifier:
                              precision    recall  f1-score   support

       Anti-social behaviour       0.42      0.24      0.31    186968
               Bicycle theft       0.05      0.32      0.09     13074
                    Burglary       0.12      0.17      0.14     50062
   Criminal damage and arson       0.16      0.13      0.14     92926
                       Drugs       0.09      0.23      0.13     32182
                 Other crime       0.05      0.22      0.08     21292
                 Other theft       0.25      0.14      0.18     93941
       Possession of weapons       0.02      0.20      0.04     10502
                Public order       0.14      0.12      0.13     91380
                     Robbery       0.04      0.20      0.06     14797
                 Shoplifting       0.47      0.73      0.57     80969
       Theft from the person       0.17      0.55      0.26     23208
               Vehicle crime       0.19      0.22      0.21    

Random Forests


In [14]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest classifier.
# The recorded run of this cell was interrupted by hand (KeyboardInterrupt).
# - n_jobs=-1 builds the trees on all available cores; the default builds them
#   one after another on a single core.
# - random_state=42 fixes the bootstrap samples and feature draws so the forest
#   is repeatable, matching the seed used for the train/test split.
# - class_weight='balanced' reweights classes inversely to their frequency.
random_forest_classifier = RandomForestClassifier(
    class_weight='balanced',
    n_jobs=-1,
    random_state=42,
)

# Train the Random Forest classifier
random_forest_classifier.fit(X_train, y_train)

# Predictions on the testing set
y_pred_random_forest = random_forest_classifier.predict(X_test)

# Evaluate the model
print("Random Forest Classifier:")
print(classification_report(y_test, y_pred_random_forest))

KeyboardInterrupt: 

Support Vector Machines (SVM)


In [16]:
from sklearn.svm import SVC

# Kernel SVC training time grows at least quadratically with the number of
# rows, so fitting it on the whole training split is impractical for a data
# set of this size. Train on a capped, class-stratified, seeded subsample.
# NOTE(review): 20_000 is a starting point, not a tuned value - raise it as far
# as the available time allows.
SVM_MAX_TRAIN_ROWS = 20_000

if len(X_train) > SVM_MAX_TRAIN_ROWS:
    # train_test_split doubles as a stratified sampler: keep the "train" part
    # (exactly SVM_MAX_TRAIN_ROWS rows) and discard the remainder.
    X_train_svm, _X_unused, y_train_svm, _y_unused = train_test_split(
        X_train,
        y_train,
        train_size=SVM_MAX_TRAIN_ROWS,
        stratify=y_train,
        random_state=42,
    )
    del _X_unused, _y_unused  # free the large remainder straight away
else:
    X_train_svm, y_train_svm = X_train, y_train

# Initialize the SVM classifier (classes reweighted inversely to their frequency)
svm_classifier = SVC(class_weight='balanced')

# Train the SVM classifier on the capped subsample
svm_classifier.fit(X_train_svm, y_train_svm)

# Predictions on the (full) testing set
y_pred_svm = svm_classifier.predict(X_test)

# Evaluate the model
print("Support Vector Machine (SVM) Classifier:")
print(classification_report(y_test, y_pred_svm))

New model using the month in addition to longitude and latitude

In [22]:
target = 'Crime type'
features = ['Longitude', 'Latitude', 'Month']  # Add additional features here

# 80/20 split with a fixed seed. This rebinds X_train / X_test / y_train /
# y_test, replacing any split previously stored under those names.
X_train, X_test, y_train, y_test = train_test_split(
    df[features],
    df[target],
    test_size=0.2,
    random_state=42,
)


In [23]:
# Twenty-neighbour KNN on whatever X_train currently holds.
# NOTE(review): if X_train includes Month (1-12) alongside the coordinates, the
# features are on different scales and the distances here are computed on raw
# values - consider standardising the features; confirm what is intended.
k = 20

# Construct and train the classifier (fit() returns the fitted estimator).
knn_classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict on the test split, then report per-class metrics.
y_pred = knn_classifier.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

                              precision    recall  f1-score   support

       Anti-social behaviour       0.26      0.27      0.26     18779
               Bicycle theft       0.21      0.04      0.06      1359
                    Burglary       0.18      0.04      0.07      6643
   Criminal damage and arson       0.16      0.04      0.07     11140
                       Drugs       0.19      0.03      0.05      4132
                 Other crime       0.28      0.01      0.03      2681
                 Other theft       0.24      0.11      0.15     11304
       Possession of weapons       0.43      0.01      0.02      1146
                Public order       0.15      0.04      0.06     11495
                     Robbery       0.15      0.02      0.03      1818
                 Shoplifting       0.34      0.41      0.37      8829
       Theft from the person       0.34      0.25      0.29      2659
               Vehicle crime       0.24      0.11      0.15      9649
Violence and sexual