In [19]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt 
import seaborn as sns

In [20]:
data = pd.read_csv("/kaggle/input/dsci-560/LA_Crime_Data_2020_2023.csv")
data.head()

Unnamed: 0,RecNo,ReportDate,DateOCC,TimeOCC,Area,AreaName,DistrictNo,CrimeCode,CrmDesc,Status,StatusDesc,Location,LAT,LON,TimeStamp
0,10304468,01/08/2020 12:00:00 AM,01/08/2020 12:00:00 AM,2230,3,Southwest,377,624,BATTERY - SIMPLE ASSAULT,AO,Adult Other,1100 W 39TH PL,34.0141,-118.2978,2020010822
1,190101086,01/02/2020 12:00:00 AM,01/01/2020 12:00:00 AM,330,1,Central,163,624,BATTERY - SIMPLE ASSAULT,IC,Invest Cont,700 S HILL ST,34.0459,-118.2545,2020010103
2,191501505,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,1730,15,N Hollywood,1543,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),IC,Invest Cont,5400 CORTEEN PL,34.1685,-118.4019,2020010117
3,191921269,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,415,19,Mission,1998,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",IC,Invest Cont,14400 TITUS ST,34.2198,-118.4468,2020010104
4,200100502,01/02/2020 12:00:00 AM,01/02/2020 12:00:00 AM,1315,1,Central,161,442,SHOPLIFTING - PETTY THEFT ($950 & UNDER),IC,Invest Cont,700 S FIGUEROA ST,34.0483,-118.2631,2020010213


In [21]:
data['CrmDesc'].unique()

array(['BATTERY - SIMPLE ASSAULT',
       'VANDALISM - MISDEAMEANOR ($399 OR UNDER)',
       'VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)',
       'SHOPLIFTING - PETTY THEFT ($950 & UNDER)',
       'THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LIVESTK,PROD',
       'BURGLARY FROM VEHICLE', 'CRIMINAL THREATS - NO WEAPON DISPLAYED',
       'INTIMATE PARTNER - SIMPLE ASSAULT',
       'THEFT PLAIN - PETTY ($950 & UNDER)', 'THEFT OF IDENTITY',
       'ROBBERY', 'ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT',
       'BURGLARY', 'VEHICLE - STOLEN',
       'THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)',
       'BRANDISH WEAPON', 'INTIMATE PARTNER - AGGRAVATED ASSAULT',
       'TRESPASSING',
       'THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND OVER)',
       'VIOLATION OF RESTRAINING ORDER'], dtype=object)

In [22]:
len(data['CrmDesc'].unique())

20

In [23]:
dangerous_1=['ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT','ROBBERY','BRANDISH WEAPON', 'INTIMATE PARTNER - AGGRAVATED ASSAULT']

In [24]:
dangerous_2=['BURGLARY','VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)']

In [25]:
dangerous_3=['BATTERY - SIMPLE ASSAULT', 'INTIMATE PARTNER - SIMPLE ASSAULT','CRIMINAL THREATS - NO WEAPON DISPLAYED','VIOLATION OF RESTRAINING ORDER','THEFT OF IDENTITY','BURGLARY FROM VEHICLE','SHOPLIFTING - PETTY THEFT ($950 & UNDER)','THEFT PLAIN - PETTY ($950 & UNDER)','THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)','TRESPASSING','VANDALISM - MISDEAMEANOR ($399 OR UNDER)', 'THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND OVER)','THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LIVESTK,PROD','VEHICLE - STOLEN']

In [26]:
def map_degree(x):
    if x in dangerous_1:
        return 'first degree'
    elif x in dangerous_2:
        return 'second degree'
    elif x in dangerous_3:
        return 'third degree'
    else:
        return None

data['danger_crime']=data['CrmDesc'].apply(map_degree)
data['danger_crime']

0          third degree
1          third degree
2          third degree
3         second degree
4          third degree
              ...      
566144     third degree
566145     third degree
566146     third degree
566147    second degree
566148     third degree
Name: danger_crime, Length: 566149, dtype: object

In [27]:
data['danger_crime'].unique()

array(['third degree', 'second degree', 'first degree'], dtype=object)

In [28]:
X = data[['Area','DistrictNo','LAT','LON', 'TimeStamp']]

y = data['danger_crime']

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.2, random_state=42)

In [30]:
# train a Gaussian Naive Bayes classifier on the training set
# instantiate the model
# initializaing the NB
classifer = GaussianNB()
# training the model
classifer.fit(X_train, y_train)
# testing the model
y_pred = classifer.predict(X_test)

In [31]:

y_pred

array(['third degree', 'third degree', 'third degree', ...,
       'third degree', 'third degree', 'third degree'], dtype='<U13')

In [32]:
print('Model accuracy score: {0:0.4f}'. format(accuracy_score(y_test, y_pred)))

Model accuracy score: 0.7220


In [33]:
print('Training set score: {:.4f}'.format(classifer.score(X_train, y_train)))

print('Test set score: {:.4f}'.format(classifer.score(X_test, y_test)))

Training set score: 0.7211
Test set score: 0.7220


In [34]:
# Print the Confusion Matrix and slice it into four pieces

cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[0,0])

print('\nTrue Negatives(TN) = ', cm[1,1])

print('\nFalse Positives(FP) = ', cm[0,1])

print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[    0     0 18221]
 [    0     0 19550]
 [    0     0 98105]]

True Positives(TP) =  0

True Negatives(TN) =  0

False Positives(FP) =  0

False Negatives(FN) =  0


In [35]:
print(classification_report(y_test, y_pred))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


               precision    recall  f1-score   support

 first degree       0.00      0.00      0.00     18221
second degree       0.00      0.00      0.00     19550
 third degree       0.72      1.00      0.84     98105

     accuracy                           0.72    135876
    macro avg       0.24      0.33      0.28    135876
 weighted avg       0.52      0.72      0.61    135876



  _warn_prf(average, modifier, msg_start, len(result))
