In [2]:
from os.path import exists 
import joblib
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

## READ FILE

In [3]:
# Load the crime records and keep only the seven group_id values modelled below.
# A wider selection that was tried earlier, kept here for reference:
#   [5,9,14,17,19,20,27,30,34,35,36,41,42,55,56,57,66]
df = (
    pd.read_csv('../../csv-files/crimes.csv')
    .loc[lambda crimes: crimes['group_id'].isin([57, 20, 41, 34, 36, 19, 42])]
    # Discard the identifier, raw offense, coordinate and year columns.
    .drop(columns=['id', 'offense_code', 'offense_group', 'longitude', 'latitude', 'year'])
    .copy()
)
df.head(2)

Unnamed: 0,group_id,hour,day_of_week,month,tract_id,grid_id
5,34,19,2,4,30,40
11,57,18,2,4,174,56



## TRAINING AND TESTING DATA

In [4]:
# Predictors are the three time fields plus the grid cell; the label is group_id.
X = df.loc[:, ['hour', 'day_of_week', 'month', 'grid_id']]
y = df.loc[:, 'group_id']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=9,
)

# Feature scaling is currently disabled; the original code is kept for reference:
# standardScaler = StandardScaler()
# X_train = standardScaler.fit_transform(X_train)
# X_test = standardScaler.transform(X_test)

## APPLYING MODEL

In [10]:
# Baseline: a single decision tree with default hyper-parameters.
# random_state pins the tree's internal randomness (same seed as the
# train/test split) so the score below is reproducible on a fresh re-run.
model = DecisionTreeClassifier(random_state=9)
model.fit(X_train, y_train)

# Mean accuracy on the held-out split (shown as the cell's output).
model.score(X_test, y_test)

0.2957849368105778

In [11]:
# Per-class precision / recall / F1 of the fitted model on the held-out rows.
y_predicted = model.predict(X_test)
cr = classification_report(y_true=y_test, y_pred=y_predicted)
print(cr)

              precision    recall  f1-score   support

          19       0.09      0.12      0.10       937
          20       0.26      0.32      0.29      3101
          34       0.19      0.20      0.19      1022
          36       0.16      0.14      0.15      1180
          41       0.15      0.14      0.14      1443
          42       0.11      0.08      0.09       850
          57       0.51      0.45      0.48      5156

    accuracy                           0.30     13689
   macro avg       0.21      0.21      0.21     13689
weighted avg       0.31      0.30      0.30     13689



In [12]:
# Bagged ensemble of 250 trees.
# random_state pins the bootstrap sampling and per-split feature selection
# (same seed as the train/test split) so the score below is reproducible.
model = RandomForestClassifier(n_estimators=250, random_state=9)
model.fit(X_train, y_train)

# Mean accuracy on the held-out split (shown as the cell's output).
model.score(X_test, y_test)

0.3356709766966177

In [13]:
# Break the held-out accuracy down per class for the model fitted above.
y_predicted = model.predict(X_test)
cr = classification_report(y_true=y_test, y_pred=y_predicted)
print(cr)

              precision    recall  f1-score   support

          19       0.11      0.07      0.09       937
          20       0.27      0.28      0.27      3101
          34       0.21      0.17      0.19      1022
          36       0.16      0.11      0.13      1180
          41       0.15      0.13      0.14      1443
          42       0.13      0.10      0.11       850
          57       0.48      0.60      0.53      5156

    accuracy                           0.34     13689
   macro avg       0.22      0.21      0.21     13689
weighted avg       0.30      0.34      0.32     13689



In [14]:
# Boosted ensemble with up to 250 weak learners (default base estimator).
# random_state seeds the boosting procedure (same seed as the train/test
# split) so the score below is reproducible on a fresh re-run.
model = AdaBoostClassifier(n_estimators=250, random_state=9)
model.fit(X_train, y_train)

# Mean accuracy on the held-out split (shown as the cell's output).
model.score(X_test, y_test)

0.40616553437066255

In [16]:
# Per-class report for the model fitted above. Classes the model never
# predicts show up with precision 0.00 and make scikit-learn emit an
# undefined-metric warning alongside the table.
y_predicted = model.predict(X_test)
cr = classification_report(y_true=y_test, y_pred=y_predicted)
print(cr)

              precision    recall  f1-score   support

          19       0.00      0.00      0.00       937
          20       0.29      0.34      0.32      3101
          34       0.00      0.00      0.00      1022
          36       0.23      0.01      0.02      1180
          41       0.00      0.00      0.00      1443
          42       0.00      0.00      0.00       850
          57       0.45      0.87      0.59      5156

    accuracy                           0.41     13689
   macro avg       0.14      0.17      0.13     13689
weighted avg       0.26      0.41      0.30     13689



  'precision', 'predicted', average, warn_for)


In [17]:
# Gradient-boosted trees with 250 boosting stages.
# random_state seeds the randomness inside each fitted tree (same seed as
# the train/test split) so the score below is reproducible on a re-run.
model = GradientBoostingClassifier(n_estimators=250, random_state=9)
model.fit(X_train, y_train)

# Mean accuracy on the held-out split (shown as the cell's output).
model.score(X_test, y_test)

0.4099642048359997

In [18]:
# Per-class precision / recall / F1 on the held-out rows for the model above.
y_predicted = model.predict(X_test)
cr = classification_report(y_true=y_test, y_pred=y_predicted)
print(cr)

              precision    recall  f1-score   support

          19       0.05      0.00      0.00       937
          20       0.30      0.36      0.33      3101
          34       0.34      0.04      0.07      1022
          36       0.28      0.03      0.06      1180
          41       0.24      0.03      0.05      1443
          42       0.25      0.00      0.01       850
          57       0.46      0.85      0.60      5156

    accuracy                           0.41     13689
   macro avg       0.27      0.19      0.16     13689
weighted avg       0.34      0.41      0.31     13689



## GET REPORT

In [9]:
# Reports on whichever estimator is currently bound to `model`, so the
# table depends on which model cell was executed last.
y_predicted = model.predict(X_test)
cr = classification_report(y_true=y_test, y_pred=y_predicted)
print(cr)

              precision    recall  f1-score   support

          19       0.05      0.00      0.00       937
          20       0.30      0.36      0.33      3101
          34       0.34      0.04      0.07      1022
          36       0.28      0.03      0.06      1180
          41       0.24      0.03      0.05      1443
          42       0.24      0.00      0.01       850
          57       0.46      0.85      0.60      5156

    accuracy                           0.41     13689
   macro avg       0.27      0.19      0.16     13689
weighted avg       0.33      0.41      0.31     13689

