# Linear Regression Model for Crime Case Prediction

This notebook demonstrates how crime case counts are predicted for each postcode that appears in the historical dataset. Predictions cover the years 2023 to 2027.

In [23]:
import pandas as pd

In [39]:
# Load the curated crime-case table and discard the 'Unnamed: 0' column
# (presumably a saved index left over from an earlier to_csv call -- confirm).
df = pd.read_csv('../data/curated/crime_cases.csv').drop(columns=['Unnamed: 0'])

In [40]:
# Distinct years and postcodes found in the historical data.
YEAR = list(df['Year'].unique())
POST = list(df['Postcode'].unique())

In [41]:
# Number of distinct postcodes in the historical data.
len(POST)

693

In [42]:
# One-hot encode the categorical column(s) so that every postcode becomes
# its own indicator feature for the regression.
cat_vars = ['Postcode']
dfDummies = pd.get_dummies(data=df, columns=cat_vars)

In [43]:
# Split the encoded table into features (everything except the target) and
# the target itself. y stays a one-column DataFrame, as before.
is_target = dfDummies.columns == 'Offence Count'
X = dfDummies.loc[:, ~is_target]
y = dfDummies.loc[:, is_target]

In [44]:
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=40,
)

# y_train is a one-column DataFrame; flatten it to the 1-D target array
# before fitting.
lm = LinearRegression()
lm.fit(X_train, y_train.to_numpy().ravel())


In [45]:
# Predictions for the held-out rows, kept for inspection.
y_pred = lm.predict(X_test)
# LinearRegression.score returns the coefficient of determination (R^2),
# not a classification accuracy, so the message is labelled accordingly.
print('R^2 of linear regression model on test set: {:.2f}'.format(lm.score(X_test, y_test)))

Accuracy of logistic regression classifier on test set: 0.98


In [64]:
# Build the prediction grid: one row per (year, postcode) pair for 2023-2027.
pred_year = list(range(2023, 2028))
pred_lst = []

# Year-major ordering: each year is repeated once per postcode, so it lines
# up with the postcode list tiled once per year below.
year_lst = [yr for yr in pred_year for _ in POST]
pred_post = POST * len(pred_year)

data = {'Year': year_lst, 'Postcode': pred_post}
pred_df = pd.DataFrame(data=data)

In [65]:
# One-hot encode the prediction grid the same way as the training data.
# Note: this rebinds dfDummies, which previously held the training table.
dfDummies = pd.get_dummies(pred_df, columns=['Postcode'])
# Align the columns to the exact set and order the model was fitted on (X),
# so the prediction never depends on get_dummies happening to emit them in
# the same order; indicator columns absent from the grid are filled with 0.
dfDummies = dfDummies.reindex(columns=X.columns, fill_value=0)

In [66]:
# Predicted offence counts for every (year, postcode) row of the grid.
result = lm.predict(X=dfDummies)

In [67]:
import numpy
# Attach the raw (float) predictions to the grid, row for row.
pred_df['crime_count'] = result.tolist()

In [70]:
# Store predictions as whole counts. astype(int) truncates toward zero
# rather than rounding.
pred_df['crime_count'] = pred_df['crime_count'].astype(int)

In [72]:
# Persist the 2023-2027 predictions. The DataFrame index is written too
# (to_csv default), so the file gains an unnamed leading column.
pred_df.to_csv(path_or_buf='../data/curated/feature_prediction/23_27_crime_case.csv')

## Export Model for Web (Flask)

In [76]:
import pickle

# Serialise the fitted model for the web app. Context managers guarantee the
# file handles are flushed and closed, which the bare open() calls did not.
with open('../web/crimecase_model.pkl', 'wb') as model_file:
    pickle.dump(lm, model_file)

# Round-trip check: reload the model exactly as the web app would.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by this project.
with open('../web/crimecase_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [79]:
# Smoke test for the lookup function in app.py: rebuild the one-hot feature
# row for a single (year, postcode) pair and run it through the reloaded model.
year = 2023
post = 3000
result = pd.read_csv('../data/curated/feature_prediction/23_27_crime_case.csv')

# One indicator column per distinct postcode in the exported predictions.
post_lst = list(set(result['Postcode']))
df_post = pd.DataFrame(post_lst)
df_post.columns = ['Postcode']
df_post_dum = pd.get_dummies(df_post, columns=['Postcode'])

# Select the row whose indicator for `post` is set. Copy it so that insert()
# modifies an independent frame rather than a slice of df_post_dum.
row = df_post_dum[df_post_dum['Postcode_'+str(post)] == 1].copy()
row.insert(0, 'Year', year)

# predict() returns an array; take its first element explicitly, because
# int() on an array with ndim > 0 is deprecated in recent NumPy releases.
pred = int(model.predict(row)[0])
output = pred
output

15867