In [1]:
from urllib.parse import quote_plus

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine

from config import db_password

In [2]:
# Read the CSV at file_path into a DataFrame and preview its first rows.
file_path = "LMPD_Data.csv"
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(5)

Unnamed: 0,INCIDENT_NUMBER,DATE_OCCURED,UOR_DESC,PREMISE_TYPE,OFFICER_ATTACKED
0,80-18-101412,12/29/2018,ENDANGERING THE WELFARE OF A MINOR,HIGHWAY / ROAD / ALLEY,0
1,80-18-101737,12/30/2018,WANTON ENDANGERMENT-2ND DEGREE,HIGHWAY / ROAD / ALLEY,0
2,80-18-990001,10/19/2018,WANTON ENDANGERMENT-1ST DEGREE,RESIDENCE / HOME,0
3,80-19-000003,1/1/2019,ASSAULT - 1ST DEGREE,RESIDENCE / HOME,0
4,80-19-000004,1/1/2019,WANTON ENDANGERMENT-1ST DEGREE,RESIDENCE / HOME,0


In [3]:
# Remove the incident identifier and occurrence date; the remaining columns
# are what the later cells encode and model.
ml_df = df.drop(["INCIDENT_NUMBER", "DATE_OCCURED"], axis="columns")
ml_df.head(5)

Unnamed: 0,UOR_DESC,PREMISE_TYPE,OFFICER_ATTACKED
0,ENDANGERING THE WELFARE OF A MINOR,HIGHWAY / ROAD / ALLEY,0
1,WANTON ENDANGERMENT-2ND DEGREE,HIGHWAY / ROAD / ALLEY,0
2,WANTON ENDANGERMENT-1ST DEGREE,RESIDENCE / HOME,0
3,ASSAULT - 1ST DEGREE,RESIDENCE / HOME,0
4,WANTON ENDANGERMENT-1ST DEGREE,RESIDENCE / HOME,0


In [4]:
# One-hot encode the two categorical columns; OFFICER_ATTACKED passes through unchanged.
assaults_df = pd.get_dummies(
    data=ml_df,
    columns=['UOR_DESC', 'PREMISE_TYPE'],
)
assaults_df.head(5)

Unnamed: 0,OFFICER_ATTACKED,UOR_DESC_ABANDONMENT OF MINOR,UOR_DESC_ASSAULT - 1ST DEGREE,UOR_DESC_ASSAULT - 1ST DEGREE - DOMESTIC VIOLENCE,UOR_DESC_ASSAULT - 1ST DEGREE - POLICE OFFICER,UOR_DESC_ASSAULT - 2ND DEGREE,UOR_DESC_ASSAULT - 2ND DEGREE - DOMESTIC VIOLENCE,UOR_DESC_ASSAULT - 2ND DEGREE - POLICE OFFICER,UOR_DESC_ASSAULT - 3RD DEGREE (OFFICER TRANSPORTING INMATES),UOR_DESC_ASSAULT - 3RD DEGREE (SCHOOL EMPLOYEE OR SCHOOL VOLUNTEER),...,PREMISE_TYPE_PARK / PLAYGROUND,PREMISE_TYPE_PARKING LOT / GARAGE,PREMISE_TYPE_RACE TRACK/GAMBLING FACILITY,PREMISE_TYPE_RENTAL / STORAGE FACILITY,PREMISE_TYPE_RESIDENCE / HOME,PREMISE_TYPE_RESTAURANT,PREMISE_TYPE_SCHOOL - COLLEGE / UNIVERSITY,PREMISE_TYPE_SCHOOL - ELEMENTARY / SECONDARY,PREMISE_TYPE_SERVICE / GAS STATION,"PREMISE_TYPE_SPECIALTY STORE (TV, FUR, ETC)"
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [5]:
# Feature matrix: every encoded column except the target label.
# drop() returns a new frame, so assaults_df itself is left untouched.
X = assaults_df.drop(columns="OFFICER_ATTACKED")
X.head(5)

Unnamed: 0,UOR_DESC_ABANDONMENT OF MINOR,UOR_DESC_ASSAULT - 1ST DEGREE,UOR_DESC_ASSAULT - 1ST DEGREE - DOMESTIC VIOLENCE,UOR_DESC_ASSAULT - 1ST DEGREE - POLICE OFFICER,UOR_DESC_ASSAULT - 2ND DEGREE,UOR_DESC_ASSAULT - 2ND DEGREE - DOMESTIC VIOLENCE,UOR_DESC_ASSAULT - 2ND DEGREE - POLICE OFFICER,UOR_DESC_ASSAULT - 3RD DEGREE (OFFICER TRANSPORTING INMATES),UOR_DESC_ASSAULT - 3RD DEGREE (SCHOOL EMPLOYEE OR SCHOOL VOLUNTEER),UOR_DESC_ASSAULT - 4TH DEGREE (CHILD ABUSE),...,PREMISE_TYPE_PARK / PLAYGROUND,PREMISE_TYPE_PARKING LOT / GARAGE,PREMISE_TYPE_RACE TRACK/GAMBLING FACILITY,PREMISE_TYPE_RENTAL / STORAGE FACILITY,PREMISE_TYPE_RESIDENCE / HOME,PREMISE_TYPE_RESTAURANT,PREMISE_TYPE_SCHOOL - COLLEGE / UNIVERSITY,PREMISE_TYPE_SCHOOL - ELEMENTARY / SECONDARY,PREMISE_TYPE_SERVICE / GAS STATION,"PREMISE_TYPE_SPECIALTY STORE (TV, FUR, ETC)"
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [6]:
# Target vector: the OFFICER_ATTACKED label column, with a preview of the first five values.
y = assaults_df["OFFICER_ATTACKED"]
y.head(5)

0    0
1    0
2    0
3    0
4    0
Name: OFFICER_ATTACKED, dtype: int64

In [7]:
# Partition the features and labels into train and test subsets.
# The fixed random_state makes the split repeatable across runs.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    random_state=78,
)

In [8]:
# Fit the scaler on the training features only, so the test partition does
# not influence the scaling parameters.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training partition, then the test partition.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [9]:
# Configure a random forest of 128 trees with a fixed seed for repeatable results.
rf_model = RandomForestClassifier(
    n_estimators=128,
    random_state=78,
)

In [10]:
# Train the forest on the scaled training features and their labels.
# The result of fit() is bound back to rf_model, which also keeps the cell output empty.
rf_model = rf_model.fit(X=X_train_scaled, y=y_train)

In [11]:
# Predict a label for every row of the scaled test features.
predictions = rf_model.predict(X=X_test_scaled)

In [12]:
# Read the per-feature importance scores from the fitted forest and display them.
# The array is positional; the names live in X.columns (the two are zipped together in a later cell).
importances = rf_model.feature_importances_
importances

array([5.24431593e-07, 6.65262160e-03, 5.62223706e-05, 4.77643242e-03,
       4.67465084e-03, 4.66384506e-04, 6.82712054e-03, 1.65927267e-03,
       1.57059139e-05, 9.11158866e-05, 5.25848152e-03, 2.15145252e-03,
       6.33878270e-03, 3.86362395e-03, 4.20665041e-02, 3.35973955e-02,
       6.41473252e-04, 0.00000000e+00, 6.56195201e-05, 1.63281779e-05,
       4.78710952e-04, 3.15146067e-04, 4.05687817e-04, 2.62702789e-01,
       8.16271943e-07, 1.76464926e-07, 8.95677305e-07, 6.49526451e-05,
       7.85512138e-05, 7.13864123e-07, 7.84942471e-06, 6.35319054e-05,
       1.03354552e-02, 1.12736648e-03, 2.04375036e-03, 2.76865146e-03,
       1.66628236e-03, 1.98769104e-04, 3.21173189e-04, 7.99885104e-06,
       8.63286537e-06, 3.22332928e-05, 1.67052586e-01, 1.16924735e-04,
       1.72930861e-02, 1.67103055e-03, 3.78890456e-02, 1.07027619e-05,
       0.00000000e+00, 4.32935691e-05, 1.58619317e-04, 1.42484746e-03,
       9.83204808e-06, 2.15679622e-07, 9.11023508e-05, 2.04951274e-04,
      

In [13]:
# Tabulate actual versus predicted labels for the test set.
cm = confusion_matrix(y_true=y_test, y_pred=predictions)

# Wrap the matrix in a DataFrame: rows are actual classes, columns are predicted classes.
cm_df = pd.DataFrame(
    data=cm,
    index=["Actual 0", "Actual 1"],
    columns=["Predicted 0", "Predicted 1"],
)

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,7925,46
Actual 1,97,125


In [14]:
# Overall accuracy of the predictions against the test labels.
acc_score = accuracy_score(y_true=y_test, y_pred=predictions)

In [15]:
# Report the evaluation: confusion matrix, accuracy, then the per-class
# precision/recall/F1 report.
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}", "Classification Report", sep="\n")
print(classification_report(y_true=y_test, y_pred=predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,7925,46
Actual 1,97,125


Accuracy Score : 0.982546075918467
Classification Report
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7971
           1       0.73      0.56      0.64       222

    accuracy                           0.98      8193
   macro avg       0.86      0.78      0.81      8193
weighted avg       0.98      0.98      0.98      8193



In [16]:
# Pair each importance score with its feature name and list them from
# most to least important.
sorted(
    zip(rf_model.feature_importances_, X.columns),
    reverse=True,
)

[(0.2627027890838151,
  'UOR_DESC_ASSAULT-3RD DEGREE-POLICE/PROBATION OFFICER-IDENTIFY WEAPON'),
 (0.1831581619696114,
  'UOR_DESC_WANTON ENDANGERMENT-1ST DEGREE-POLICE OFFICER'),
 (0.16705258574660897, 'UOR_DESC_MENACING'),
 (0.045651265095606096, 'PREMISE_TYPE_HIGHWAY / ROAD / ALLEY'),
 (0.042066504067696964,
  'UOR_DESC_ASSAULT 3RD - PEACE OFFICER -  COMMUNICABLE BODILY FLUID'),
 (0.03788904561464524, 'UOR_DESC_OBSTRUCTING GOVERNMENTAL OPERATIONS'),
 (0.03359739551315869,
  'UOR_DESC_ASSAULT 3RD - PEACE OFFICER - NON COMMUNICABLE BODILY FLUID'),
 (0.01998854423009704,
  'UOR_DESC_WANTON ENDANGERMENT-2ND DEGREE-POLICE OFFICER'),
 (0.01974354966457464, 'PREMISE_TYPE_RESIDENCE / HOME'),
 (0.017293086059492657, 'UOR_DESC_MURDER - POLICE OFFICER ATTEMPTED'),
 (0.016707281893668862, 'UOR_DESC_TERRORISTIC THREATENING 3RD DEGREE'),
 (0.014255623545783679, 'PREMISE_TYPE_JAIL / PENITENTARY'),
 (0.010335455203009408, 'UOR_DESC_DISARMING A PEACE OFFICER'),
 (0.0068271205411962665, 'UOR_DESC_ASS

In [None]:
# Build the PostgreSQL connection URL for the local Assaulted_Officers database.
# - The dialect name must be "postgresql": SQLAlchemy 1.4+ removed the
#   "postgres://" alias, and create_engine() fails on it.
# - The password is percent-encoded so characters such as "@", ":" or "/"
#   cannot be mistaken for URL delimiters.
db_string = (
    f"postgresql://postgres:{quote_plus(db_password)}"
    "@127.0.0.1:5432/Assaulted_Officers"
)

In [None]:
# Create the SQLAlchemy engine from the connection URL held in db_string.
engine = create_engine(db_string)

In [None]:
# Write the full DataFrame loaded from the CSV (df) to the Assault_Officers table.
# if_exists='replace' keeps this cell re-runnable: the default ('fail') raises
# ValueError once the table exists. Note that 'replace' drops and recreates
# the table on every run.
df.to_sql(name='Assault_Officers', con=engine, if_exists='replace')