In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
df = pd.read_csv('RTA_cleaned.csv')

In [7]:
# Make X every column except 'Accident_severity'
X = df.drop('Accident_severity', axis=1)


In [8]:
# Make y the 'Accident_severity' column
y = df['Accident_severity']

In [9]:
# Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## KNN

In [10]:
from sklearn.neighbors import KNeighborsClassifier

# Create a KNN model
knn = KNeighborsClassifier(n_neighbors=5)

In [11]:
# Fit the model
knn.fit(X_train, y_train)

In [12]:
# Predict the test set
y_pred_knn = knn.predict(X_test)

In [13]:
# Accuracy, precision, recall, f1-score
from sklearn.metrics import accuracy_score, classification_report
print('Accuracy:', accuracy_score(y_test, y_pred_knn))
print(classification_report(y_test, y_pred_knn))


Accuracy: 0.8181818181818182
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        37
           1       0.11      0.02      0.03       363
           2       0.84      0.97      0.90      2064

    accuracy                           0.82      2464
   macro avg       0.32      0.33      0.31      2464
weighted avg       0.72      0.82      0.76      2464



## Decision Tree

In [14]:
from sklearn.tree import DecisionTreeClassifier

# Create a Decision Tree model
dt = DecisionTreeClassifier()

In [15]:
dt.fit(X_train, y_train)

In [16]:
y_pred_dt = dt.predict(X_test)

In [17]:
# Accuracy, precision, recall, f1-score
print('Accuracy:', accuracy_score(y_test, y_pred_dt))
print(classification_report(y_test, y_pred_dt))

Accuracy: 0.721185064935065
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        37
           1       0.14      0.15      0.14       363
           2       0.84      0.84      0.84      2064

    accuracy                           0.72      2464
   macro avg       0.33      0.33      0.33      2464
weighted avg       0.72      0.72      0.72      2464



## SVC

In [41]:
from sklearn.svm import SVC

# Create a SVM model
svc = SVC(kernel='poly')

In [42]:
svc.fit(X_train, y_train)

In [43]:
y_pred_svc = svc.predict(X_test)

In [44]:
# Accuracy, precision, recall, f1-score
print('Accuracy:', accuracy_score(y_test, y_pred_svc))
print(classification_report(y_test, y_pred_svc))

Accuracy: 0.8376623376623377
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        37
           1       0.00      0.00      0.00       363
           2       0.84      1.00      0.91      2064

    accuracy                           0.84      2464
   macro avg       0.28      0.33      0.30      2464
weighted avg       0.70      0.84      0.76      2464



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Random Forest

In [45]:
from sklearn.ensemble import RandomForestClassifier


In [46]:
rf = RandomForestClassifier()

In [24]:
rf.fit(X_train, y_train)

In [25]:
y_pred_rf = rf.predict(X_test)

In [26]:
# Accuracy, precision, recall, f1-score
print('Accuracy:', accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

Accuracy: 0.827922077922078
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        37
           1       0.15      0.01      0.03       363
           2       0.84      0.99      0.91      2064

    accuracy                           0.83      2464
   macro avg       0.33      0.33      0.31      2464
weighted avg       0.72      0.83      0.76      2464



In [27]:
# Compare the models into a table
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
models = ['KNN', 'Decision Tree', 'SVM', 'Random Forest']
accuracy = [accuracy_score(y_test, y_pred_knn), accuracy_score(y_test, y_pred_dt), accuracy_score(y_test, y_pred_svc), accuracy_score(y_test, y_pred_rf)]
precision = [precision_score(y_test, y_pred_knn, average='weighted'), precision_score(y_test, y_pred_dt, average='weighted'), precision_score(y_test, y_pred_svc, average='weighted'), precision_score(y_test, y_pred_rf, average='weighted')]
recall = [recall_score(y_test, y_pred_knn, average='weighted'), recall_score(y_test, y_pred_dt, average='weighted'), recall_score(y_test, y_pred_svc, average='weighted'), recall_score(y_test, y_pred_rf, average='weighted')]
f1 = [f1_score(y_test, y_pred_knn, average='weighted'), f1_score(y_test, y_pred_dt, average='weighted'), f1_score(y_test, y_pred_svc, average='weighted'), f1_score(y_test, y_pred_rf, average='weighted')]
df = pd.DataFrame({'Model': models, 'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1': f1})
print(df)


           Model  Accuracy  Precision    Recall        F1
0            KNN  0.818182   0.717826  0.818182  0.758824
1  Decision Tree  0.721185   0.720956  0.721185  0.721060
2            SVM  0.802354   0.721304  0.802354  0.755310
3  Random Forest  0.827922   0.723821  0.827922  0.762351
