In [2]:
#importing libraries 
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings("ignore")
#reading data
data = '/content/Traffic_Collisions.csv'
df = pd.read_csv(data)
#mapping strings to numerical values
df["Month"] = pd.factorize(df.Month)[0] 
df["Day_of_Week"] = pd.factorize(df.Day_of_Week)[0] 
df["Neighbourhood"] = pd.factorize(df.Neighbourhood)[0] 
df["Division"] = pd.factorize(df.Division)[0] 
df["Atom"] = pd.factorize(df.Atom)[0] 
df["Injury_Collisions"] = pd.factorize(df.Injury_Collisions)[0] 
df["FTR_Collisions"] = pd.factorize(df.FTR_Collisions)[0] 
df["PD_Collisions"] = pd.factorize(df.PD_Collisions)[0] 

y_data=df['Injury_Collisions']
#dropping unecessary columns
x_data=df.drop(['Injury_Collisions','OBJECTID','OccurrenceDate','EventUniqueId','ObjectId2'],axis=1)
#separating the data into train and test splits 
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data ,test_size = 0.2, random_state=0)
# Predicting Injury Collisions with Logistic regression 
logreg = LogisticRegression()
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)
print('Injury collisions')
#checking the accuracy, recall and precision scores
print('Accuracy: ',metrics.accuracy_score(y_test, y_pred))
print('Recall: ',metrics.recall_score(y_test, y_pred, zero_division=1))
print('Precision',metrics.precision_score(y_test, y_pred, zero_division=1))
print('Report:',metrics.classification_report(y_test, y_pred, zero_division=1))

# Predicting FTR Collsions with Logistic regression 
print('FTR collisions')
y_data=df['FTR_Collisions']
x_data=df.drop(['FTR_Collisions','OBJECTID','OccurrenceDate','EventUniqueId','ObjectId2'],axis=1)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data ,test_size = 0.2, random_state=0)
y_pred = logreg.predict(x_test)
#checking the accuracy, recall and precision scores
print('Accuracy: ',metrics.accuracy_score(y_test, y_pred))
print('Recall: ',metrics.recall_score(y_test, y_pred, zero_division=1))
print('Precision',metrics.precision_score(y_test, y_pred, zero_division=1))
print('Report:',metrics.classification_report(y_test, y_pred, zero_division=1))

# Predicting PD Collsions with Logistic regression 
print('PD collisions')
y_data=df['PD_Collisions']
x_data=df.drop(['PD_Collisions','OBJECTID','OccurrenceDate','EventUniqueId','ObjectId2'],axis=1)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data ,test_size = 0.2, random_state=0)
y_pred = logreg.predict(x_test)
#checking the accuracy, recall and precision scores
print('Accuracy: ',metrics.accuracy_score(y_test, y_pred))
print('Recall: ',metrics.recall_score(y_test, y_pred, zero_division=1))
print('Precision',metrics.precision_score(y_test, y_pred, zero_division=1))
print('Report:',metrics.classification_report(y_test, y_pred, zero_division=1))


Injury collisions
Accuracy:  0.8824460147077332
Recall:  0.0
Precision 1.0
Report:               precision    recall  f1-score   support

           0       0.88      1.00      0.94     49079
           1       1.00      0.00      0.00      6538

    accuracy                           0.88     55617
   macro avg       0.94      0.50      0.47     55617
weighted avg       0.90      0.88      0.83     55617

FTR collisions
Accuracy:  0.851340417498247
Recall:  0.0
Precision 1.0
Report:               precision    recall  f1-score   support

           0       0.85      1.00      0.92     47349
           1       1.00      0.00      0.00      8268

    accuracy                           0.85     55617
   macro avg       0.93      0.50      0.46     55617
weighted avg       0.87      0.85      0.78     55617

PD collisions
Accuracy:  0.768739773810166
Recall:  0.0
Precision 1.0
Report:               precision    recall  f1-score   support

           0       0.77      1.00      0.87     427

In [3]:
#Support vector machine
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings("ignore")
data = '/content/Traffic_Collisions.csv'
df = pd.read_csv(data)

#SVM takes a very long time with large data sets so only taking a sample of the data
df=df.sample(n=25000)
#mapping strings to numerical values
df["Month"] = pd.factorize(df.Month)[0] 
df["Day_of_Week"] = pd.factorize(df.Day_of_Week)[0] 
df["Neighbourhood"] = pd.factorize(df.Neighbourhood)[0] 
df["Injury_Collisions"] = pd.factorize(df.Injury_Collisions)[0] 
df["FTR_Collisions"] = pd.factorize(df.FTR_Collisions)[0] 
df["PD_Collisions"] = pd.factorize(df.PD_Collisions)[0] 
#Injury Collisions
y_data=df['Injury_Collisions']
#reduce number of columns so can take a bigger sample
x_data=df.drop(['Injury_Collisions','OBJECTID','OccurrenceDate','EventUniqueId','ObjectId2','X','Y','Year','Division','Longitude','Latitude','Atom'],axis=1)

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data ,test_size = 0.2, random_state=0)
svclassifier = SVC(kernel='linear')
svclassifier.fit(x_train, y_train)
y_predict = svclassifier.predict(x_test)
#checking the accuracy, recall and precision scores
accuracy = accuracy_score(y_test,y_predict)
print('Injury Collisions')
print('Accuracy is',accuracy)
print(classification_report(y_test,y_predict))







Injury Collisions
Accuracy is 0.9934
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      4436
           1       1.00      0.94      0.97       564

    accuracy                           0.99      5000
   macro avg       1.00      0.97      0.98      5000
weighted avg       0.99      0.99      0.99      5000



In [4]:
#FTR Collisions
y_data=df['FTR_Collisions']
#reduce number of columns so can take a bigger sample
x_data=df.drop(['FTR_Collisions','OBJECTID','OccurrenceDate','EventUniqueId','ObjectId2','X','Y','Year','Division','Longitude','Latitude','Atom'],axis=1)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data ,test_size = 0.2, random_state=0)
svclassifier = SVC(kernel='linear')
svclassifier.fit(x_train, y_train)
y_predict = svclassifier.predict(x_test)
accuracy = accuracy_score(y_test,y_predict)
print('FTR Collisions')
print('Accuracy is',accuracy)
print(classification_report(y_test,y_predict))

FTR Collisions
Accuracy is 0.9624
              precision    recall  f1-score   support

           0       0.96      1.00      0.98      4239
           1       1.00      0.75      0.86       761

    accuracy                           0.96      5000
   macro avg       0.98      0.88      0.92      5000
weighted avg       0.96      0.96      0.96      5000



In [5]:
#PD Collisions
y_data=df['PD_Collisions']
#reduce number of columns so can take a bigger sample
x_data=df.drop(['PD_Collisions','OBJECTID','OccurrenceDate','EventUniqueId','ObjectId2','X','Y','Year','Division','Longitude','Latitude','Atom'],axis=1)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data ,test_size = 0.2, random_state=0)
svclassifier = SVC(kernel='linear')
svclassifier.fit(x_train, y_train)
y_predict = svclassifier.predict(x_test)
accuracy = accuracy_score(y_test,y_predict)
print('PD Collisions')
print('Accuracy is',accuracy)
print(classification_report(y_test,y_predict))

PD Collisions
Accuracy is 0.9686
              precision    recall  f1-score   support

           0       1.00      0.96      0.98      3856
           1       0.88      1.00      0.94      1144

    accuracy                           0.97      5000
   macro avg       0.94      0.98      0.96      5000
weighted avg       0.97      0.97      0.97      5000

