In [2]:
import pandas as pd
import numpy as np
import sys


from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

# Raise NumPy's print threshold to sys.maxsize so arrays are printed in full
# instead of being summarised; long outputs can then be viewed as a scrollable
# element in the notebook.
np.set_printoptions(threshold=sys.maxsize)

In [3]:
# Load the cleaned dataset from disk.
df = pd.read_csv('.\\modified_data\\cleaned_data.csv')

scaler = preprocessing.StandardScaler()

cats = ['Month', 'Day', 'Hour']
coord_cols = ['Longitude', 'Latitude']
# Separate numerical (float64) and categorical columns; only the categorical
# names that actually exist in the file are kept.
numerical_cols = df.select_dtypes(include=['float64']).columns
categorical_cols = [col for col in cats if col in df.columns]

# One-hot encode categorical columns (as 0/1 integers)
one_hot_encoded = pd.get_dummies(df[categorical_cols], columns=categorical_cols).astype(int)

# Concatenate numerical columns and one-hot encoded categorical columns
df = pd.concat([df[numerical_cols], one_hot_encoded], axis=1)

# Standardise both coordinates with ONE fit. StandardScaler works column by
# column, so the values are the same as scaling each column separately, but
# the fitted `scaler` now keeps the mean/scale of both columns. Re-fitting the
# same scaler per column would leave it holding only the last column's
# statistics, making it useless for transforming new points later.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed in a later cell.
df[coord_cols] = scaler.fit_transform(df[coord_cols].values)

print(df)

       Latitude  Longitude  Month_1  Month_2  Month_3  Month_4  Month_5  \
0     -0.114937   0.247816        1        0        0        0        0   
1     -0.069945   0.061847        1        0        0        0        0   
2     -0.304838  -0.643572        1        0        0        0        0   
3      0.014969  -0.790000        1        0        0        0        0   
4     -0.001289  -0.866018        1        0        0        0        0   
...         ...        ...      ...      ...      ...      ...      ...   
48777 -3.236136  -0.902290        0        0        0        1        0   
48778 -3.266814   1.532944        0        0        0        0        1   
48779 -3.476848   0.012038        0        0        0        0        0   
48780 -3.493090   1.845266        0        0        0        0        0   
48781 -3.506628   3.501369        0        0        0        0        0   

       Month_6  Month_7  Month_8  ...  Hour_14  Hour_15  Hour_16  Hour_17  \
0            0        

In [4]:
# Work on a copy of the prepared feature frame and hold out 20% of the rows
# (fixed seed so the split is reproducible).
X = df.copy()
X_train, X_test = train_test_split(X, random_state=0, test_size=0.2)

# nu is an upper bound on the fraction of training rows treated as outliers
# (and a lower bound on the fraction of support vectors). The data is assumed
# to be all inliers, so nu = 0.1 lets roughly 10% of the training rows fall
# outside the learned boundary as wiggle room for noisy records.
nu_value = 0.1
one_class_svm = OneClassSVM(nu=nu_value, kernel='rbf')
one_class_svm.fit(X_train)

In [5]:
# Label the training rows with the fitted model: +1 = inlier, -1 = outlier.
y_pred = one_class_svm.predict(X_train)

In [6]:
# Every training row is treated as an inlier, so the reference label is +1 for
# all of them. Built as a 1-D vector to match the shape returned by `predict`.
y_train = np.full(len(X_train), 1)

# With all-ones reference labels, accuracy is simply the share of rows the
# model labelled as inliers.
accuracy = accuracy_score(y_train, y_pred)
print(f"Training Accuracy: {accuracy * 100:.2f}%")

# Class -1 never occurs in the reference labels, so its recall is undefined;
# zero_division=0 reports it as 0.0 without emitting a warning per metric.
print(classification_report(y_train, y_pred, zero_division=0))

Training Accuracy: 89.96%
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00         0
           1       1.00      0.90      0.95     39025

    accuracy                           0.90     39025
   macro avg       0.50      0.45      0.47     39025
weighted avg       1.00      0.90      0.95     39025



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# Label the held-out test rows with the fitted model: +1 = inlier, -1 = outlier.
# Note that this rebinds `y_pred`, replacing the training-set predictions.
y_pred = one_class_svm.predict(X_test)

In [8]:
# Every test row is treated as an inlier, so the reference label is +1 for all
# of them. Built as a 1-D vector to match the shape returned by `predict`.
y_test = np.full(len(X_test), 1)

# This is the held-out split, so the figure is reported as test accuracy
# (the share of test rows the model labelled as inliers).
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Class -1 never occurs in the reference labels, so its recall is undefined;
# zero_division=0 reports it as 0.0 without emitting a warning per metric.
print(classification_report(y_test, y_pred, zero_division=0))

Training Accuracy: 89.78%
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00         0
           1       1.00      0.90      0.95      9757

    accuracy                           0.90      9757
   macro avg       0.50      0.45      0.47      9757
weighted avg       1.00      0.90      0.95      9757



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
import joblib

# Persist the fitted One-Class SVM to the given path. Only the estimator is
# written here; the feature scaler is not saved by this cell.
model_path = '..\\flask\\scripts\\model\\crash_prediction_model.joblib'
joblib.dump(one_class_svm, model_path)

['..\\flask\\scripts\\model\\crash_prediction_model.joblib']