In [1]:
import pandas as pd
from joblib import load
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from termcolor import colored
from collections import Counter
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Load the pre-split train/test features and labels produced by the preprocessing step.
# NOTE: joblib.load unpickles arbitrary Python objects; only load files from a trusted source.
splits = load('../../preprocessing/classification_data.joblib')
X_train, X_test, y_train, y_test = splits

# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,Belts,PersonalInjury,PropertyDamage,CommercialLicense,CommercialVehicle,ContributedToAccident,Gender,VehicleType_'02 - Automobile',VehicleType_'03 - Station Wagon',VehicleType_'04 - Limousine',...,Model_VAN,Model_VERSA,Model_VN,Race_ASIAN,Race_BLACK,Race_HISPANIC,Race_OTHER,Race_WHITE,ArrestType_Marked Patrol,ArrestType_Unmarked Patrol
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,1,0
2,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,1,0
3,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,0
4,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,1,1,0


In [3]:
def report(model, X, y, text = "training", class_names = ('Kazna', 'Oprema', 'Upozorenje')):
    """Print a classification report and a confusion matrix for a fitted classifier.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict``. If it also exposes ``classes_`` (as fitted
        scikit-learn classifiers do), that label set fixes the confusion-matrix
        layout.
    X : array-like or DataFrame
        Features to predict on.
    y : array-like
        True labels corresponding to ``X``.
    text : str, default "training"
        Name of the data split; used only in the printed headings.
    class_names : sequence of str, default ('Kazna', 'Oprema', 'Upozorenje')
        Captions for the confusion-matrix columns (predicted class), one per
        class in sorted label order.

    Returns
    -------
    None
        Everything is written to stdout.

    Raises
    ------
    ValueError
        If the number of classes in the confusion matrix does not match
        ``len(class_names)``.
    """
    separator = "---------------------------------------------------------------------------------"
    model_name = type(model).__name__
    y_pred = model.predict(X)

    print(colored("Classification report for model {} on {} data".format(model_name, text), "green"))
    print(separator)
    # zero_division=1 reports undefined metrics (no predicted/true samples for a
    # class) as 1.0 instead of warning; same effect as the previous `True`.
    print(classification_report(y, y_pred, zero_division=1))
    print(separator)

    print(colored("Confusion matrix for model {} on {} data ".format(model_name, text), "green"))
    print(separator)
    # Pin the label set to the classes the model knows, so the matrix keeps its
    # full shape even when a class is absent from both y and y_pred. Without
    # this the matrix shrinks and no longer fits the fixed captions below.
    # Falls back to scikit-learn's default (labels seen in y / y_pred) when the
    # model has no `classes_` attribute.
    matrix = confusion_matrix(y, y_pred, labels=getattr(model, "classes_", None))
    if matrix.shape[0] != len(class_names):
        raise ValueError(
            "confusion matrix has {} classes but {} class names were given".format(
                matrix.shape[0], len(class_names)
            )
        )
    # Rows are the true classes (positional index as a string), columns are the
    # predicted classes.
    row_labels = [str(position) for position in range(len(class_names))]
    print(pd.DataFrame(matrix, columns=list(class_names), index=row_labels))
    print(separator)

## GridSearchCV

In [4]:
# Search space for the SVC grid search: a list of independent sub-grids.
params = [
    # Linear kernel: only the regularisation strength C is tuned (4 candidates).
    {'kernel': ['linear'], 'C': [0.01, 0.1, 1, 10]},
    # RBF kernel: C and the kernel coefficient gamma are tuned jointly (4 x 4 = 16 candidates).
    {'kernel': ['rbf'], 'C': [0.01, 0.1, 1, 10], 'gamma': [0.01, 0.1, 1, 10]},
]

In [5]:
# Exhaustive search over `params` with 5-fold cross-validation, ranked by accuracy.
# NOTE(review): n_jobs is left at its default, so the fits run one after another;
# consider n_jobs=-1 if memory allows, since individual fits take minutes.
estimator = GridSearchCV(
    estimator=SVC(),
    param_grid=params,
    scoring='accuracy',
    cv=5,
    verbose=5,
)

In [6]:
# Run the grid search on the training split: 20 candidates x 5 folds = 100 fits,
# each logged because verbose=5. With GridSearchCV's default refit=True the best
# candidate is then refit on the whole training split.
estimator.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END .............C=0.01, kernel=linear;, score=0.538 total time=11.9min
[CV 2/5] END .............C=0.01, kernel=linear;, score=0.679 total time=12.9min
[CV 3/5] END .............C=0.01, kernel=linear;, score=0.689 total time=11.9min
[CV 4/5] END .............C=0.01, kernel=linear;, score=0.681 total time=12.1min
[CV 5/5] END .............C=0.01, kernel=linear;, score=0.657 total time=12.9min
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.543 total time= 6.9min
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.703 total time= 6.9min
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.712 total time= 6.7min
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.702 total time= 6.1min
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.663 total time= 5.9min
[CV 1/5] END ................C=1, kernel=linear;, score=0.537 total time= 5.7min
[CV 2/5] END ................C=1, kernel=linear

In [7]:
# Hyperparameter combination with the highest mean cross-validated accuracy.
estimator.best_params_

{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}

In [8]:
# Mean cross-validated accuracy achieved by the best hyperparameter combination.
estimator.best_score_

0.668752624947501

In [9]:
# Evaluate the best estimator found by the search on the data it was trained on.
report(estimator.best_estimator_, X_train, y_train, text="training")

[32mClassification report for model SVC on training data[0m
---------------------------------------------------------------------------------
              precision    recall  f1-score   support

           0       0.61      0.49      0.55     23810
           1       0.89      0.87      0.88     23810
           2       0.57      0.70      0.63     23810

    accuracy                           0.69     71430
   macro avg       0.69      0.69      0.69     71430
weighted avg       0.69      0.69      0.69     71430

---------------------------------------------------------------------------------
[32mConfusion matrix for model SVC on training data [0m
---------------------------------------------------------------------------------
   Kazna  Oprema  Upozorenje
0  11720    1732       10358
1   1012   20711        2087
2   6348     724       16738
---------------------------------------------------------------------------------


In [10]:
# Evaluate the best estimator found by the search on the held-out test split.
# NOTE(review): in the recorded output, class 1 scores far lower here than on the
# training data, and the training supports are exactly equal per class. That
# pattern may indicate resampling applied before cross-validation; verify the
# preprocessing.
report(estimator.best_estimator_, X_test, y_test, text="test")

[32mClassification report for model SVC on test data[0m
---------------------------------------------------------------------------------
              precision    recall  f1-score   support

           0       0.58      0.48      0.52      9637
           1       0.06      0.04      0.05      1049
           2       0.56      0.67      0.61     10205

    accuracy                           0.55     20891
   macro avg       0.40      0.40      0.39     20891
weighted avg       0.54      0.55      0.54     20891

---------------------------------------------------------------------------------
[32mConfusion matrix for model SVC on test data [0m
---------------------------------------------------------------------------------
   Kazna  Oprema  Upozorenje
0   4625     290        4722
1    316      39         694
2   3074     314        6817
---------------------------------------------------------------------------------
