In [1]:
import numpy as np
import scipy #only sparse matrices allowed
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier

In [20]:
#https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
svc_clf = SVC(gamma='auto').fit(X_train,y_train.ravel())
svc_predictions = svc_clf.predict(X_test)
svc_accuracy = svc_clf.score(X_test, y_test.ravel())

In [21]:
svc_accuracy

0.8781065088757396

In [4]:
#https://stackoverflow.com/questions/3518778/how-do-i-read-csv-data-into-a-record-array-in-numpy
from numpy import genfromtxt
X_test_std = genfromtxt('X_test_std.csv', delimiter=',')
X_train_ada_std = genfromtxt('X_train_ada_std.csv', delimiter=',')
X_train_std = genfromtxt('X_train_std.csv', delimiter=',')

y_test = genfromtxt('y_test.csv', delimiter=',')
y_train_ada = genfromtxt('y_train_ada.csv', delimiter=',')
y_train = genfromtxt('y_train.csv', delimiter=',')

y_test = y_test.astype(int)
y_train_ada = y_train_ada.astype(int)
y_train = y_train.astype(int)

# Gridsearch for best hyperparameters:

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingRandomSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [6]:
#https://www.analyticsvidhya.com/blog/2021/06/tune-hyperparameters-with-gridsearchcv/
#https://www.mygreatlearning.com/blog/gridsearchcv/

In [7]:
param_grid = {
    'C' : [0.5, 1.0, 1.5, 2.0],
    'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree' : [3, 5, 7, 10],
    'gamma' : ['scale','auto'],
    'class_weight' : ['balanced', None]
}

In [8]:
svc_clf = SVC()
grid = HalvingRandomSearchCV(estimator = svc_clf, param_distributions = param_grid, refit = True, verbose = 2, n_jobs=-1, factor = 1.5)
# fitting the model for grid search 
grid.fit(X_train_ada_std, y_train_ada.ravel()) 

n_iterations: 14
n_required_iterations: 14
n_possible_iterations: 19
min_resources_: 30
max_resources_: 47961
aggressive_elimination: False
factor: 1.5
----------
iter: 0
n_candidates: 256
n_resources: 30
Fitting 5 folds for each of 256 candidates, totalling 1280 fits




----------
iter: 1
n_candidates: 171
n_resources: 45
Fitting 5 folds for each of 171 candidates, totalling 855 fits
----------
iter: 2
n_candidates: 114
n_resources: 67
Fitting 5 folds for each of 114 candidates, totalling 570 fits
----------
iter: 3
n_candidates: 76
n_resources: 101
Fitting 5 folds for each of 76 candidates, totalling 380 fits
----------
iter: 4
n_candidates: 51
n_resources: 151
Fitting 5 folds for each of 51 candidates, totalling 255 fits
----------
iter: 5
n_candidates: 34
n_resources: 227
Fitting 5 folds for each of 34 candidates, totalling 170 fits
----------
iter: 6
n_candidates: 23
n_resources: 341
Fitting 5 folds for each of 23 candidates, totalling 115 fits
----------
iter: 7
n_candidates: 16
n_resources: 512
Fitting 5 folds for each of 16 candidates, totalling 80 fits
----------
iter: 8
n_candidates: 11
n_resources: 768
Fitting 5 folds for each of 11 candidates, totalling 55 fits
----------
iter: 9
n_candidates: 8
n_resources: 1153
Fitting 5 folds for each of

HalvingRandomSearchCV(estimator=SVC(), factor=1.5, n_jobs=-1,
                      param_distributions={'C': [0.5, 1.0, 1.5, 2.0],
                                           'class_weight': ['balanced', None],
                                           'degree': [3, 5, 7, 10],
                                           'gamma': ['scale', 'auto'],
                                           'kernel': ['linear', 'poly', 'rbf',
                                                      'sigmoid']},
                      verbose=2)

In [9]:
# print best parameter after tuning 
print(grid.best_params_) 
grid_predictions = grid.predict(X_test_std)

{'kernel': 'rbf', 'gamma': 'auto', 'degree': 10, 'class_weight': 'balanced', 'C': 1.5}


In [10]:
print(classification_report(y_test, grid_predictions)) 

              precision    recall  f1-score   support

           0       0.99      0.81      0.89      2827
           1       0.29      0.98      0.44       109
           2       0.62      0.97      0.76       458

    accuracy                           0.84      3394
   macro avg       0.63      0.92      0.70      3394
weighted avg       0.92      0.84      0.86      3394



In [11]:
svc_clf = SVC()
grid = HalvingRandomSearchCV(estimator = svc_clf, param_distributions = param_grid, refit = True, verbose = 2, n_jobs=-1, factor = 1.5)
# fitting the model for grid search 
grid.fit(X_train_std, y_train.ravel()) 

n_iterations: 14
n_required_iterations: 14
n_possible_iterations: 16
min_resources_: 30
max_resources_: 19256
aggressive_elimination: False
factor: 1.5
----------
iter: 0
n_candidates: 256
n_resources: 30
Fitting 5 folds for each of 256 candidates, totalling 1280 fits




----------
iter: 1
n_candidates: 171
n_resources: 45
Fitting 5 folds for each of 171 candidates, totalling 855 fits
----------
iter: 2
n_candidates: 114
n_resources: 67
Fitting 5 folds for each of 114 candidates, totalling 570 fits
----------
iter: 3
n_candidates: 76
n_resources: 101
Fitting 5 folds for each of 76 candidates, totalling 380 fits
----------
iter: 4
n_candidates: 51
n_resources: 151
Fitting 5 folds for each of 51 candidates, totalling 255 fits
----------
iter: 5
n_candidates: 34
n_resources: 227
Fitting 5 folds for each of 34 candidates, totalling 170 fits
----------
iter: 6
n_candidates: 23
n_resources: 341
Fitting 5 folds for each of 23 candidates, totalling 115 fits
----------
iter: 7
n_candidates: 16
n_resources: 512
Fitting 5 folds for each of 16 candidates, totalling 80 fits
----------
iter: 8
n_candidates: 11
n_resources: 768
Fitting 5 folds for each of 11 candidates, totalling 55 fits
----------
iter: 9
n_candidates: 8
n_resources: 1153
Fitting 5 folds for each of

HalvingRandomSearchCV(estimator=SVC(), factor=1.5, n_jobs=-1,
                      param_distributions={'C': [0.5, 1.0, 1.5, 2.0],
                                           'class_weight': ['balanced', None],
                                           'degree': [3, 5, 7, 10],
                                           'gamma': ['scale', 'auto'],
                                           'kernel': ['linear', 'poly', 'rbf',
                                                      'sigmoid']},
                      verbose=2)

In [12]:
# print best parameter after tuning 
print(grid.best_params_) 
grid_predictions = grid.predict(X_test_std)

{'kernel': 'rbf', 'gamma': 'scale', 'degree': 5, 'class_weight': None, 'C': 2.0}


In [13]:
print(classification_report(y_test, grid_predictions)) 

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      2827
           1       1.00      0.70      0.82       109
           2       0.99      0.82      0.89       458

    accuracy                           0.96      3394
   macro avg       0.98      0.84      0.90      3394
weighted avg       0.97      0.96      0.96      3394

