In [1]:
#Importing the necessary packages and libaries
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn import svm, datasets
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
#load in image matrix data from .npz files:
npz_x = np.load("dat_standard.npz")
x_all =  npz_x["arr_0"] 

#load in response vector from .npz files:
npz_y = np.load("response.npz")
y_all = npz_y["arr_0"]

In [3]:
#verify dimensions (x should be 25231*7500, y should be 25231):
print(np.shape(x_all))
print(np.shape(y_all))

(25231, 7500)
(25231,)


In [4]:
np.unique(y_all)

array([ 6,  8, 15, 17, 20], dtype=int8)

In [5]:
#split testing and training data:
x_train,x_test,y_train,y_test = train_test_split(x_all,y_all, test_size = 0.2,random_state=2022, stratify = y_all)

#print out shape of data subsets:
print(np.shape(x_train))
print(np.shape(x_test))
print(np.shape(y_train))
print(np.shape(y_test))

(20184, 7500)
(5047, 7500)
(20184,)
(5047,)


#### Create and Fit Models

In [6]:
# Create the Models
linear_mod = svm.SVC(kernel='linear', C=1, decision_function_shape='ovr')
rbf_mod = svm.SVC(kernel='rbf', gamma=1, C=1, decision_function_shape='ovr')
poly_mod = svm.SVC(kernel='poly', degree=3, C=1, decision_function_shape='ovr')
sig_mod = svm.SVC(kernel='sigmoid', C=1, decision_function_shape='ovr')

In [7]:
%%time
# Fit the Models
linear_mod.fit(x_train, y_train)
rbf_mod.fit(x_train, y_train)
poly_mod.fit(x_train, y_train)
sig_mod.fit(x_train, y_train)

CPU times: user 24min 40s, sys: 10.9 s, total: 24min 51s
Wall time: 4min 25s


SVC(C=1, kernel='sigmoid')

In [8]:
# Test accuracy on test data
linear_pred = linear_mod.predict(x_test)
poly_pred = poly_mod.predict(x_test)
rbf_pred = rbf_mod.predict(x_test)
sig_pred = sig_mod.predict(x_test)

In [9]:
# Retrieve the accuracy and print it for all 4 kernel functions
accuracy_lin = linear_mod.score(x_test, y_test)
accuracy_poly = poly_mod.score(x_test, y_test)
accuracy_rbf = rbf_mod.score(x_test, y_test)
accuracy_sig = sig_mod.score(x_test, y_test)

In [10]:
print("Accuracy Linear Kernel:", accuracy_lin)
print("Accuracy Polynomial Kernel:", accuracy_poly)
print("Accuracy Radial Basis Kernel:", accuracy_rbf)
print("Accuracy Sigmoid Kernel:", accuracy_sig)

Accuracy Linear Kernel: 0.3400039627501486
Accuracy Polynomial Kernel: 0.494947493560531
Accuracy Radial Basis Kernel: 0.2603526847632257
Accuracy Sigmoid Kernel: 0.3130572617396473


In [11]:
# creating a confusion matrix
cm_lin = confusion_matrix(y_test, linear_pred)
cm_poly = confusion_matrix(y_test, poly_pred)
cm_rbf = confusion_matrix(y_test, rbf_pred)
cm_sig = confusion_matrix(y_test, sig_pred)
print(cm_lin)
print(cm_poly)
print(cm_rbf)
print(cm_sig)

[[437 165 213 120 166]
 [191 254 100 141 164]
 [288 144 300  84 141]
 [201 199 109 427  84]
 [278 243 197 103 298]]
[[674  45 100 118 164]
 [203 238  26 123 260]
 [404  58 291  78 126]
 [190  39   9 715  67]
 [294 112  52  81 580]]
[[  89    0    3    0 1009]
 [   0   13    0    0  837]
 [   2    0   84    0  871]
 [   0    6    0   16  998]
 [   0    7    0    0 1112]]
[[388  60 126 270 257]
 [297 131  40 194 188]
 [345  47 133 205 227]
 [336  31  34 577  42]
 [368  97  86 217 351]]


In [25]:
%%time
param_grid = {'C': [0.1, 1, 10, 100],
              'degree': [1, 2, 3, 4, 5],
              'kernel': ['poly']}
 
grid = GridSearchCV(svm.SVC(cache_size=1000), param_grid, refit = True, verbose = 3)

CPU times: user 56 µs, sys: 8 µs, total: 64 µs
Wall time: 66.8 µs


In [26]:
%%time
grid.fit(x_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END ......C=0.1, degree=1, kernel=poly;, score=0.423 total time=  31.0s
[CV 2/5] END ......C=0.1, degree=1, kernel=poly;, score=0.427 total time=  31.1s
[CV 3/5] END ......C=0.1, degree=1, kernel=poly;, score=0.431 total time=  31.8s
[CV 4/5] END ......C=0.1, degree=1, kernel=poly;, score=0.424 total time=  34.1s
[CV 5/5] END ......C=0.1, degree=1, kernel=poly;, score=0.409 total time=  34.5s
[CV 1/5] END ......C=0.1, degree=2, kernel=poly;, score=0.384 total time=  36.6s
[CV 2/5] END ......C=0.1, degree=2, kernel=poly;, score=0.381 total time=  36.8s
[CV 3/5] END ......C=0.1, degree=2, kernel=poly;, score=0.375 total time=  36.2s
[CV 4/5] END ......C=0.1, degree=2, kernel=poly;, score=0.371 total time=  36.4s
[CV 5/5] END ......C=0.1, degree=2, kernel=poly;, score=0.379 total time=  47.6s
[CV 1/5] END ......C=0.1, degree=3, kernel=poly;, score=0.346 total time=  40.3s
[CV 2/5] END ......C=0.1, degree=3, kernel=poly

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100], 'degree': [1, 2, 3, 4, 5],
                         'kernel': ['poly']},
             verbose=3)

In [27]:
# print best parameter after tuning
print(grid.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(grid.best_estimator_)

{'C': 10, 'degree': 3, 'kernel': 'poly'}
SVC(C=10, kernel='poly')
