# Import Libraries

In [1]:
import pickle
import random
import time
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xlsxwriter
from scipy.stats import randint
from sklearn import metrics
from sklearn import datasets, svm
from sklearn.externals import joblib
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.model_selection import RandomizedSearchCV, train_test_split

# Import Dataset

In [2]:
# Load the bundled digits dataset and keep the image stack and the targets
# under short names for the cells below.
digits = datasets.load_digits()
imgs, labels = digits.images, digits.target

In [3]:
# Report the shape of the image stack and of the label vector.
for caption, array in (('Images Shape : ', imgs), ('Labels Shape : ', labels)):
    print(caption, array.shape)

Images Shape :  (1797, 8, 8)
Labels Shape :  (1797,)


In [4]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    imgs,
    labels,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Show the shapes of both splits: image arrays first, then label vectors.
for caption, array in (
    ('Train Dataset', x_train),
    ('Test Dataset', x_test),
    ('Training Set Labels', y_train),
    ('Testing Set Labels', y_test),
):
    print(caption, array.shape)

Train Dataset (1437, 8, 8)
Test Dataset (360, 8, 8)
Training Set Labels (1437,)
Testing Set Labels (360,)


# Reshape Dataset

In [6]:
# Flatten every 2-D image into a single feature row so each split becomes a
# (samples, height * width) matrix.
train_samples, tr_x, tr_y = x_train.shape
test_samples, ts_x, ts_y = x_test.shape

train_dataset = x_train.reshape(train_samples, tr_x * tr_y)
test_dataset = x_test.reshape(test_samples, ts_x * ts_y)

# Flattened feature matrices first, then the (unchanged) label vectors.
for caption, array in (
    ('Train Dataset', train_dataset),
    ('Test Dataset', test_dataset),
    ('Training Set Labels', y_train),
    ('Testing Set Labels', y_test),
):
    print(caption, array.shape)

Train Dataset (1437, 64)
Test Dataset (360, 64)
Training Set Labels (1437,)
Testing Set Labels (360,)


# Save Test Dataset and Test Labels to MS Excel Files

In [7]:
# Flattened test images -> sheet 'test_data' of 'test_data.xlsx',
# written without a header row or an index column.
file_test_data = 'test_data.xlsx'
test_data_values = pd.DataFrame(test_dataset)
print(type(test_data_values))
test_data_values.to_excel(
    file_test_data,
    sheet_name='test_data',
    header=None,
    index=False,
    index_label=None,
)

# Test labels -> sheet 'test_labels' of 'test_labels.xlsx', same layout.
file_test_labels = 'test_labels.xlsx'
test_label_values = pd.DataFrame(y_test)
print(type(test_label_values))
test_label_values.to_excel(
    file_test_labels,
    sheet_name='test_labels',
    header=None,
    index=False,
    index_label=None,
)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


# Define Parameters for Training

In [8]:
# Settings handed to RandomizedSearchCV further down.
no_cv = 10        # passed as `cv`
no_iter = 10      # passed as `n_iter`
no_verbose = 1    # passed as `verbose`
no_process = -1   # passed as `n_jobs`

# Define SVM Classifier

In [9]:
# Base estimator for the search: an SVC with a one-vs-one decision function.
clf_SVM_OVO = svm.SVC(decision_function_shape='ovo')

# Show the estimator's parameters before any tuning.
default_params = clf_SVM_OVO.get_params()
print('Default Parameters currently in use:\n')
pprint(default_params)

Default Parameters currently in use:

{'C': 1.0,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovo',
 'degree': 3,
 'gamma': 'auto',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}


In [10]:
# Candidate values the randomized search draws from, one list per SVC parameter.
C = [
    0.001, 0.01, 0.1,
    1, 5, 10, 50,
    100, 500, 1000,
]
kernel = [
    'linear',
    'rbf',
    'sigmoid',
]
gamma = [
    0.0001, 0.001, 0.01, 0.1, 1,
]

In [11]:
# Map each SVC parameter name to its list of candidate values.
param_SVM = dict(C=C, kernel=kernel, gamma=gamma)

# Training on randomly sampled parameter combinations and finding the best parameters

In [12]:
# Randomized hyper-parameter search wrapped around the one-vs-one SVC;
# the fixed random_state makes the sampled candidates repeatable.
classifier_SVM_OVO = RandomizedSearchCV(
    estimator=clf_SVM_OVO,
    param_distributions=param_SVM,
    n_iter=no_iter,
    cv=no_cv,
    verbose=no_verbose,
    random_state=42,
    n_jobs=no_process,
)

# Run the search, bracketing fit() with wall-clock timestamps.
start_training_SVM = time.time()
classifier_SVM_OVO.fit(train_dataset, y_train)
end_training_SVM = time.time()

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    6.7s finished


# Training Time

In [13]:
# Elapsed wall-clock seconds of the search: difference between the two
# time.time() stamps taken immediately before and after fit().
training_time_SVM = end_training_SVM - start_training_SVM
print('Total Time : ', training_time_SVM)

Total Time :  6.994588613510132


# Attributes Found After Training

In [14]:
# Timing figures are pulled out first; the report below keeps its original order.
# cv_results_['mean_fit_time'] holds one entry per sampled candidate: the fit
# time of that candidate averaged over its cross-validation folds.
time_for_all_validations_SVM = classifier_SVM_OVO.cv_results_['mean_fit_time']

# Mean fit time of the winning candidate only.
time_best_index_SVM = time_for_all_validations_SVM[classifier_SVM_OVO.best_index_]

# Best cross-validated score among the candidates tried by RandomizedSearchCV.
print('Best Accuracy : ', classifier_SVM_OVO.best_score_)

# Position of the winning candidate inside the cv_results_ arrays.
print('Best Index : ', classifier_SVM_OVO.best_index_)

# Parameter combination of the winning candidate.
print('Best Parameters : ', classifier_SVM_OVO.best_params_)

# Estimator built from the winning parameter combination.
print('Best Estimator : ', classifier_SVM_OVO.best_estimator_)

# Per-candidate mean fit times (one value per candidate, not per fold).
print('Mean Time of each validation : ', time_for_all_validations_SVM)

# Mean fit time of the winner multiplied by the number of folds, i.e. the
# total fit time spent on the winning candidate across all folds.
print('Time for all cross-validations of best index : ', time_best_index_SVM*no_cv, 'seconds')

Best Accuracy :  0.9909533750869868
Best Index :  6
Best Parameters :  {'kernel': 'rbf', 'gamma': 0.001, 'C': 5}
Best Estimator :  SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
Mean Time of each validation :  [0.47367992 0.04315155 0.55085719 0.0433496  0.05629547 0.28415775
 0.11229384 0.04587891 0.31649759 0.36520424]
Time for all cross-validations of best index :  1.1229383945465088 seconds


# Export Best Parameters

In [15]:
# Pull the winning hyper-parameters out of the search result so they can be
# exported individually.
param = classifier_SVM_OVO.best_params_
print(type(param))
print(param.keys())

# Index directly: a missing key should fail here, not silently become None.
Kernel_Parameter = param["kernel"]
C_Parameter = param["C"]
Gamma_Parameter = param["gamma"]

print('Kernel : ', Kernel_Parameter)
print('C : ', C_Parameter)
print('Gamma : ', Gamma_Parameter)

print(type(Kernel_Parameter))
print(type(C_Parameter))
print(type(Gamma_Parameter))

# C is deliberately NOT cast to int: the candidate list contains fractional
# values (0.001, 0.01, 0.1) that int() would truncate to 0. NumPy converts the
# mixed list to a string array, so integer candidates still appear as e.g. '5'.
parameters = np.array([Kernel_Parameter, C_Parameter, float(Gamma_Parameter)])
print(type(parameters))
print(parameters)

<class 'dict'>
dict_keys(['kernel', 'gamma', 'C'])
Kernel :  rbf
C :  5
Gamma :  0.001
<class 'str'>
<class 'int'>
<class 'float'>
<class 'numpy.ndarray'>
['rbf' '5' '0.001']


In [16]:
# Write the selected hyper-parameters to 'parameters.xlsx', one value per row
# in the first column: kernel (row 0), C (row 1), gamma (row 2).
# The context manager closes the workbook even if one of the writes raises.
with xlsxwriter.Workbook('parameters.xlsx', {'strings_to_numbers': True}) as workbook:
    worksheet = workbook.add_worksheet()

    worksheet.write(0, 0, Kernel_Parameter)  # kernel name, stored as text
    worksheet.write(1, 0, C_Parameter)       # already numeric
    worksheet.write(2, 0, Gamma_Parameter)   # already numeric

# Indices of Support Vectors

In [17]:
# Indices (into the training set) of the samples kept as support vectors.
support_indices = classifier_SVM_OVO.best_estimator_.support_
print(support_indices.shape)
print(support_indices)

(720,)
[   6   47   68  122  145  161  273  306  318  409  505  582  589  608
  623  635  658  665  762  788  812  840  855  857  880  891  954  970
  994  997 1019 1047 1071 1084 1177 1197 1203 1205 1268 1337 1350 1352
 1418   17   36   38   39   66   80  119  120  131  136  138  139  151
  153  155  158  167  182  212  220  221  235  247  266  270  292  302
  311  312  330  387  411  453  488  489  496  513  529  558  559  561
  576  577  640  652  660  689  690  702  721  748  753  760  772  778
  797  801  837  841  893  908  931  934  935  936  981 1006 1040 1052
 1054 1101 1123 1127 1145 1167 1170 1179 1187 1192 1214 1264 1265 1277
 1298 1310 1313 1316 1347 1354 1374 1399 1426 1436   10   21   65   91
   99  144  162  219  262  263  268  272  282  293  366  368  373  390
  408  412  423  443  450  456  457  484  491  538  549  569  593  644
  704  779  818  828  849  860  881  892  904  939  949  965  973  992
 1004 1020 1034 1073 1114 1130 1135 1222 1230 1232 1270 1280 1291 1293

# Support Vectors (Value of x_i)

In [18]:
# The support vectors themselves: one flattened image per row.
support_vectors = classifier_SVM_OVO.best_estimator_.support_vectors_
print(support_vectors.shape)
print(support_vectors)

(720, 64)
[[ 0.  0.  0. ...  9.  0.  0.]
 [ 0.  0.  1. ... 12.  1.  0.]
 [ 0.  0.  7. ...  0.  0.  0.]
 ...
 [ 0.  0.  8. ... 16. 12.  0.]
 [ 0.  0.  7. ...  9.  0.  0.]
 [ 0.  0.  2. ... 16. 10.  0.]]


##### Save the Support Vectors

In [19]:
# Support vectors -> sheet 'xi' of 'xi.xlsx', no header row or index column.
file_xi = 'xi.xlsx'
xi_values = pd.DataFrame(classifier_SVM_OVO.best_estimator_.support_vectors_)
print(type(xi_values))
xi_values.to_excel(
    file_xi,
    sheet_name='xi',
    header=None,
    index=False,
    index_label=None,
)

<class 'pandas.core.frame.DataFrame'>


# Number of Support Vectors for each class

In [20]:
# How many support vectors each class contributes.
support_counts = classifier_SVM_OVO.best_estimator_.n_support_
print(support_counts.shape)
print(support_counts)

(10,)
[43 93 68 73 69 70 50 71 98 85]


##### Save the number of Support Vectors for each class

In [21]:
# Per-class support-vector counts -> sheet 'no_SV' of 'no_SV.xlsx',
# no header row or index column.
file_noSV = 'no_SV.xlsx'
no_SV = pd.DataFrame(classifier_SVM_OVO.best_estimator_.n_support_)
print(type(no_SV))
no_SV.to_excel(
    file_noSV,
    sheet_name='no_SV',
    header=None,
    index=False,
    index_label=None,
)

<class 'pandas.core.frame.DataFrame'>


# Dual coefficients of the support vectors in the decision function (values of alpha multiplied by their targets)

In [22]:
# Dual coefficients of the support vectors in the decision function.
dual_coefficients = classifier_SVM_OVO.best_estimator_.dual_coef_
print(dual_coefficients.shape)
print(dual_coefficients)

(9, 720)
[[ 0.20713259  0.          0.         ... -0.         -0.
  -0.03174388]
 [ 0.          0.          0.         ... -0.         -0.
  -0.25606236]
 [ 0.          0.          0.         ... -0.         -0.
  -0.06641471]
 ...
 [ 0.          0.          0.         ... -0.         -0.
  -0.19328002]
 [ 0.17062977  0.          0.         ... -0.         -0.
  -0.09064573]
 [ 0.10543021  0.          0.         ... -0.         -0.
  -0.08414877]]


##### Save the values of 'alpha'

In [23]:
# Dual coefficients -> sheet 'alpha' of 'alpha.xlsx', no header row or index column.
file_alpha = 'alpha.xlsx'
alpha_values = pd.DataFrame(classifier_SVM_OVO.best_estimator_.dual_coef_)
print(type(alpha_values))
alpha_values.to_excel(
    file_alpha,
    sheet_name='alpha',
    header=None,
    index=False,
    index_label=None,
)

<class 'pandas.core.frame.DataFrame'>


# Constants in decision function (Value of b)

In [24]:
# Constant terms (b) of the decision function.
intercepts = classifier_SVM_OVO.best_estimator_.intercept_
print(intercepts.shape)
print(intercepts)

(45,)
[-0.48692391 -0.41554004 -0.35890656 -0.43998608 -0.48392321 -0.24585528
 -0.39171955 -0.46049463 -0.45239063  0.15986714  0.25218499  0.10098672
  0.12346392  0.30977389  0.0785207   0.18506128  0.14811355  0.06986461
 -0.03270043 -0.02567471  0.23355386 -0.05376773 -0.0212323  -0.03253961
 -0.13049437 -0.11872675  0.13591256 -0.15574711 -0.08873775 -0.11155316
  0.05940127  0.29253786  0.02825803  0.00564283 -0.0235015   0.31142658
 -0.00090015  0.06463665 -0.08438994 -0.22130744 -0.31815664 -0.26245467
  0.09826874 -0.01396766 -0.122389  ]


##### Save the values of 'b'

In [25]:
# Intercepts -> sheet 'bias' of 'bias.xlsx', no header row or index column.
file_bias = 'bias.xlsx'
bias_values = pd.DataFrame(classifier_SVM_OVO.best_estimator_.intercept_)
print(type(bias_values))
bias_values.to_excel(
    file_bias,
    sheet_name='bias',
    header=None,
    index=False,
    index_label=None,
)

<class 'pandas.core.frame.DataFrame'>


# Labels of Support Vectors

In [26]:
# Look up the training label of every support vector via its training-set index.
print(y_train.shape)
support_labels = y_train[classifier_SVM_OVO.best_estimator_.support_]

# A bare `.shape` expression in the middle of a cell is never displayed,
# so print it explicitly.
print(support_labels.shape)

# Last expression of the cell: rendered as the cell output.
support_labels

(1437,)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

##### Save the Labels of Support Vectors

In [27]:
# Labels of the support vectors -> sheet 'labels_SV' of 'labels_SV.xlsx',
# no header row or index column.
file_labels_SV = 'labels_SV.xlsx'
labels_SV = pd.DataFrame(y_train[classifier_SVM_OVO.best_estimator_.support_])
print(type(labels_SV))
labels_SV.to_excel(
    file_labels_SV,
    sheet_name='labels_SV',
    header=None,
    index=False,
    index_label=None,
)

<class 'pandas.core.frame.DataFrame'>


# Predict

In [28]:
# Time the prediction step: one time.time() stamp right before and one right
# after predict(), so nothing else is included in the measurement.
start_testing_SVM = time.time()
# Predict labels for the flattened test images with the fitted search object.
predict_SVM = classifier_SVM_OVO.predict(test_dataset)
end_testing_SVM = time.time()

##### Show Prediction Time and Accuracy

In [29]:
# Score on the held-out test set, plus the elapsed time between the two
# timestamps recorded around predict().
test_score_SVM = classifier_SVM_OVO.score(test_dataset, y_test)
testing_time_SVM = end_testing_SVM - start_testing_SVM

print('Test Accuracy : ', test_score_SVM)
print('Prediction Time : ', testing_time_SVM)

Test Accuracy :  0.9888888888888889
Prediction Time :  0.023310422897338867


In [30]:
# Compare true and predicted test labels: shapes first, then the values.
label_sets = (('Actual Labels', y_test), ('Predicted Labels', predict_SVM))

for caption, values in label_sets:
    print(caption + ' Shape : ', values.shape)

for caption, values in label_sets:
    print(caption + ' : ', values)

Actual Labels Shape :  (360,)
Predicted Labels Shape :  (360,)
Actual Labels :  [6 9 3 7 2 1 5 2 5 2 1 9 4 0 4 2 3 7 8 8 4 3 9 7 5 6 3 5 6 3 4 9 1 4 4 6 9
 4 7 6 6 9 1 3 6 1 3 0 6 5 5 1 9 5 6 0 9 0 0 1 0 4 5 2 4 5 7 0 7 5 9 5 5 4
 7 0 4 5 5 9 9 0 2 3 8 0 6 4 4 9 1 2 8 3 5 2 9 0 4 4 4 3 5 3 1 3 5 9 4 2 7
 7 4 4 1 9 2 7 8 7 2 6 9 4 0 7 2 7 5 8 7 5 7 7 0 6 6 4 2 8 0 9 4 6 9 9 6 9
 0 3 5 6 6 0 6 4 3 9 3 9 7 2 9 0 4 5 3 6 5 9 9 8 4 2 1 3 7 7 2 2 3 9 8 0 3
 2 2 5 6 9 9 4 1 5 4 2 3 6 4 8 5 9 5 7 8 9 4 8 1 5 4 4 9 6 1 8 6 0 4 5 2 7
 4 6 4 5 6 0 3 2 3 6 7 1 5 1 4 7 6 8 8 5 5 1 6 2 8 8 9 9 7 6 2 2 2 3 4 8 8
 3 6 0 9 7 7 0 1 0 4 5 1 5 3 6 0 4 1 0 0 3 6 5 9 7 3 5 5 9 9 8 5 3 3 2 0 5
 8 3 4 0 2 4 6 4 3 4 5 0 5 2 1 3 1 4 1 1 7 0 1 5 2 1 2 8 7 0 6 4 8 8 5 1 8
 4 5 8 7 9 8 5 0 6 2 0 7 9 8 9 5 2 7 7 1 8 7 4 3 8 3 5]
Predicted Labels :  [6 9 3 7 2 1 5 2 5 2 1 9 4 0 4 2 3 7 8 8 4 3 9 7 5 6 3 5 6 3 4 9 1 4 4 6 9
 4 7 6 6 9 1 3 6 1 3 0 6 5 5 1 9 5 6 0 9 0 0 1 0 4 5 2 4 5 7 0 7 5 9 5 5 4
 7 0 4 5 5 9 9 0 2 

##### Save predicted labels

In [31]:
# Predicted labels -> sheet 'predictions' of 'predictions.xlsx', no header row
# or index column.
# `predict_SVM` is intentionally left as the array returned by predict(): the
# DataFrame gets its own name so cells that read `predict_SVM` behave the same
# whether they run before or after this one.
predictions_frame = pd.DataFrame(predict_SVM)
print(type(predictions_frame))
file_predict_results = 'predictions.xlsx'
predictions_frame.to_excel(
    file_predict_results,
    sheet_name='predictions',
    header=None,
    index=False,
    index_label=None,
)

<class 'pandas.core.frame.DataFrame'>


# Work with a Single Test Element

##### Load first element and reshape 

In [35]:
# First flattened test image, then the same data as a one-row 2-D array
# (the layout passed to predict() / decision_function() below).
A = test_dataset[0]
B = A.reshape(1, -1)

##### Predict value for chosen test element

In [36]:
# Predict the label of the single one-row sample B using the best estimator
# selected by the search; as the cell's last expression it is displayed.
classifier_SVM_OVO.best_estimator_.predict(B)  

array([6])

##### Check Decision Function

In [56]:
# Raw decision-function values for the single sample B. The estimator was
# created with decision_function_shape='ovo'; the displayed row has 45 values,
# presumably one per pair of the 10 digit classes.
classifier_SVM_OVO.best_estimator_.decision_function(B)

array([[-0.10576299,  0.08611723,  0.02455942, -0.05644037, -0.14910456,
        -1.00355837,  0.12191181, -0.28268428, -0.09108589,  0.32580151,
         0.267082  ,  0.02580776,  0.10411427, -1.31007137,  0.33439272,
        -0.34777606, -0.04950281, -0.050918  , -0.1040734 , -0.17583958,
        -1.2439709 ,  0.13014175, -0.6494695 , -0.27394263, -0.18291351,
        -0.33950794, -1.22099642,  0.04059798, -0.79053617, -0.30366229,
         0.00362853, -1.22483273,  0.36530671, -0.402442  , -0.04719855,
        -1.23001569,  0.3076673 , -0.27545247,  0.15341093,  1.17550986,
         1.15629277,  1.13029723, -0.64429581, -0.33669679,  0.50455802]])