# MACHINE LEARNING _ SVM
## Using Principal Component Analysis
### Using (x-mean)/std normalization

In [41]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import h5py
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing, neighbors, naive_bayes, tree, svm, linear_model, discriminant_analysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.decomposition import PCA
import pickle
#from xgboost import XGBRegressor

In [42]:
# Reading the csv dataset
eeg_raw = pd.read_csv(r'Dataset/one_to_five.csv')

# dropping 'y' & 'Unnamed: 180' columns
eeg_raw_drop_out = eeg_raw.drop(['y','Unnamed: 180' ], axis = 1)

# dropping 'Unnamed: 0' column
eeg_raw_drop_io = eeg_raw_drop_out.drop(['Unnamed: 0'], axis = 1)

In [43]:
# Normalization
mean = np.mean(eeg_raw_drop_io, axis = 0)
std = np.std(eeg_raw_drop_io, axis = 0)
norm_input = (eeg_raw_drop_io - mean)/std

In [44]:
# Preparing Output

# classes = ['Recording of seizure activites','Healthy Patient EEG from Tumour region','Healthy Patient EEG from Healthy region','Healthy Patient EEG with Eyes Close','Healthy Patient EEG with Eyes Open']
raw_y = eeg_raw['y']
targ = pd.DataFrame(list(zip(raw_y)),columns=['one_hot_y'])
# Converting classes into one_hot encoded
target = pd.get_dummies(targ.one_hot_y)
# previewing the one_hot encoded result
target

Unnamed: 0,1,2,3,4,5
0,0,0,0,1,0
1,1,0,0,0,0
2,0,0,0,0,1
3,0,0,0,0,1
4,0,0,0,0,1
...,...,...,...,...,...
11495,0,1,0,0,0
11496,1,0,0,0,0
11497,0,0,0,0,1
11498,0,0,1,0,0


In [45]:
# converting input and target into array
norm_input_array = np.array(norm_input)
target_array = np.array(target)
raw_y_array = np.array(raw_y)

In [46]:
print("Input Shape: ", norm_input_array.shape)
print("Output Shape: ", target_array.shape)
print("Output Shape_Y: ", raw_y_array.shape)
print("Input Datatype: ", norm_input_array.dtype)
print("Output Datatype: ", target_array.dtype)
print("Output Datatype: ", raw_y_array.dtype)

Input Shape:  (11500, 178)
Output Shape:  (11500, 5)
Output Shape_Y:  (11500,)
Input Datatype:  float64
Output Datatype:  uint8
Output Datatype:  int64


In [47]:
# Shuffle the dataset
x, y, ry = shuffle(norm_input_array, target_array, raw_y_array)

# Splitting x and y, X = 60%, Y = 60%
X_train, x_test = train_test_split(x, test_size = 0.4, shuffle = False)
Y_train, y_test = train_test_split(y, test_size = 0.4, shuffle = False)
RY_train, ry_test = train_test_split(ry, test_size = 0.4, shuffle = False)

# Splitting X_test and Y_test into 20% of the overall dataset  
X_cv, X_test = train_test_split(x_test, test_size = 0.5, shuffle = False)
Y_cv, Y_test = train_test_split(y_test, test_size = 0.5, shuffle = False)
RY_cv, RY_test = train_test_split(ry_test, test_size = 0.5, shuffle = False)

In [48]:
print("X_train shape: ", X_train.shape)
print("Y_train shape: ", Y_train.shape)
print("Raw_Y_train shape: ", RY_train.shape)
print("X_cv shape: ", X_cv.shape)
print("Y_cv shape: ", Y_cv.shape)
print("Raw_Y_cv shape: ", RY_cv.shape)
print("X_test shape: ", X_test.shape)
print("Y_test shape: ", Y_test.shape)
print("Raw_Y_test shape: ", RY_test.shape)

X_train shape:  (6900, 178)
Y_train shape:  (6900, 5)
Raw_Y_train shape:  (6900,)
X_cv shape:  (2300, 178)
Y_cv shape:  (2300, 5)
Raw_Y_cv shape:  (2300,)
X_test shape:  (2300, 178)
Y_test shape:  (2300, 5)
Raw_Y_test shape:  (2300,)


In [49]:
# Logistics Regression
lr = linear_model.LogisticRegression(multi_class = 'auto', solver='liblinear')
# K Nearest Neighbor
KNN = neighbors.KNeighborsClassifier(n_neighbors=178)
# Support Vector Machine (SVM) Guassian Kernel
SVC = svm.SVC(kernel='rbf')
# Support Vector Machine (SVM) Polynomial
SVC2 = svm.SVC(kernel='poly', degree = 2)
# Decision Tree
Tree = tree.DecisionTreeClassifier()
# Random Forest Tree
RF = RandomForestClassifier(max_depth=10, random_state=0)
# Naive Bayes
NB = naive_bayes.GaussianNB()
# Discriminant Analysis
DA = discriminant_analysis.LinearDiscriminantAnalysis()

In [50]:
# Initialize PCA
pca = PCA(n_components = 100)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
X_cv_pca = pca.transform(X_cv)

In [51]:
#Fitting the Model
lr.fit(X_train_pca, RY_train)
KNN.fit(X_train_pca, RY_train)
SVC.fit(X_train_pca, RY_train)
SVC2.fit(X_train_pca, RY_train)
Tree.fit(X_train_pca, RY_train)
RF.fit(X_train_pca, RY_train)
NB.fit(X_train_pca, RY_train)
DA.fit(X_train_pca, RY_train)

LinearDiscriminantAnalysis()

In [52]:
# Testing the Model
#lr_acc = lr.score(X_cv_pca, Y_cv)
KNN_acc = KNN.score(X_cv_pca, RY_cv)
SVC_acc = SVC.score(X_cv_pca, RY_cv)
SVC2_acc = SVC2.score(X_cv_pca, RY_cv)
Tree_acc = Tree.score(X_cv_pca, RY_cv)
RF_acc = RF.score(X_cv_pca, RY_cv)
NB_acc = NB.score(X_cv_pca, RY_cv)
DA_acc = DA.score(X_cv_pca, RY_cv)

In [53]:
# Printing Accuracy
#print("Logisitics Regression Accuracy: ", lr_acc)
print("K Nearest Neighbour Accuracy: ", KNN_acc)
print("Support Vector Accuracy: ", SVC_acc)
print("Support Vector (Polynomial) Accuracy: ", SVC2_acc)
print("Decision Tree Accuracy: ", Tree_acc)
print("Random Forest Accuracy: ", RF_acc)
print("Naive Bayes Accuracy: ", NB_acc)
print("Discriminant Analysis Accuracy: ", DA_acc)

K Nearest Neighbour Accuracy:  0.27260869565217394
Support Vector Accuracy:  0.5417391304347826
Support Vector (Polynomial) Accuracy:  0.47739130434782606
Decision Tree Accuracy:  0.5413043478260869
Random Forest Accuracy:  0.6647826086956522
Naive Bayes Accuracy:  0.5947826086956521
Discriminant Analysis Accuracy:  0.25608695652173913


In [54]:
# Prediction
lr_y_pred = lr.predict(X_test_pca)
KNN_y_pred = KNN.predict(X_test_pca)
SVC_y_pred = SVC.predict(X_test_pca)
SVC2_y_pred = SVC2.predict(X_test_pca)
Tree_y_pred = Tree.predict(X_test_pca)
RF_y_pred = RF.predict(X_test_pca)
NB_y_pred = NB.predict(X_test_pca)
DA_y_pred = DA.predict(X_test_pca)


In [55]:
#print(confusion_matrix(RY_test, lr_y_pred))
#print(confusion_matrix(RY_test, lr_y_pred, normalize = 'true'))
#print(classification_report(RY_test, lr_y_pred))

In [56]:
print(confusion_matrix(RY_test, KNN_y_pred))
print(confusion_matrix(RY_test, KNN_y_pred, normalize = 'true'))
print(classification_report(RY_test, KNN_y_pred))

[[ 80  94 181 110  26]
 [  0 197 238   2  10]
 [  0 134 274   0  26]
 [  0 106 174  30 122]
 [  0 189 269   1  37]]
[[0.16293279 0.19144603 0.36863544 0.22403259 0.05295316]
 [0.         0.44071588 0.53243848 0.00447427 0.02237136]
 [0.         0.30875576 0.63133641 0.         0.05990783]
 [0.         0.24537037 0.40277778 0.06944444 0.28240741]
 [0.         0.38104839 0.54233871 0.00201613 0.07459677]]
              precision    recall  f1-score   support

           1       1.00      0.16      0.28       491
           2       0.27      0.44      0.34       447
           3       0.24      0.63      0.35       434
           4       0.21      0.07      0.10       432
           5       0.17      0.07      0.10       496

    accuracy                           0.27      2300
   macro avg       0.38      0.28      0.23      2300
weighted avg       0.39      0.27      0.23      2300



In [57]:
print(confusion_matrix(RY_test, SVC_y_pred))
print(confusion_matrix(RY_test, SVC_y_pred, normalize = 'true'))
print(classification_report(RY_test, SVC_y_pred))

[[446  33   5   7   0]
 [ 23  80  95   6 243]
 [  1  53 125  14 241]
 [  0  31  39 215 147]
 [  0  10  45  30 411]]
[[0.90835031 0.06720978 0.0101833  0.01425662 0.        ]
 [0.05145414 0.17897092 0.21252796 0.01342282 0.54362416]
 [0.00230415 0.12211982 0.28801843 0.03225806 0.55529954]
 [0.         0.07175926 0.09027778 0.49768519 0.34027778]
 [0.         0.02016129 0.09072581 0.06048387 0.82862903]]
              precision    recall  f1-score   support

           1       0.95      0.91      0.93       491
           2       0.39      0.18      0.24       447
           3       0.40      0.29      0.34       434
           4       0.79      0.50      0.61       432
           5       0.39      0.83      0.53       496

    accuracy                           0.56      2300
   macro avg       0.58      0.54      0.53      2300
weighted avg       0.59      0.56      0.54      2300



In [58]:
print(confusion_matrix(RY_test, SVC2_y_pred))
print(confusion_matrix(RY_test, SVC2_y_pred, normalize = 'true'))
print(classification_report(RY_test, SVC2_y_pred))

[[417  24  34   9   7]
 [ 12  18 102  11 304]
 [  0   3 107  14 310]
 [  0   0   7 145 280]
 [  0   0  12  38 446]]
[[0.84928717 0.04887984 0.06924644 0.01832994 0.01425662]
 [0.02684564 0.04026846 0.22818792 0.0246085  0.68008949]
 [0.         0.00691244 0.24654378 0.03225806 0.71428571]
 [0.         0.         0.0162037  0.33564815 0.64814815]
 [0.         0.         0.02419355 0.0766129  0.89919355]]
              precision    recall  f1-score   support

           1       0.97      0.85      0.91       491
           2       0.40      0.04      0.07       447
           3       0.41      0.25      0.31       434
           4       0.67      0.34      0.45       432
           5       0.33      0.90      0.48       496

    accuracy                           0.49      2300
   macro avg       0.56      0.47      0.44      2300
weighted avg       0.56      0.49      0.45      2300



In [59]:
print(confusion_matrix(RY_test, Tree_y_pred))
print(confusion_matrix(RY_test, Tree_y_pred, normalize = 'true'))
print(classification_report(RY_test, Tree_y_pred))

[[365  49  28  32  17]
 [ 17 192 158  37  43]
 [  9 169 183  29  44]
 [ 16  24  26 282  84]
 [  4  63  54 111 264]]
[[0.74338086 0.09979633 0.05702648 0.06517312 0.03462322]
 [0.03803132 0.4295302  0.35346756 0.08277405 0.09619687]
 [0.02073733 0.38940092 0.42165899 0.06682028 0.10138249]
 [0.03703704 0.05555556 0.06018519 0.65277778 0.19444444]
 [0.00806452 0.12701613 0.10887097 0.22379032 0.53225806]]
              precision    recall  f1-score   support

           1       0.89      0.74      0.81       491
           2       0.39      0.43      0.41       447
           3       0.41      0.42      0.41       434
           4       0.57      0.65      0.61       432
           5       0.58      0.53      0.56       496

    accuracy                           0.56      2300
   macro avg       0.57      0.56      0.56      2300
weighted avg       0.58      0.56      0.56      2300



In [60]:
print(confusion_matrix(RY_test, RF_y_pred))
print(confusion_matrix(RY_test, RF_y_pred, normalize = 'true'))
print(classification_report(RY_test, RF_y_pred))

[[475   1   1  10   4]
 [ 28  31 296  29  63]
 [  7  16 314  31  66]
 [  3   0   1 355  73]
 [  0   0  28  88 380]]
[[0.96741344 0.00203666 0.00203666 0.0203666  0.00814664]
 [0.06263982 0.06935123 0.66219239 0.06487696 0.1409396 ]
 [0.01612903 0.03686636 0.7235023  0.07142857 0.15207373]
 [0.00694444 0.         0.00231481 0.82175926 0.16898148]
 [0.         0.         0.05645161 0.17741935 0.76612903]]
              precision    recall  f1-score   support

           1       0.93      0.97      0.95       491
           2       0.65      0.07      0.13       447
           3       0.49      0.72      0.58       434
           4       0.69      0.82      0.75       432
           5       0.65      0.77      0.70       496

    accuracy                           0.68      2300
   macro avg       0.68      0.67      0.62      2300
weighted avg       0.69      0.68      0.63      2300



In [61]:
print(confusion_matrix(RY_test, NB_y_pred))
print(confusion_matrix(RY_test, NB_y_pred, normalize = 'true'))
print(classification_report(RY_test, NB_y_pred))

[[419  36   1  35   0]
 [ 12  87 321  16  11]
 [  4  32 345  41  12]
 [  6  29  23 294  80]
 [  2  13 151  52 278]]
[[0.85336049 0.07331976 0.00203666 0.0712831  0.        ]
 [0.02684564 0.19463087 0.71812081 0.03579418 0.0246085 ]
 [0.00921659 0.07373272 0.79493088 0.09447005 0.02764977]
 [0.01388889 0.06712963 0.05324074 0.68055556 0.18518519]
 [0.00403226 0.02620968 0.30443548 0.10483871 0.56048387]]
              precision    recall  f1-score   support

           1       0.95      0.85      0.90       491
           2       0.44      0.19      0.27       447
           3       0.41      0.79      0.54       434
           4       0.67      0.68      0.68       432
           5       0.73      0.56      0.63       496

    accuracy                           0.62      2300
   macro avg       0.64      0.62      0.60      2300
weighted avg       0.65      0.62      0.61      2300



In [62]:
print(confusion_matrix(RY_test, DA_y_pred))
print(confusion_matrix(RY_test, DA_y_pred, normalize = 'true'))
print(classification_report(RY_test, DA_y_pred))

[[179  74  62 125  51]
 [ 29  79 145 151  43]
 [ 17  81 143 157  36]
 [ 54  67  84 184  43]
 [ 46 119 133 148  50]]
[[0.36456212 0.15071283 0.12627291 0.25458248 0.10386965]
 [0.06487696 0.17673378 0.32438479 0.33780761 0.09619687]
 [0.03917051 0.18663594 0.32949309 0.36175115 0.08294931]
 [0.125      0.15509259 0.19444444 0.42592593 0.09953704]
 [0.09274194 0.23991935 0.26814516 0.2983871  0.10080645]]
              precision    recall  f1-score   support

           1       0.55      0.36      0.44       491
           2       0.19      0.18      0.18       447
           3       0.25      0.33      0.29       434
           4       0.24      0.43      0.31       432
           5       0.22      0.10      0.14       496

    accuracy                           0.28      2300
   macro avg       0.29      0.28      0.27      2300
weighted avg       0.30      0.28      0.27      2300



In [63]:
#Saving Model
#pickle.dump(lr, open('1d_ML_lr.sav', 'wb'))
pickle.dump(KNN, open('1d_ML_KNN.sav', 'wb'))
pickle.dump(SVC, open('1d_ML_SVC.sav', 'wb'))
pickle.dump(SVC2, open('1d_ML_SVC2.sav', 'wb'))
pickle.dump(Tree, open('1d_ML_Tree.sav', 'wb'))
pickle.dump(RF, open('1d_ML_RF.sav', 'wb'))
pickle.dump(NB, open('1d_ML_NB.sav', 'wb'))
pickle.dump(DA, open('1d_ML_DA.sav', 'wb'))

In [66]:
#loading model
'''#lr = pickle.load(open('1d_ML_lr.sav', 'rb'))
KNN = pickle.load(open('1d_ML_KNN.sav', 'rb'))
SVC = pickle.load(open('1d_ML_SVC.sav', 'rb'))
SVC2 = pickle.load(open('1d_ML_SVC2.sav', 'rb'))
Tree = pickle.load(open('1d_ML_Tree.sav', 'rb'))
RF = pickle.load(open('1d_ML_RF.sav', 'rb'))
NB = pickle.load(open('1d_ML_NB.sav', 'rb'))
DA = pickle.load(open('1d_ML_DA.sav', 'rb'))'''