# MACHINE LEARNING _ SVM
### Using (x-mean)/std normalization

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import h5py
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing, neighbors, naive_bayes, tree, svm, linear_model, discriminant_analysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
import pickle
#from xgboost import XGBRegressor

In [2]:
# Reading the csv dataset
eeg_raw = pd.read_csv(r'Dataset/one_to_five.csv')

# dropping 'y' & 'Unnamed: 180' columns
eeg_raw_drop_out = eeg_raw.drop(['y','Unnamed: 180' ], axis = 1)

# dropping 'Unnamed: 0' column
eeg_raw_drop_io = eeg_raw_drop_out.drop(['Unnamed: 0'], axis = 1)

In [3]:
# Normalization
mean = np.mean(eeg_raw_drop_io, axis = 0)
std = np.std(eeg_raw_drop_io, axis = 0)
norm_input = (eeg_raw_drop_io - mean)/std

In [4]:
# Preparing Output

# classes = ['Recording of seizure activites','Healthy Patient EEG from Tumour region','Healthy Patient EEG from Healthy region','Healthy Patient EEG with Eyes Close','Healthy Patient EEG with Eyes Open']
raw_y = eeg_raw['y']
targ = pd.DataFrame(list(zip(raw_y)),columns=['one_hot_y'])
# Converting classes into one_hot encoded
target = pd.get_dummies(targ.one_hot_y)
# previewing the one_hot encoded result
target

Unnamed: 0,1,2,3,4,5
0,0,0,0,1,0
1,1,0,0,0,0
2,0,0,0,0,1
3,0,0,0,0,1
4,0,0,0,0,1
...,...,...,...,...,...
11495,0,1,0,0,0
11496,1,0,0,0,0
11497,0,0,0,0,1
11498,0,0,1,0,0


In [5]:
# converting input and target into array
norm_input_array = np.array(norm_input)
target_array = np.array(target)
raw_y_array = np.array(raw_y)

In [6]:
print("Input Shape: ", norm_input_array.shape)
print("Output Shape: ", target_array.shape)
print("Output Shape_Y: ", raw_y_array.shape)
print("Input Datatype: ", norm_input_array.dtype)
print("Output Datatype: ", target_array.dtype)
print("Output Datatype: ", raw_y_array.dtype)

Input Shape:  (11500, 178)
Output Shape:  (11500, 5)
Output Shape_Y:  (11500,)
Input Datatype:  float64
Output Datatype:  uint8
Output Datatype:  int64


In [7]:
# Shuffle the dataset
x, y, ry = shuffle(norm_input_array, target_array, raw_y_array)

# Splitting x and y, X = 60%, Y = 60%
X_train, x_test = train_test_split(x, test_size = 0.4, shuffle = False)
Y_train, y_test = train_test_split(y, test_size = 0.4, shuffle = False)
RY_train, ry_test = train_test_split(ry, test_size = 0.4, shuffle = False)

# Splitting X_test and Y_test into 20% of the overall dataset  
X_cv, X_test = train_test_split(x_test, test_size = 0.5, shuffle = False)
Y_cv, Y_test = train_test_split(y_test, test_size = 0.5, shuffle = False)
RY_cv, RY_test = train_test_split(ry_test, test_size = 0.5, shuffle = False)

In [8]:
print("X_train shape: ", X_train.shape)
print("Y_train shape: ", Y_train.shape)
print("Raw_Y_train shape: ", RY_train.shape)
print("X_cv shape: ", X_cv.shape)
print("Y_cv shape: ", Y_cv.shape)
print("Raw_Y_cv shape: ", RY_cv.shape)
print("X_test shape: ", X_test.shape)
print("Y_test shape: ", Y_test.shape)
print("Raw_Y_test shape: ", RY_test.shape)

X_train shape:  (6900, 178)
Y_train shape:  (6900, 5)
Raw_Y_train shape:  (6900,)
X_cv shape:  (2300, 178)
Y_cv shape:  (2300, 5)
Raw_Y_cv shape:  (2300,)
X_test shape:  (2300, 178)
Y_test shape:  (2300, 5)
Raw_Y_test shape:  (2300,)


In [9]:
# Logistics Regression
lr = linear_model.LogisticRegression()
# K Nearest Neighbor
KNN = neighbors.KNeighborsClassifier(n_neighbors=178)
# Support Vector Machine (SVM) Guassian Kernel
SVC = svm.SVC(kernel='rbf')
# Support Vector Machine (SVM) Polynomial
SVC2 = svm.SVC(kernel='poly', degree = 2)
# Decision Tree
Tree = tree.DecisionTreeClassifier()
# Random Forest Tree
RF = RandomForestClassifier(max_depth=10, random_state=0)
# Naive Bayes
NB = naive_bayes.GaussianNB()
# Discriminant Analysis
DA = discriminant_analysis.LinearDiscriminantAnalysis()

In [10]:
#Fitting the Model
#lr.fit(X_train, RY_train)
KNN.fit(X_train, RY_train)
SVC.fit(X_train, RY_train)
SVC2.fit(X_train, RY_train)
Tree.fit(X_train, RY_train)
RF.fit(X_train, RY_train)
NB.fit(X_train, RY_train)
DA.fit(X_train, RY_train)

LinearDiscriminantAnalysis()

In [11]:
# Testing the Model
#lr_acc = lr.score(X_cv, Y_cv)
KNN_acc = KNN.score(X_cv, RY_cv)
SVC_acc = SVC.score(X_cv, RY_cv)
SVC2_acc = SVC2.score(X_cv, RY_cv)
Tree_acc = Tree.score(X_cv, RY_cv)
RF_acc = RF.score(X_cv, RY_cv)
NB_acc = NB.score(X_cv, RY_cv)
DA_acc = DA.score(X_cv, RY_cv)

In [12]:
# Printing Accuracy
#print("Logisitics Regression Accuracy: ", lr_acc)
print("K Nearest Neighbour Accuracy: ", KNN_acc)
print("Support Vector Accuracy: ", SVC_acc)
print("Support Vector (Polynomial) Accuracy: ", SVC2_acc)
print("Decision Tree Accuracy: ", Tree_acc)
print("Random Forest Accuracy: ", RF_acc)
print("Naive Bayes Accuracy: ", NB_acc)
print("Discriminant Analysis Accuracy: ", DA_acc)

K Nearest Neighbour Accuracy:  0.2582608695652174
Support Vector Accuracy:  0.5504347826086956
Support Vector (Polynomial) Accuracy:  0.46260869565217394
Decision Tree Accuracy:  0.4752173913043478
Random Forest Accuracy:  0.5347826086956522
Naive Bayes Accuracy:  0.45
Discriminant Analysis Accuracy:  0.26


In [13]:
# Prediction
#lr_y_pred = lr.predict(X_test)
KNN_y_pred = KNN.predict(X_test)
SVC_y_pred = SVC.predict(X_test)
SVC2_y_pred = SVC2.predict(X_test)
Tree_y_pred = Tree.predict(X_test)
RF_y_pred = RF.predict(X_test)
NB_y_pred = NB.predict(X_test)
DA_y_pred = DA.predict(X_test)


In [14]:
#print(confusion_matrix(RY_test, lr_y_pred))
#print(confusion_matrix(RY_test, lr_y_pred, normalize = 'true'))
#print(classification_report(RY_test, lr_y_pred))

In [15]:
print(confusion_matrix(RY_test, KNN_y_pred))
print(confusion_matrix(RY_test, KNN_y_pred, normalize = 'true'))
print(classification_report(RY_test, KNN_y_pred))

[[ 72 106 168  77  34]
 [  0 189 248   0  16]
 [  0 136 278   0  38]
 [  0 114 193  40 132]
 [  0 133 272   0  54]]
[[0.15754923 0.23194748 0.36761488 0.16849015 0.07439825]
 [0.         0.41721854 0.54746137 0.         0.03532009]
 [0.         0.30088496 0.61504425 0.         0.0840708 ]
 [0.         0.23799582 0.40292276 0.08350731 0.27557411]
 [0.         0.28976035 0.59259259 0.         0.11764706]]
              precision    recall  f1-score   support

           1       1.00      0.16      0.27       457
           2       0.28      0.42      0.33       453
           3       0.24      0.62      0.35       452
           4       0.34      0.08      0.13       479
           5       0.20      0.12      0.15       459

    accuracy                           0.28      2300
   macro avg       0.41      0.28      0.25      2300
weighted avg       0.41      0.28      0.25      2300



In [16]:
print(confusion_matrix(RY_test, SVC_y_pred))
print(confusion_matrix(RY_test, SVC_y_pred, normalize = 'true'))
print(classification_report(RY_test, SVC_y_pred))

[[425  15   7  10   0]
 [ 27  81 103   3 239]
 [  4  41 130  12 265]
 [  0  54  42 218 165]
 [  0  18  22   8 411]]
[[0.92997812 0.03282276 0.01531729 0.02188184 0.        ]
 [0.05960265 0.17880795 0.22737307 0.00662252 0.52759382]
 [0.00884956 0.09070796 0.28761062 0.02654867 0.58628319]
 [0.         0.11273486 0.08768267 0.45511482 0.34446764]
 [0.         0.03921569 0.04793028 0.01742919 0.89542484]]
              precision    recall  f1-score   support

           1       0.93      0.93      0.93       457
           2       0.39      0.18      0.24       453
           3       0.43      0.29      0.34       452
           4       0.87      0.46      0.60       479
           5       0.38      0.90      0.53       459

    accuracy                           0.55      2300
   macro avg       0.60      0.55      0.53      2300
weighted avg       0.60      0.55      0.53      2300



In [17]:
print(confusion_matrix(RY_test, SVC2_y_pred))
print(confusion_matrix(RY_test, SVC2_y_pred, normalize = 'true'))
print(classification_report(RY_test, SVC2_y_pred))

[[387  20  29   7  14]
 [ 12  23  63  12 343]
 [  2   3  73  17 357]
 [  0   0   7 145 327]
 [  0   0   1  26 432]]
[[0.84682713 0.04376368 0.06345733 0.01531729 0.03063457]
 [0.02649007 0.05077263 0.13907285 0.02649007 0.75717439]
 [0.00442478 0.00663717 0.16150442 0.03761062 0.78982301]
 [0.         0.         0.01461378 0.30271399 0.68267223]
 [0.         0.         0.00217865 0.05664488 0.94117647]]
              precision    recall  f1-score   support

           1       0.97      0.85      0.90       457
           2       0.50      0.05      0.09       453
           3       0.42      0.16      0.23       452
           4       0.70      0.30      0.42       479
           5       0.29      0.94      0.45       459

    accuracy                           0.46      2300
   macro avg       0.58      0.46      0.42      2300
weighted avg       0.58      0.46      0.42      2300



In [18]:
print(confusion_matrix(RY_test, Tree_y_pred))
print(confusion_matrix(RY_test, Tree_y_pred, normalize = 'true'))
print(classification_report(RY_test, Tree_y_pred))

[[342  56  26  26   7]
 [ 24 160 135  49  85]
 [ 15 123 176  65  73]
 [ 17  70  76 203 113]
 [  3  75 106  94 181]]
[[0.74835886 0.12253829 0.05689278 0.05689278 0.01531729]
 [0.05298013 0.35320088 0.29801325 0.10816777 0.18763797]
 [0.03318584 0.27212389 0.38938053 0.14380531 0.16150442]
 [0.03549061 0.14613779 0.15866388 0.42379958 0.23590814]
 [0.00653595 0.16339869 0.23093682 0.20479303 0.39433551]]
              precision    recall  f1-score   support

           1       0.85      0.75      0.80       457
           2       0.33      0.35      0.34       453
           3       0.34      0.39      0.36       452
           4       0.46      0.42      0.44       479
           5       0.39      0.39      0.39       459

    accuracy                           0.46      2300
   macro avg       0.48      0.46      0.47      2300
weighted avg       0.48      0.46      0.47      2300



In [19]:
print(confusion_matrix(RY_test, RF_y_pred))
print(confusion_matrix(RY_test, RF_y_pred, normalize = 'true'))
print(classification_report(RY_test, RF_y_pred))

[[431   5   4  15   2]
 [ 25  52  72  40 264]
 [ 17  25 111  36 263]
 [  5   9   8 287 170]
 [  0   4  40  48 367]]
[[0.94310722 0.01094092 0.00875274 0.03282276 0.00437637]
 [0.05518764 0.11479029 0.1589404  0.08830022 0.58278146]
 [0.03761062 0.05530973 0.24557522 0.07964602 0.58185841]
 [0.01043841 0.01878914 0.01670146 0.59916493 0.35490605]
 [0.         0.0087146  0.08714597 0.10457516 0.79956427]]
              precision    recall  f1-score   support

           1       0.90      0.94      0.92       457
           2       0.55      0.11      0.19       453
           3       0.47      0.25      0.32       452
           4       0.67      0.60      0.63       479
           5       0.34      0.80      0.48       459

    accuracy                           0.54      2300
   macro avg       0.59      0.54      0.51      2300
weighted avg       0.59      0.54      0.51      2300



In [20]:
print(confusion_matrix(RY_test, NB_y_pred))
print(confusion_matrix(RY_test, NB_y_pred, normalize = 'true'))
print(classification_report(RY_test, NB_y_pred))

[[373  80   2   2   0]
 [ 26  73  57  65 232]
 [  0  70  81  69 232]
 [  1 133  87 127 131]
 [  0  18  54  41 346]]
[[0.81619256 0.1750547  0.00437637 0.00437637 0.        ]
 [0.05739514 0.1611479  0.12582781 0.14348786 0.51214128]
 [0.         0.15486726 0.17920354 0.15265487 0.51327434]
 [0.00208768 0.2776618  0.18162839 0.2651357  0.27348643]
 [0.         0.03921569 0.11764706 0.08932462 0.75381264]]
              precision    recall  f1-score   support

           1       0.93      0.82      0.87       457
           2       0.20      0.16      0.18       453
           3       0.29      0.18      0.22       452
           4       0.42      0.27      0.32       479
           5       0.37      0.75      0.49       459

    accuracy                           0.43      2300
   macro avg       0.44      0.44      0.42      2300
weighted avg       0.44      0.43      0.42      2300



In [21]:
print(confusion_matrix(RY_test, DA_y_pred))
print(confusion_matrix(RY_test, DA_y_pred, normalize = 'true'))
print(classification_report(RY_test, DA_y_pred))

[[154  67  72  78  86]
 [ 38  98 111  96 110]
 [ 25  97 121 107 102]
 [ 62  90 101 136  90]
 [ 42  86  98 110 123]]
[[0.33698031 0.14660832 0.15754923 0.17067834 0.18818381]
 [0.08388521 0.21633554 0.24503311 0.21192053 0.24282561]
 [0.05530973 0.21460177 0.26769912 0.23672566 0.22566372]
 [0.12943633 0.18789144 0.21085595 0.28392484 0.18789144]
 [0.09150327 0.18736383 0.21350763 0.23965142 0.26797386]]
              precision    recall  f1-score   support

           1       0.48      0.34      0.40       457
           2       0.22      0.22      0.22       453
           3       0.24      0.27      0.25       452
           4       0.26      0.28      0.27       479
           5       0.24      0.27      0.25       459

    accuracy                           0.27      2300
   macro avg       0.29      0.27      0.28      2300
weighted avg       0.29      0.27      0.28      2300



In [26]:
#Saving Model
#pickle.dump(lr, open('1b_ML_lr.sav', 'wb'))
pickle.dump(KNN, open('1b_ML_KNN.sav', 'wb'))
pickle.dump(SVC, open('1b_ML_SVC.sav', 'wb'))
pickle.dump(SVC2, open('1b_ML_SVC2.sav', 'wb'))
pickle.dump(Tree, open('1b_ML_Tree.sav', 'wb'))
pickle.dump(RF, open('1b_ML_RF.sav', 'wb'))
pickle.dump(NB, open('1b_ML_NB.sav', 'wb'))
pickle.dump(DA, open('1b_ML_DA.sav', 'wb'))

In [27]:
#loading model
'''#lr = pickle.load(open('1b_ML_lr.sav', 'rb'))
KNN = pickle.load(open('1b_ML_KNN.sav', 'rb'))
SVC = pickle.load(open('1b_ML_SVC.sav', 'rb'))
SVC2 = pickle.load(open('1b_ML_SVC2.sav', 'rb'))
Tree = pickle.load(open('1b_ML_Tree.sav', 'rb'))
RF = pickle.load(open('1b_ML_RF.sav', 'rb'))
NB = pickle.load(open('1b_ML_NB.sav', 'rb'))
DA = pickle.load(open('1b_ML_DA.sav', 'rb'))'''