In [1]:
import numpy as np
import pandas as pd

from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
data = pd.read_csv('eeg_data.csv')
data.head()

Unnamed: 0,FP1_power_delta,FP1_power_theta,FP1_power_alpha,FP1_power_beta,FP1_power_gamma,FP2_power_delta,FP2_power_theta,FP2_power_alpha,FP2_power_beta,FP2_power_gamma,...,T5_STD_D4,T5_STD_D5,T5_STD_D6,T6_STD_D1,T6_STD_D2,T6_STD_D3,T6_STD_D4,T6_STD_D5,T6_STD_D6,epileptic_
0,15.980332,8.633358,3.092433,0.846559,0.372832,17.581626,9.937238,3.485713,0.945396,0.428797,...,31.427918,29.699413,32.013546,3.583559,12.529528,45.831132,61.890813,34.783454,33.245906,1
1,223.267803,41.573241,13.043525,6.463381,6.168712,298.274432,50.013025,14.411482,6.65923,4.94225,...,24.82272,47.185918,93.619195,14.158489,15.217886,30.927407,77.324322,153.442359,213.78095,1
2,158.673907,31.788608,9.956198,3.736977,2.926979,232.762741,39.332981,10.956081,3.478036,2.153082,...,21.708222,47.412375,85.729645,9.493639,11.824569,24.555772,59.229187,112.406306,171.618208,1
3,29.767326,3.875543,1.553127,0.438266,0.140495,45.206547,5.173542,2.028637,0.528962,0.161283,...,26.319506,39.628894,51.473202,1.872515,5.392639,17.29678,35.824586,44.892899,79.61346,1
4,33.821075,3.63521,1.34728,0.313205,0.080396,55.670043,4.925017,1.783729,0.394463,0.102254,...,26.111048,39.177537,52.925676,1.408291,4.556997,12.733549,25.676801,44.406617,94.770581,1


In [4]:
x = data.iloc[:, :-1]
y = data.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

Feature Selection

In [5]:
clf = RandomForestClassifier(n_estimators= 100 ,random_state=42)
features_to_select = 300

rfe = RFE(estimator=clf, n_features_to_select=features_to_select)
rfe.fit(x, y)

In [6]:
selected_features = np.array(x.columns)[rfe.support_]
selected_features

array(['FP2_power_delta', 'FP2_power_gamma', 'F3_power_theta',
       'F4_power_delta', 'F4_power_beta', 'F4_power_gamma',
       'C3_power_delta', 'C3_power_theta', 'C3_power_alpha',
       'C3_power_beta', 'C4_power_delta', 'C4_power_theta',
       'P3_power_theta', 'P3_power_alpha', 'P3_power_beta',
       'P4_power_delta', 'P4_power_alpha', 'O1_power_theta',
       'O1_power_alpha', 'O2_power_theta', 'O2_power_alpha',
       'F7_power_delta', 'F7_power_gamma', 'F8_power_delta',
       'F8_power_gamma', 'T3_power_delta', 'T4_power_delta',
       'T5_power_delta', 'T5_power_theta', 'T5_power_alpha',
       'T6_power_delta', 'T6_power_theta', 'A1_power_delta',
       'A1_power_theta', 'A1_power_alpha', 'A1_power_beta',
       'A1_power_gamma', 'A2_power_delta', 'A2_power_beta',
       'A2_power_gamma', 'FZ_power_delta', 'FZ_power_alpha',
       'CZ_power_delta', 'CZ_power_theta', 'PZ_power_delta',
       'PZ_power_theta', 'PZ_power_alpha', 'T1_power_delta',
       'T1_power_theta', 'T

In [23]:
X_train_rfe = rfe.transform(X_train)
X_test_rfe = rfe.transform(X_test)

Naive Bayes

In [24]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

nb = GaussianNB()
nb.fit(X_train_rfe,y_train)

In [25]:
nb_score=nb.score(X_test_rfe,y_test)
y_predict=nb.predict(X_test_rfe)
y_true=y_test

In [26]:
print('Accuracy of NB: '+ str(nb_score))
precision,recall,fscore,none= precision_recall_fscore_support(y_true, y_predict, average='weighted')
print('Precision of NB: '+(str(precision)))
print('Recall of NB: '+(str(recall)))
print('F1-score of NB: '+(str(fscore)))


Accuracy of NB: 0.5630630630630631
Precision of NB: 0.7615777369875731
Recall of NB: 0.5630630630630631
F1-score of NB: 0.44138971507392566


Logistic Regression

In [27]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state = 22)

lr.fit(X_train_rfe,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [29]:
lr_score=lr.score(X_test_rfe,y_test)
y_predict=lr.predict(X_test_rfe)
y_true=y_test

print('Accuracy of LR: '+ str(lr_score))
precision,recall,fscore,none= precision_recall_fscore_support(y_true, y_predict, average='weighted')
print('Precision of LR: '+(str(precision)))
print('Recall of LR: '+(str(recall)))
print('F1-score of LR: '+(str(fscore)))

Accuracy of LR: 0.5923423423423423
Precision of LR: 0.66551726498535
Recall of LR: 0.5923423423423423
F1-score of LR: 0.5562025499963977


KNN

In [34]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train_rfe, y_train)

In [35]:
knn_score=knn.score(X_test_rfe,y_test)
y_predict=knn.predict(X_test_rfe)
y_true=y_test

print('Accuracy of KNN: '+ str(knn_score))
precision,recall,fscore,none= precision_recall_fscore_support(y_true, y_predict, average='weighted')
print('Precision of KNN: '+(str(precision)))
print('Recall of KNN: '+(str(recall)))
print('F1-score of KNN: '+(str(fscore)))

Accuracy of KNN: 0.8085585585585585
Precision of KNN: 0.8100338061275562
Recall of KNN: 0.8085585585585585
F1-score of KNN: 0.8077955548856779


Random Forest

In [37]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(random_state = 0)
rf.fit(X_train_rfe, y_train)

In [38]:
rf_score=rf.score(X_test_rfe,y_test)
y_predict=rf.predict(X_test_rfe)
y_true=y_test

print('Accuracy of RF: '+ str(rf_score))
precision,recall,fscore,none= precision_recall_fscore_support(y_true, y_predict, average='weighted')
print('Precision of RF: '+(str(precision)))
print('Recall of RF: '+(str(recall)))
print('F1-score of RF: '+(str(fscore)))

Accuracy of RF: 0.8445945945945946
Precision of RF: 0.8453339703339704
Recall of RF: 0.8445945945945946
F1-score of RF: 0.8442336681562657


SVM

In [39]:
from sklearn.svm import SVC

svm = SVC(kernel='rbf', C=1, random_state=42)
svm.fit(X_train_rfe, y_train)

In [40]:
svm_score=svm.score(X_test_rfe,y_test)
y_predict=svm.predict(X_test_rfe)
y_true=y_test

print('Accuracy of SVM: '+ str(rf_score))
precision,recall,fscore,none= precision_recall_fscore_support(y_true, y_predict, average='weighted')
print('Precision of SVM: '+(str(precision)))
print('Recall of SVM: '+(str(recall)))
print('F1-score of SVM: '+(str(fscore)))

Accuracy of SVM: 0.8445945945945946
Precision of SVM: 0.7563114838976909
Recall of SVM: 0.545045045045045
F1-score of SVM: 0.4049674606560834


k-means

In [42]:
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=2)
y_pred=kmeans.fit(X_test_rfe)



In [45]:
from sklearn.metrics import silhouette_score

centroids = kmeans.cluster_centers_
labels = kmeans.labels_

silhouette_avg = silhouette_score(X_test_rfe, labels)
silhouette_avg

0.9838027655589254