In [4]:
import glob
import os
import re

import matplotlib.pyplot as plt
import mne
import numpy as np
import numpy.fft as fft
import pandas as pd
import sklearn
import sklearn.metrics
from numpy import mean
from numpy import save, load
from scipy import stats, signal
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [61]:
def read_patient(subject_id, base_path="features/"):
    """Load and stack every saved feature/target array for one patient.

    Files are read from ``<base_path>/data_chb<NN>/`` where ``NN`` is the
    zero-padded ``subject_id``. Recording ``k`` is expected to be stored as the
    pair ``features_<subject_id>_<kk>.npy`` / ``targets_<subject_id>_<kk>.npy``.

    Parameters
    ----------
    subject_id : int
        Patient number used in both the directory name and the file names.
    base_path : str, optional
        Root directory that contains the per-patient ``data_chbNN`` folders.
        Defaults to ``"features/"``, the location the notebook has always used.

    Returns
    -------
    X, y : numpy.ndarray
        All feature arrays and all target arrays concatenated along axis 0,
        in recording order.

    Notes
    -----
    The number of recordings is taken to be half the number of ``.npy`` files
    in the patient directory (one features file plus one targets file per
    recording). Recording 00 is always loaded; any error from ``numpy.load``
    (e.g. a missing file) propagates to the caller.
    """
    patient_dir = os.path.join(base_path, "data_chb{:02d}".format(subject_id))

    def _load(kind, fileno):
        # Build the path from base_path instead of repeating a hard-coded prefix.
        file_name = "{}_{}_{:02d}.npy".format(kind, subject_id, fileno)
        return load(os.path.join(patient_dir, file_name))

    npy_file_names = sorted(glob.glob(os.path.join(patient_dir, "*.npy")))
    files = len(npy_file_names)
    print(files)

    feature_parts = [_load("features", 0)]
    target_parts = [_load("targets", 0)]
    print(feature_parts[0].shape)

    # Collect the remaining recordings and concatenate once at the end;
    # concatenating inside the loop re-copies the growing array every time.
    for fileno in range(1, files // 2):
        feature_parts.append(_load("features", fileno))
        target_parts.append(_load("targets", fileno))

    X = np.concatenate(feature_parts)
    y = np.concatenate(target_parts)
    return X, y


In [33]:
# Empty results table; a row indexed by patient id is written to it
# after that patient's grid search has finished.
result_columns = [
    'train_acc', 'test_acc',
    'interictal', 'preictal',
    'tn', 'fp', 'fn', 'tp',
    'spec', 'sen', 'f1',
    'C', 'w0', 'w2',
]
grid_df = pd.DataFrame(columns=result_columns)
grid_df

Unnamed: 0,train_acc,test_acc,interictal,preictal,tn,fp,fn,tp,spec,sen,f1,C,w0,w2


In [126]:
# Patient processed by the rest of the notebook; change this and re-run
# the cells below to add another row to the results table.
patient_id = 7

# Stack all saved feature/target arrays for that patient.
X, y = read_patient(patient_id)
X_shape = X.shape
y_shape = y.shape
print(X_shape, y_shape)

38
(1440, 208)
(24082, 208) (24082,)


In [127]:
# Keep features and labels in one frame so that a row with a missing
# value is removed from both at the same time.
df = pd.DataFrame(X)
df = df.assign(target=y)
print(df.shape)

# Drop every row that contains at least one NaN, then report the new size.
df = df.dropna(axis=0, how='any')
print(df.shape)

(24082, 209)
(24082, 209)


In [128]:
# Split the frame by label: target 0 feeds the "interictal" subset and
# target 2 the "preictal" subset. Rows with any other target value end up
# in neither subset. No under-sampling of the interictal rows is applied.
is_interictal = df['target'] == 0
is_preictal = df['target'] == 2

df_interictal = df.loc[is_interictal]
df_preictal = df.loc[is_preictal]

interictal_shape = df_interictal.shape
preictal_shape = df_preictal.shape
print(interictal_shape, preictal_shape)

(22569, 209) (535, 209)


In [129]:
# Every column except the last one is a feature; the last column of `df`
# is taken to be the label column and is read by name below.
feature_columns = df.columns[:-1]

# Convert each class subset to float32 arrays for the classifier.
X_interictal = np.array(df_interictal[feature_columns]).astype('float32')
y_interictal = np.array(df_interictal['target']).astype('float32')
X_preictal = np.array(df_preictal[feature_columns]).astype('float32')
y_preictal = np.array(df_preictal['target']).astype('float32')

for class_array in (X_interictal, y_interictal, X_preictal, y_preictal):
    print(class_array.shape)

(22569, 208)
(22569,)
(535, 208)
(535,)


In [130]:
# Split each class separately (90 % train / 10 % test, fixed seed) so that
# both classes are present in the training set and in the test set.
(X_interictal_train, X_interictal_test,
 y_interictal_train, y_interictal_test) = train_test_split(
    X_interictal, y_interictal, test_size=0.1, random_state=42
)
(X_preictal_train, X_preictal_test,
 y_preictal_train, y_preictal_test) = train_test_split(
    X_preictal, y_preictal, test_size=0.1, random_state=42
)

In [131]:
# Re-assemble the per-class splits into single train/test arrays
# (interictal rows first, preictal rows after them).
X_train = np.concatenate([X_interictal_train, X_preictal_train])
y_train = np.concatenate([y_interictal_train, y_preictal_train])
X_test = np.concatenate([X_interictal_test, X_preictal_test])
y_test = np.concatenate([y_interictal_test, y_preictal_test])

X_train_shape, X_test_shape = X_train.shape, X_test.shape
y_train_shape, y_test_shape = y_train.shape, y_test.shape

for split_shape in (X_train_shape, X_test_shape, y_train_shape, y_test_shape):
    print(split_shape)

(20793, 208)
(2311, 208)
(20793,)
(2311,)


In [132]:
# Baseline SVM: class 2 errors are weighted 10x relative to class 0
# to counter the class imbalance in the training data.
clf = SVC(C=100, class_weight={0: 1, 2: 10})
clf.fit(X_train, y_train)

# Plain accuracy on both splits (printed as a truncated integer percentage).
trainAcc = clf.score(X_train, y_train)
testAcc = clf.score(X_test, y_test)
print("**SVM Results:**")
print("Training Accuracy: %d"%(trainAcc*100)+"%")
print("Testing Accuracy: %d"%(testAcc *100)+"%")

y_pred=clf.predict(X_test)

# Pin the label order to the classifier's two classes so the matrix is
# always 2x2 and the four-way unpacking below cannot fail or silently change
# meaning if the test labels ever contain an extra value.
# Layout after ravel(): class 0 is "negative", class 2 is "positive".
tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
    y_test, y_pred, labels=clf.classes_
).ravel()
print("TN:{}, FP:{}, FN:{}, TP:{}".format(tn, fp, fn, tp))

# specificity = share of class-0 samples predicted as 0,
# sensitivity = share of class-2 samples predicted as 2.
specificity=(tn)/(tn+fp)
sensitivity=(tp)/(tp+fn)
print('specificity= {} , sensitivity= {}'.format(specificity, sensitivity))

**SVM Results:**
Training Accuracy: 96%
Testing Accuracy: 95%
TN:2182, FP:75, FN:24, TP:30
specificity= 0.9667700487372618 , sensitivity= 0.5555555555555556


In [133]:
# Search space for the SVM: candidate weights for class 2 (class 0 is
# always weighted 1) crossed with candidate values of the penalty C.
balance = [{0: 1, 2: class2_weight} for class2_weight in (1, 8, 10, 12, 15)]
costs = [100.0, 150.0, 200.0]
param_grid = {'C': costs, 'class_weight': balance}

In [134]:
# 5-fold stratified cross-validation, run once, with a fixed seed.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=1, random_state=1)

# Exhaustive search over `param_grid`, scored by weighted F1,
# using all available cores.
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=cv,
    n_jobs=-1,
)


In [135]:
# Run the grid search on the training split only; the test split stays
# untouched by hyper-parameter selection.
grid_result = grid.fit(X=X_train, y=y_train)

In [136]:
# Report the winning configuration, then the mean and standard deviation of
# the cross-validated score for every parameter combination that was tried.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# The loop variable is deliberately not called `mean`: that name is bound to
# numpy's `mean` by the import cell, and a loop variable of the same name
# would leave it pointing at a float for the rest of the notebook.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

Best: 0.970223 using {'C': 200.0, 'class_weight': {0: 1, 2: 8}}
0.965436 (0.000142) with: {'C': 100.0, 'class_weight': {0: 1, 2: 1}}
0.968628 (0.002234) with: {'C': 100.0, 'class_weight': {0: 1, 2: 8}}
0.966235 (0.001687) with: {'C': 100.0, 'class_weight': {0: 1, 2: 10}}
0.963678 (0.002105) with: {'C': 100.0, 'class_weight': {0: 1, 2: 12}}
0.959618 (0.002186) with: {'C': 100.0, 'class_weight': {0: 1, 2: 15}}
0.965436 (0.000142) with: {'C': 150.0, 'class_weight': {0: 1, 2: 1}}
0.969324 (0.001697) with: {'C': 150.0, 'class_weight': {0: 1, 2: 8}}
0.966522 (0.002147) with: {'C': 150.0, 'class_weight': {0: 1, 2: 10}}
0.964040 (0.001871) with: {'C': 150.0, 'class_weight': {0: 1, 2: 12}}
0.960182 (0.001884) with: {'C': 150.0, 'class_weight': {0: 1, 2: 15}}
0.965672 (0.000329) with: {'C': 200.0, 'class_weight': {0: 1, 2: 1}}
0.970223 (0.001626) with: {'C': 200.0, 'class_weight': {0: 1, 2: 8}}
0.966720 (0.002143) with: {'C': 200.0, 'class_weight': {0: 1, 2: 10}}
0.964475 (0.002031) with: {'C': 

In [137]:
# Store this patient's results as one row of the summary table, keyed by
# patient id. The 'f1' column receives the grid search's best
# cross-validated score; 'w0'/'w2' are the winning class weights.
best_params = grid_result.best_params_
best_weights = best_params['class_weight']

grid_df.loc[patient_id] = [
    trainAcc, testAcc,
    interictal_shape[0], preictal_shape[0],
    tn, fp, fn, tp,
    specificity, sensitivity,
    grid_result.best_score_,
    best_params['C'], best_weights[0], best_weights[2],
]
grid_df

Unnamed: 0,train_acc,test_acc,interictal,preictal,tn,fp,fn,tp,spec,sen,f1,C,w0,w2
1,0.875654,0.888383,12188.0,975.0,1073.0,146.0,1.0,97.0,0.88023,0.989796,0.901247,150.0,1.0,10.0
2,0.970446,0.955701,11814.0,368.0,1136.0,46.0,8.0,29.0,0.961083,0.783784,0.977954,100.0,1.0,1.0
3,0.976759,0.979933,11099.0,854.0,1100.0,10.0,14.0,72.0,0.990991,0.837209,0.978225,200.0,1.0,1.0
4,0.956899,0.950843,13875.0,356.0,1342.0,46.0,24.0,12.0,0.966859,0.333333,0.962679,100.0,1.0,1.0
5,0.948236,0.947529,12457.0,681.0,1246.0,0.0,69.0,0.0,1.0,0.0,0.923042,100.0,1.0,1.0
6,0.707487,0.718227,18791.0,1498.0,1354.0,526.0,46.0,104.0,0.720213,0.693333,0.897937,100.0,1.0,1.0
7,0.961958,0.957161,22569.0,535.0,2182.0,75.0,24.0,30.0,0.96677,0.555556,0.970223,200.0,1.0,8.0


In [138]:
# Persist the summary table (index = patient id) next to the notebook.
grid_df.to_csv(path_or_buf='grid_search.csv')
