In [30]:
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
import tensorflow
from sklearn.model_selection import train_test_split
from keras.models import Sequential, Model, Input, load_model
from keras.layers import Dense, Dropout
from keras.layers import Embedding, Activation, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D, BatchNormalization
from keras import utils
from keras import optimizers
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
import time
from IPython.display import Image
from IPython.core.display import HTML
from keras.callbacks import ModelCheckpoint

In [19]:
def normalize(X):
    return ((X - np.mean(X) ) / np.std(X) )

def folder_to_df(letter):
    full_path ="datasets/"+ letter + "/*.*"  #datasets of A,B,C,D,E waas stored as folders in "datasets" folder
    files = glob.glob(full_path)  #files=files=glob.glob(full_path)
    small_df = []
    for file in files:
        small_df.append(pd.read_csv(file,header=None))
    big_df = pd.concat(small_df, axis= 1)
    return big_df.T

# def folder_to_df(letter): #import the .txt files
#     full_path ="data/bonn_uni_datasets/"+ letter + "/*.*"
#     files = files = glob.glob(full_path)
#     df_list = []
#     for file in files:
#         df_list.append(pd.read_csv(file, header = None))
#     big_df = pd.concat(df_list, ignore_index=True, axis= 1)
#     return big_df.T

def load_as_df():
    A = folder_to_df('A')
    B = folder_to_df('B')
    C = folder_to_df('C')
    D = folder_to_df('D')
    E = folder_to_df('E')
    
    normal = A.append(B).reset_index(drop = True)
    interictal = C.append(D).reset_index(drop = True)
    ictal = E

    return normal, interictal, ictal

def window(a, w = 512, o = 64, copy = False): #window sliding function
    #default for training, for testing data we will split each signal in four of 1024 and apply
    #a window size of 512 with a stride (o) of 256
    sh = (a.size - w + 1, w)
    st = a.strides * 2
    view = np.lib.stride_tricks.as_strided(a, strides = st, shape = sh)[0::o]
    if copy:
        return view.copy()
    else:
        return view

def enrich_train(df): #enrich data by splicing the 4097-long signals 
    #into 512 long ones with a stride of 64
    labels = df.iloc[:,-1]
    data = df.iloc[:, :-1]
    res = list()
    for i in range(len(data)):
        res += [window(data.iloc[i].values)]
    return res

def reshape_x(arr): #shape the input data into the correct form (x1,x2,1)
    nrows = arr.shape[0]
    ncols = arr.shape[1]
    return arr.reshape(nrows, ncols, 1)



#DATASET DESCRIPTION:
This database comprises of 100 single channels EEG of 23.6 seconds with sampling rate of 173.61 Hz. Its spectral bandwidth range is between 0.5 Hz and 85 Hz. It was taken from a 128 channel acquisition system. Five patients EEG sets were cut out from a multi-channel EEG recording and named A, B, C, D and E. Set A and B are the surface EEG recorded during eyes closed and open situation of healthy patients respectively. Set C and D are the intracranial EEG recorded during a seizure free from within seizure generating area and from outside seizure generating area of epileptic patients respectively. Set E is the intracranial EEG of an epileptic patient during epileptic seizures. Each set contains 100 text files wherein each text file has 4097 samples of 1 EEG time series in ASCII code. A band pass filter with cut off frequency as 0.53 Hz and 40 Hz has been applied on the data. It is an artifact free data and hence no prior pre-processing is required for the classification of healthy (non-epileptic) and un-healthy (epileptic) signals. The strong eye movement’s artefacts were omitted. It was made available in 2001. The extended version of this data is now a part of EPILEPSIA project.
Palak Handa,Monika Mathur,Nidhi Goel "Open and free EEG datasets for epilepsy diagnosis"

In [20]:
glob.glob("datasets/A/*.*")

['datasets/A\\Z001.txt',
 'datasets/A\\Z002.txt',
 'datasets/A\\Z003.txt',
 'datasets/A\\Z004.txt',
 'datasets/A\\Z005.txt',
 'datasets/A\\Z006.txt',
 'datasets/A\\Z007.txt',
 'datasets/A\\Z008.txt',
 'datasets/A\\Z009.txt',
 'datasets/A\\Z010.txt',
 'datasets/A\\Z011.txt',
 'datasets/A\\Z012.txt',
 'datasets/A\\Z013.txt',
 'datasets/A\\Z014.txt',
 'datasets/A\\Z015.txt',
 'datasets/A\\Z016.txt',
 'datasets/A\\Z017.txt',
 'datasets/A\\Z018.txt',
 'datasets/A\\Z019.txt',
 'datasets/A\\Z020.txt',
 'datasets/A\\Z021.txt',
 'datasets/A\\Z022.txt',
 'datasets/A\\Z023.txt',
 'datasets/A\\Z024.txt',
 'datasets/A\\Z025.txt',
 'datasets/A\\Z026.txt',
 'datasets/A\\Z027.txt',
 'datasets/A\\Z028.txt',
 'datasets/A\\Z029.txt',
 'datasets/A\\Z030.txt',
 'datasets/A\\Z031.txt',
 'datasets/A\\Z032.txt',
 'datasets/A\\Z033.txt',
 'datasets/A\\Z034.txt',
 'datasets/A\\Z035.txt',
 'datasets/A\\Z036.txt',
 'datasets/A\\Z037.txt',
 'datasets/A\\Z038.txt',
 'datasets/A\\Z039.txt',
 'datasets/A\\Z040.txt',


In [21]:
def format_enrich_train(normal, interictal, ictal):
    
    #enrich data and reshape it to have a two dimensional array instead of three
    normal_train_enr = np.asarray(enrich_train(normal)).reshape(-1, np.asarray(enrich_train(normal)).shape[-1])
    interictal_train_enr = np.asarray(enrich_train(interictal)).reshape(-1, np.asarray(enrich_train(interictal)).shape[-1])
    ictal_train_enr = np.asarray(enrich_train(ictal)).reshape(-1, np.asarray(enrich_train(ictal)).shape[-1])

    #change into a dataframe to add labels easily
    normal_train_enr_df = pd.DataFrame(normal_train_enr)
    interictal_train_enr_df = pd.DataFrame(interictal_train_enr)
    ictal_train_enr_df = pd.DataFrame(ictal_train_enr)
    
    normal_train_enr_df['labels'] = 0 # normal
    interictal_train_enr_df['labels'] = 1 #interictal
    ictal_train_enr_df['labels'] = 2 #ictal

    #concat all
    data_labels = pd.concat([normal_train_enr_df, interictal_train_enr_df, ictal_train_enr_df], ignore_index = True)
    

    #separates data and labels into numpy arrays for keras
    data = data_labels.drop('labels', axis = 1).values
    labels = data_labels.labels.values
    
    #labels = np.expand_dims(labels, axis=1)
    
    return data, labels

In [22]:
normal, interictal, ictal = load_as_df()

In [None]:
X, y = format_enrich_train(normal, interictal, ictal)

In [44]:

model = Sequential()
#Conv - 1
model.add(Conv1D(24, 5,strides =  3, input_shape=(512,1)))
model.add(BatchNormalization())
model.add(Activation('relu'))

#Conv - 2
model.add(Conv1D(16, 3,strides =  2))
model.add(BatchNormalization())
model.add(Activation('relu'))

#Conv - 3
model.add(Conv1D(8, 3,strides =  2))
model.add(BatchNormalization())
model.add(Activation('relu'))

#FC -1
model.add(Flatten())
model.add(Dense(20))
model.add(Activation('relu'))
#Dropout
model.add(Dropout(0.5))
#FC -2
model.add(Dense(3,activation = 'softmax'))
#softmax
#model.add(Activation('softmax'))

adam = tensorflow.keras.optimizers.Adam(lr=0.00002, beta_1=0.9, beta_2=0.999, epsilon=0.00000001, decay=0.0, amsgrad=False)

model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])

model.fit(X,y,epochs=5)

Epoch 1/5


ValueError: in user code:

    File "C:\Users\IK\anaconda3\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\engine\training.py", line 860, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\engine\training.py", line 918, in compute_loss
        return self.compiled_loss(
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\losses.py", line 1789, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "C:\Users\IK\anaconda3\lib\site-packages\keras\backend.py", line 5083, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 3) are incompatible


In [34]:
def train_evaluate_model(model, xtrain, ytrain, xval, yval, fold):
    model_name = 'P-1D-CNN'
    checkpointer = ModelCheckpoint(filepath='checkpoints/'+'fold'+ str(fold)+'.'+model_name + '.{epoch:03d}-{acc:.3f}.h5',verbose=0,monitor ='acc', save_best_only=True)
    history = model.fit(xtrain, ytrain, batch_size=32, callbacks = [checkpointer],epochs=200, verbose = 1)
    print(history)
    score = model.evaluate(xval, yval, batch_size=32)
    print('\n')
    print(score)
    return score, history

In [41]:
n_folds = 10
X, y = format_enrich_train(normal, interictal, ictal)
#initialize 10 fold validation
skf = StratifiedKFold(n_splits=10, shuffle=True)


#10 fold cross validation loop
# for i, (train, test) in enumerate(skf.split(X,y)):
#     print("Running Fold", i+1, "/", n_folds)
start_time = time.time()
X = reshape_x(X)
xtrain, xval = X[train], X[test]
ytrain, yval = y[train], y[test]
ytrain = tensorflow.keras.utils.to_categorical(ytrain, num_classes=3, dtype='float32')
yval = tensorflow.keras.utils.to_categorical(yval, num_classes=3, dtype='float32')


model = None # Clearing the NN.
model = create_model()
# score, history = train_evaluate_model(model, xtrain, ytrain, xval, yval, i+1)
# print("Ran ", i+1, "/", n_folds, "Fold in %s seconds ---" % (time.time() - start_time))

In [39]:
best_model = load_model('best_model.0.966.h5')

OSError: No file or directory found at best_model.0.966.h5

In [None]:
def split_vote(df):
    res = list()
    for i in range(len(df)):
        res += [window(df.iloc[i].values,w= 512, o = 256)]
    return np.asarray(res)

def count_votes(my_list): 
    freq = {} 
    for i in my_list: 
        if (i in freq): 
            freq[i] += 1
        else: 
            freq[i] = 1
    return freq

def reshape_signal(signal):
    signal = np.expand_dims(signal, axis=1)
    signal = np.expand_dims(signal, axis=0)
    return np.asarray(signal)

def evaluate_subsignals(subsignals,model):
    vote_list = np.array([])
    for i in range(len(subsignals)):
        mini_signal = reshape_signal(subsignals[i])
        ynew = model.predict_classes(mini_signal)
        vote_list = np.append(vote_list, ynew)
    decision = count_votes(vote_list)
    return decision_to_str(decision), vote_list

def decision_to_str(dec):
    res = list()
    for key,val in dec.items():
        if key == 0:
            res += ['normal: ' + str(val) + ' votes' + '\n']
        if key == 1:
            res += ['ictal: ' + str(val) + ' votes' + '\n']
        if key == 2:
            res += ['interictal: ' + str(val) + ' votes' + '\n']
    return res

In [None]:
big_signal = split_vote(ictal_vote)
subsignals = big_signal[0]

In [None]:
decision, vote_list = evaluate_subsignals(subsignals,best_model)
print(vote_list)
print(decision[0])