#Read and Process the data

In [2]:
%%capture
%pip install mne numpy matplotlib pandas

In [3]:
from glob import glob #helps to read the file from the folder
import os 
import mne #used to analyze EEG dataset for python
import numpy as np
import pandas
import matplotlib.pyplot as plt

from google.colab import drive # connecting to google drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [4]:
# set the path to the directory containing the EDF files
dir_path = '/content/drive/MyDrive/lab/project/HS/Practice/EEG_classification/EEG_healthy_schizophrenia'

In [5]:
# get all file paths in the directory
all_file_path = glob(os.path.join(dir_path, '*.edf'))

In [6]:
# split into healthy and schizophrenia
healthy_file_path = [i for i in all_file_path if 'h' in os.path.basename(i).split('/')[0]]
schizo_file_path = [i for i in all_file_path if 's' in os.path.basename(i).split('/')[0]]

In [None]:
# print(len(all_file_path)) # print the total number of files found
# print(len(healthy_file_path)) # print the number of healthy files
# print(len(schizo_file_path)) # print the number of schizophrenia files

In [8]:
# all_file_path[0]

'/content/drive/MyDrive/lab/project/HS/Practice/EEG_classification/EEG_healthy_schizophrenia/h13.edf'

In [28]:
def read_data(file_path):
    data=mne.io.read_raw_edf(file_path, preload=True)
    data.set_eeg_reference() #average of every channel is reference of each channels
    data.filter(l_freq = 0.1, h_freq=60)   #filtering between 0.1 Hz and 60Hz, continuous data -> have to convert it into segments
    epochs=mne.make_fixed_length_epochs(data, duration = 5, overlap = 1)
    data = epochs.get_data()
    return epochs  #returns with epochs

In [26]:
%%capture
data = read_data(healthy_file_path[0])

In [27]:
%%capture  
healthy_epochs_array = [read_data(i) for i in healthy_file_path]
schizo_epochs_array = [read_data(i) for i in schizo_file_path]

In [22]:
# healthy_epochs_array[0].shape

(241, 19, 1250)

In [23]:
# schizo_epochs_array[0].shape

(211, 19, 1250)

In [24]:
healthy_epochs_array[0].shape, schizo_epochs_array[0].shape

((241, 19, 1250), (211, 19, 1250))

In [25]:
healthy_epochs_labels = [len(i)*[0] for i in healthy_epochs_array]   #231 *  [0]  (len(i) = no. of epochs for each file. each epoch is labelled as 0 if it belongs to healthy person)
schizo_epochs_labels = [len(i)*[1] for i in schizo_epochs_array]   # epoch is labelled as 1 for schizophren person
len(healthy_epochs_labels), len(schizo_epochs_labels)

(14, 14)

In [36]:
epochs_array = healthy_epochs_array+schizo_epochs_array #Combining the files into a list 
epochs_labels = healthy_epochs_labels + schizo_epochs_labels # Combining the labels into a list

In [37]:
groups = [[i]*len(j) for i,j in enumerate(epochs_array)]  # i=0, j=first value of data_list  -> i = 1, j = data_list[1] 
# [i]*len(j) --> if there are 10 elements on the first place of data_list --> 0 x 10 --> [0000000000]
# if there are 20 elements on second place of data_list --> 1 x 20
len(groups)

28

#Deep Learning CNN

1. Creating epochs_array, epochs_labels, groups_array

In [38]:
epochs_array = np.vstack(epochs_array)
epochs_labels=np.hstack(epochs_labels)
groups_array=np.hstack(groups)

In [40]:
epochs_array.shape, epochs_labels.shape, groups_array.shape

((7201, 19, 1250), (7201,), (7201,))

In [41]:
epochs_array = np.moveaxis(epochs_array,1,2)
epochs_array.shape

(7201, 1250, 19)

2. Creating the CNN model

In [44]:
from tensorflow.keras.layers import Conv1D, BatchNormalization, LeakyReLU, MaxPool1D, GlobalAveragePooling1D, Dense, Dropout, AveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.backend import clear_session

def cnnmodel():
      clear_session()
      model = Sequential()
      model.add(Conv1D(filters=5, kernel_size=3, strides=1, input_shape=(1250,19)))  #1
      model.add(BatchNormalization())
      model.add(LeakyReLU() )
      model.add(MaxPool1D(pool_size=2,strides=2))#2
      model.add(Conv1D(filters=5,kernel_size=3,strides=1) )#3
      model.add(LeakyReLU())
      model.add(MaxPool1D(pool_size=2,strides=2) )#4
      model.add(Dropout(0.5))
      model.add(Conv1D(filters=5,kernel_size=3,strides=1) )#5
      model.add(LeakyReLU() )
      model.add(AveragePooling1D(pool_size=2,strides=2))#6
      model.add(Dropout(0.5))
      model.add(Conv1D(filters=5,kernel_size=3,strides=1) )#7
      model.add(LeakyReLU())
      model.add(AveragePooling1D(pool_size=2,strides=2) )#8
      model.add(Conv1D(filters=5,kernel_size=3,strides=1) )#9
      model.add(LeakyReLU())
      model.add(GlobalAveragePooling1D())#10
      model.add(Dense(1,activation='sigmoid' ))#11

      model.compile('adam', loss= 'binary_crossentropy', metrics=['accuracy'])
      return model

model = cnnmodel()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 1248, 5)           290       
                                                                 
 batch_normalization (BatchN  (None, 1248, 5)          20        
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 1248, 5)           0         
                                                                 
 max_pooling1d (MaxPooling1D  (None, 624, 5)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 622, 5)            80        
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 622, 5)            0

In [52]:
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler
gkf = GroupKFold()
scaler = StandardScaler()

In [62]:
from mne.viz import epochs
accuracy = []

for train_index, val_index in gkf.split(epochs_array, epochs_labels, groups = groups_array):
      train_features, train_labels = epochs_array[train_index], epochs_labels[train_index]
      val_features, val_labels =epochs_array[val_index], epochs_labels[val_index]
      train_features = scaler.fit_transform(train_features.reshape(-1, train_features.shape[-1])).reshape(train_features.shape)
      val_features = scaler.fit_transform(val_features.reshape(-1, val_features.shape[-1])).reshape(val_features.shape)
    
      model = cnnmodel()
      model.fit(train_features, train_labels, epochs = 10, batch_size=10, validation_data=(val_features, val_labels))
      accuracy.append(model.evaluate(val_features, val_labels)[1])
      break
# for train_index, val_index in gkf.split(epochs_array, epochs_labels, groups = groups_array):
#     train_features, train_labels = epochs_array[train_index], epochs_labels[train_index]
#     val_features, val_labels =epochs_array[val_index], epochs_labels[val_index]
    
#     # check if train_features is too large to reshape
#     if train_features.size > 2**32-1:
#         train_features = scaler.fit_transform(train_features.T).T
#     else:
#         train_features = scaler.fit_transform(train_features.reshape(-1, train_features.shape[-1])).reshape(train_features.shape)
    
#     break

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [59]:
train_features.shape, val_features.shape

((5744, 1250, 19), (1457, 1250, 19))