In [9]:
from glob import glob
import os
import mne 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [10]:
all_file_path = glob('dataverse_files/*.edf')
print(len(all_file_path))

28


In [11]:
healthy_file_path = [i for i in all_file_path if 'h' in i.split('\\')[1]]
patient_file_path = [i for i in all_file_path if 's' in i.split('\\')[1]]
print(len(healthy_file_path), len(patient_file_path))

14 14


In [12]:
def read_data(file_path):
    data = mne.io.read_raw_edf(file_path, preload=True)
    data.set_eeg_reference()
    data.filter(l_freq=0.5, h_freq=45)
    epochs = mne.make_fixed_length_epochs(data, duration=5, overlap=1)
    array = epochs.get_data()
    return array

In [13]:
sample_data = read_data(healthy_file_path[0])

Extracting EDF parameters from dataverse_files\h01.edf...
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 231249  =      0.000 ...   924.996 secs...
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1651 samples (6.604 s)

Not setting metadata
231 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 231 

In [14]:
sample_data.shape  # (n_epochs, n_channels, length of signal)

(231, 19, 1250)

In [15]:
%%capture
healthy_epoch_array = [read_data(i) for i in healthy_file_path]
patient_epoch_array = [read_data(i) for i in patient_file_path]

In [16]:
healthy_epoch_array[0].shape
patient_epoch_array[0].shape

(211, 19, 1250)

In [17]:
healthy_epoch_labels = [len(i) *[0] for i in healthy_epoch_array]
patient_epoch_labels = [len(i) *[1] for i in patient_epoch_array]    

In [18]:
data_list = healthy_epoch_array + patient_epoch_array
labels_list = healthy_epoch_labels + patient_epoch_labels   

In [19]:
group_list = [[i]*len(j) for i,j in enumerate(data_list)]
len(group_list)

28

In [20]:
data_array = np.vstack(data_list)
labels_array = np.hstack(labels_list)
group_array = np.hstack(group_list)

print(data_array.shape, labels_array.shape, group_array.shape )

(7201, 19, 1250) (7201,) (7201,)


In [21]:
data_array = np.moveaxis(data_array,1,2) 

In [22]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, LeakyReLU, MaxPool1D, Dense, Dropout, AveragePooling1D, GlobalAveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.backend import clear_session

def cnnmodel():
    clear_session()
    
    model = Sequential()
    
    # Input layer
    model.add(Input(shape=(1250, 19)))
    
    # Block 1
    model.add(Conv1D(filters=5, kernel_size=3, strides=1, padding='same'))  # Changed to 'same' padding
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(MaxPool1D(pool_size=2, strides=2))  # Changed strides from 42 to 2
    
    # Block 2
    model.add(Conv1D(filters=5, kernel_size=3, strides=1, padding='same'))
    model.add(LeakyReLU())
    model.add(MaxPool1D(pool_size=2, strides=2))  # Changed strides from 42 to 2
    model.add(Dropout(0.5))
    
    # Block 3
    model.add(Conv1D(filters=5, kernel_size=3, strides=1, padding='same'))
    model.add(LeakyReLU())
    model.add(AveragePooling1D(pool_size=2, strides=2))  # Changed strides from 42 to 2
    model.add(Dropout(0.5))
    
    # Block 4
    model.add(Conv1D(filters=5, kernel_size=3, strides=1, padding='same'))
    model.add(LeakyReLU())
    model.add(AveragePooling1D(pool_size=2, strides=2))  # Changed strides from 42 to 2
    
    # Block 5
    model.add(Conv1D(filters=5, kernel_size=3, strides=1, padding='same'))
    model.add(LeakyReLU())
    model.add(GlobalAveragePooling1D())  # Reduces the time dimension to 1 safely
    
    # Output layer
    model.add(Dense(1, activation='sigmoid'))
    
    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

# Create and display the model summary
model = cnnmodel()
model.summary()

In [23]:
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler
gkf = GroupKFold()

In [24]:
accuracy = []
for train_index, val_index in gkf.split(data_array, labels_array, groups=group_array):
    X_train, X_val = data_array[train_index], data_array[val_index]
    y_train, y_val = labels_array[train_index], labels_array[val_index]
    
    scaler = StandardScaler()
    n_samples, n_timesteps, n_channels = X_train.shape
    X_train_reshaped = X_train.reshape(-1, n_channels)
    X_train_scaled = scaler.fit_transform(X_train_reshaped).reshape(n_samples, n_timesteps, n_channels)
    
    n_val_samples = X_val.shape[0]
    X_val_reshaped = X_val.reshape(-1, n_channels)
    X_val_scaled = scaler.transform(X_val_reshaped).reshape(n_val_samples, n_timesteps, n_channels)
    
    model = cnnmodel()
    model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)
    
    loss, acc = model.evaluate(X_val_scaled, y_val, verbose=0)
    accuracy.append(acc)
    print(f'Fold accuracy: {acc}')

Fold accuracy: 0.7680164575576782
Fold accuracy: 0.7007447481155396
Fold accuracy: 0.6738794445991516
Fold accuracy: 0.5931405425071716
Fold accuracy: 0.8290713429450989
