In [24]:
from glob import glob
import os
import mne 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [25]:
all_file_path = glob('dataverse_files/*.edf')
print(len(all_file_path))

28


In [26]:
healthy_file_path = [i for i in all_file_path if 'h' in i.split('\\')[1]]
patient_file_path = [i for i in all_file_path if 's' in i.split('\\')[1]]
print(len(healthy_file_path), len(patient_file_path))

14 14


In [27]:
def read_data(file_path):
    data = mne.io.read_raw_edf(file_path, preload=True)
    data.set_eeg_reference()
    data.filter(l_freq=0.5, h_freq=45)
    epochs = mne.make_fixed_length_epochs(data, duration=5, overlap=1)
    array = epochs.get_data()
    return array

In [28]:
sample_data = read_data(healthy_file_path[0])

Extracting EDF parameters from dataverse_files\h01.edf...
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 231249  =      0.000 ...   924.996 secs...
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1651 samples (6.604 s)

Not setting metadata
231 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 231 

In [29]:
sample_data.shape  # (n_epochs, n_channels, length of signal)

(231, 19, 1250)

In [30]:
%%capture
healthy_epoch_array = [read_data(i) for i in healthy_file_path]
patient_epoch_array = [read_data(i) for i in patient_file_path]

In [31]:
healthy_epoch_array[0].shape
patient_epoch_array[0].shape

(211, 19, 1250)

In [32]:
healthy_epoch_labels = [len(i) *[0] for i in healthy_epoch_array]
patient_epoch_labels = [len(i) *[1] for i in patient_epoch_array]    

In [33]:
data_list = healthy_epoch_array + patient_epoch_array
labels_list = healthy_epoch_labels + patient_epoch_labels   

In [34]:
group_list = [[i]*len(j) for i,j in enumerate(data_list)]
len(group_list)

28

In [35]:
data_array = np.vstack(data_list)
labels_array = np.hstack(labels_list)
group_array = np.hstack(group_list)

print(data_array.shape, labels_array.shape, group_array.shape )

(7201, 19, 1250) (7201,) (7201,)


# Feature Extraction

In [39]:
from scipy import stats
def mean(x):
    return np.mean(x, axis=-1)
def std(x):
    return np.std(x, axis=-1)
def ptp(x):
    return np.ptp(x, axis=-1)
def var(x):
    return np.var(x, axis=-1)
def minim(x):
    return np.min(x, axis=-1)
def maxim(x):
    return np.max(x, axis=-1)
def argminim(x):
    return np.argmin(x, axis=-1)
def argmaxim(x):
    return np.argmax(x, axis=-1)
def rms(x):
    return np.sqrt(np.mean(x**2, axis=-1))
def abs_diff_signal(x):
    return np.sum(np.abs(np.diff(x)), axis=-1)
def skewness(x):
    return stats.skew(x, axis=-1)
def kurtosis(x):
    return stats.kurtosis(x, axis=-1)
def concatenate_features(x):
    return np.concatenate([mean(x), std(x), ptp(x), var(x), minim(x), maxim(x), argminim(x), argmaxim(x), rms(x), abs_diff_signal(x), skewness(x), kurtosis(x)], axis=-1)