In [None]:
pip install mne matplotlib

Collecting mne
  Downloading mne-0.24.1-py3-none-any.whl (7.4 MB)
[K     |████████████████████████████████| 7.4 MB 4.9 MB/s 
Installing collected packages: mne
Successfully installed mne-0.24.1


In [None]:
!pip install pyedflib

Collecting pyedflib
  Downloading pyEDFlib-0.1.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[K     |████████████████████████████████| 2.4 MB 5.4 MB/s 
Installing collected packages: pyedflib
Successfully installed pyedflib-0.1.25


In [None]:
pip install git+https://github.com/forrestbao/pyeeg.git

Collecting git+https://github.com/forrestbao/pyeeg.git
  Cloning https://github.com/forrestbao/pyeeg.git to /tmp/pip-req-build-qyj5bpbc
  Running command git clone -q https://github.com/forrestbao/pyeeg.git /tmp/pip-req-build-qyj5bpbc
Building wheels for collected packages: pyeeg
  Building wheel for pyeeg (setup.py) ... [?25l[?25hdone
  Created wheel for pyeeg: filename=pyeeg-0.4.4-py2.py3-none-any.whl size=28131 sha256=6706bafea91f85232971661211d50a17727c2cf7b4f97508aa29bdb265f5e3a0
  Stored in directory: /tmp/pip-ephem-wheel-cache-dpm8nskt/wheels/b0/23/e4/703c908bda656959957029fa145879aa79307b2545a2ef0271
Successfully built pyeeg
Installing collected packages: pyeeg
Successfully installed pyeeg-0.4.4


In [None]:
import os
import glob
import mne
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pyeeg
from scipy.stats import kurtosis, skew
from scipy.signal import argrelextrema, welch
from scipy.integrate import cumtrapz
import statistics
import time

In [None]:
def eeg_features(data):
    """Compute 22 scalar features for one single-channel signal segment.

    Parameters
    ----------
    data : array-like, 1-D
        Samples of one channel. The sampling rate is fixed at 256 Hz (`Fs`).

    Returns
    -------
    numpy.ndarray of shape (22,)
        Index -> feature:
        0 RMS amplitude, 1 sample variance (n - 1 denominator), 2 kurtosis,
        3 skewness, 4 maximum, 5 minimum, 6 number of local maxima,
        7 number of sign changes between consecutive samples,
        8 Higuchi fractal dimension, 9 Petrosian fractal dimension,
        10 Hurst exponent, 11 spectral entropy, 12 total power (area under
        the Welch spectrum), 13 median frequency, 14 peak frequency,
        15 Hjorth mobility, 16 Hjorth complexity,
        17-21 power ratio of the five bands delimited by `Band`.

    Notes
    -----
    Sample entropy and approximate entropy (`pyeeg.samp_entropy` /
    `pyeeg.ap_entropy` with M=10, R=0.3) were previously drafted as features
    22 and 23 but are disabled; the original note timed approximate entropy at
    about 1.14 s per call.
    """
    data = np.asarray(data)
    res = np.zeros([22])
    Kmax = 5                        # Kmax argument of pyeeg.hfd
    Band = [1, 5, 10, 15, 20, 25]   # band edges (Hz) handed to pyeeg
    Fs = 256                        # sampling rate (Hz)

    # Only the relative band powers are used below.
    _, power_ratio = pyeeg.bin_power(data, Band, Fs)

    # Signal power spectrum. 'hann' is the window name accepted by both old and
    # current SciPy; the legacy alias 'hanning' is rejected by newer releases.
    f, P = welch(data, fs=Fs, window='hann', noverlap=0, nfft=256)
    # NOTE(review): `cumtrapz` is SciPy's legacy name for cumulative_trapezoid --
    # confirm it still imports on the SciPy version this notebook targets.
    area_freq = cumtrapz(P, f, initial=0)

    res[0] = np.sqrt(np.sum(np.power(data, 2)) / data.shape[0])                   # amplitude RMS
    # Sample variance; vectorised replacement for statistics.stdev(data)**2,
    # which walks the array element by element in pure Python.
    res[1] = np.var(data, ddof=1)                                                 # variance
    res[2] = kurtosis(data)                                                       # kurtosis
    res[3] = skew(data)                                                           # skewness
    res[4] = data.max()                                                           # max amplitude
    res[5] = data.min()                                                           # min amplitude
    res[6] = len(argrelextrema(data, np.greater)[0])                              # number of local maxima
    res[7] = ((data[:-1] * data[1:]) < 0).sum()                                   # number of zero crossings
    res[8] = pyeeg.hfd(data, Kmax)                                                # Higuchi Fractal Dimension
    res[9] = pyeeg.pfd(data)                                                      # Petrosian Fractal Dimension
    res[10] = pyeeg.hurst(data)                                                   # Hurst exponent
    res[11] = pyeeg.spectral_entropy(data, Band, Fs, Power_Ratio=power_ratio)     # spectral entropy
    res[12] = area_freq[-1]                                                       # total power
    # First frequency at which the cumulative power reaches half of the total.
    res[13] = f[np.where(area_freq >= res[12] / 2)[0][0]]                         # median frequency
    res[14] = f[np.argmax(P)]                                                     # peak frequency
    res[15], res[16] = pyeeg.hjorth(data)                                         # Hjorth mobility and complexity
    for offset in range(5):
        res[17 + offset] = power_ratio[offset]                                    # per-band power ratio
    return (res)

In [None]:
def eeg_preprocessing(file, seizures, epoch_length = 10, step_size = 1, start_time = 0):
    """Turn one EDF recording into a table of per-epoch, per-channel features.

    The recording is band-pass filtered (0.25-25 Hz) and cut into sliding
    windows of `epoch_length` seconds that advance by `step_size` seconds.
    Every channel of every window is passed through `eeg_features`.

    Parameters
    ----------
    file : str
        Path of the EDF file to read.
    seizures : dict
        Maps a recording's base name (file name without directory and
        extension) to a list of ``[onset, offset]`` pairs in seconds.
        Recordings that are not in the mapping are labelled seizure-free.
    epoch_length : float, default 10
        Window length in seconds.
    step_size : float, default 1
        Shift between consecutive windows in seconds.
    start_time : float, default 0
        Start of the first window in seconds.

    Returns
    -------
    pandas.DataFrame
        One row per window: ``start_time``, then ``<channel>_<feature>`` for
        every channel/feature pair, then ``seizure`` (1 when the window
        overlaps any annotated interval, otherwise 0).
    """
    t0 = time.time()

    # Key used to look the recording up in `seizures`; derived from the path so
    # the function does not depend on a notebook-level variable.
    recording = os.path.splitext(os.path.basename(file))[0]
    intervals = seizures.get(recording, [])

    # reading in data
    raw = mne.io.read_raw_edf(file)

    # apply filterbank
    raw = raw.load_data().filter(l_freq=0.25, h_freq=25)
    channels = raw.ch_names                                  # column names

    # raw.times is increasing, so its last entry is the maximum; read it once
    # instead of rescanning the whole array on every iteration.
    last_time = raw.times[-1]

    # Divide into epochs
    rows = []
    while start_time <= last_time + 0.01 - epoch_length:
        end_time = start_time + epoch_length
        first, last = raw.time_as_index([start_time, end_time])
        epoch = raw[:, first:last][0]

        # start time as ID
        row = [start_time]

        # features for every channel present in the recording
        for channel_data in epoch:
            row.extend(eeg_features(channel_data).tolist())

        # seizure flag for y: exactly one value per row, set when the window and
        # any annotated interval share some time
        overlaps = any(
            start_time < interval[1] and end_time > interval[0]
            for interval in intervals
        )
        row.append(1 if overlaps else 0)

        rows.append(row)
        start_time += step_size
        print("Section ", str(len(rows)), "; start: ", first, " ; stop: ", last)

    # formatting
    feature_names = ["rms", "variance", "kurtosis", "skewness", "max_amp", "min_amp", "n_peaks", "n_crossings",
        "hfd", "pfd", "hurst_exp", "spectral_entropy", "total_power", "median_freq", "peak_freq",
        "hjorth_mobility", "hjorth_complexity", "power_1hz", "power_5hz", "power_10hz", "power_15hz", "power_20hz"]

    column_names = ["start_time"]
    column_names.extend(channel + "_" + name for channel in channels for name in feature_names)
    column_names.append("seizure")

    res = pd.DataFrame(rows, columns=column_names)

    elapsed = time.time() - t0
    print("Finished preprocessing ", file, f" took {elapsed / 60} minutes")
    return res


In [None]:
def eeg_visualize(raw, start_time, end_time):
    """Show the recording browser and plot the first two channels of a time window.

    Parameters
    ----------
    raw : object
        Recording that provides ``plot()``, ``time_as_index()`` and 2-D
        indexing returning ``(data, times)`` -- e.g. the object returned by
        ``mne.io.read_raw_edf``.
    start_time : float
        Start of the plotted window in seconds.
    end_time : float
        End of the plotted window in seconds.

    Returns
    -------
    None
        The figures are displayed as a side effect.
    """
    n = 2  # number of channels drawn, one subplot each

    # MNE-Python's interactive data browser to get a better visualization
    raw.plot()

    # select the requested time frame
    start, stop = raw.time_as_index([start_time, end_time])
    temp, times = raw[:, start:stop]

    fig, axs = plt.subplots(n)
    fig.suptitle('Patient EEG')
    for i in range(n):
        axs[i].plot(times, temp[i].T)
        # NOTE(review): the unit assumes the data are stored in volts -- confirm.
        axs[i].set_ylabel('EEG data (V)')
    axs[-1].set_xlabel('time (s)')
    plt.show()

In [None]:
!wget -r -N -c -np https://physionet.org/files/chbmit/1.0.0/chb01/

--2022-02-06 16:08:06--  https://physionet.org/files/chbmit/1.0.0/chb01/
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘physionet.org/files/chbmit/1.0.0/chb01/index.html’

physionet.org/files     [ <=>                ]   6.01K  --.-KB/s    in 0s      

Last-modified header missing -- time-stamps turned off.
2022-02-06 16:08:06 (70.3 MB/s) - ‘physionet.org/files/chbmit/1.0.0/chb01/index.html’ saved [6159]

Loading robots.txt; please ignore errors.
--2022-02-06 16:08:06--  https://physionet.org/robots.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 22 [text/plain]
Saving to: ‘physionet.org/robots.txt’


2022-02-06 16:08:06 (2.01 MB/s) - ‘physionet.org/robots.txt’ saved [22/22]

--2022-02-06 16:08:06--  https://physionet.org/files/chbmit/1.0.0/chb01/chb01-su

In [None]:
!wget -r -N -c -np https://physionet.org/files/chbmit/1.0.0/chb02/

--2022-02-06 16:09:38--  https://physionet.org/files/chbmit/1.0.0/chb02/
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘physionet.org/files/chbmit/1.0.0/chb02/index.html’

physionet.org/files     [ <=>                ]   4.84K  --.-KB/s    in 0s      

Last-modified header missing -- time-stamps turned off.
2022-02-06 16:09:38 (337 MB/s) - ‘physionet.org/files/chbmit/1.0.0/chb02/index.html’ saved [4959]

Loading robots.txt; please ignore errors.
--2022-02-06 16:09:38--  https://physionet.org/robots.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK

    The file is already fully retrieved; nothing to do.

--2022-02-06 16:09:38--  https://physionet.org/files/chbmit/1.0.0/chb02/chb02-summary.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awai

In [None]:
!wget -r -N -c -np https://physionet.org/files/chbmit/1.0.0/chb05/

--2022-02-06 16:10:28--  https://physionet.org/files/chbmit/1.0.0/chb05/
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘physionet.org/files/chbmit/1.0.0/chb05/index.html’

physionet.org/files     [ <=>                ]   5.43K  --.-KB/s    in 0s      

Last-modified header missing -- time-stamps turned off.
2022-02-06 16:10:28 (499 MB/s) - ‘physionet.org/files/chbmit/1.0.0/chb05/index.html’ saved [5556]

Loading robots.txt; please ignore errors.
--2022-02-06 16:10:28--  https://physionet.org/robots.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK

    The file is already fully retrieved; nothing to do.

--2022-02-06 16:10:28--  https://physionet.org/files/chbmit/1.0.0/chb05/chb05-summary.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awai

In [None]:
!wget -r -N -c -np https://physionet.org/files/chbmit/1.0.0/chb08/

--2022-02-06 16:11:56--  https://physionet.org/files/chbmit/1.0.0/chb08/
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘physionet.org/files/chbmit/1.0.0/chb08/index.html’

physionet.org/files     [ <=>                ]   3.25K  --.-KB/s    in 0s      

Last-modified header missing -- time-stamps turned off.
2022-02-06 16:11:57 (297 MB/s) - ‘physionet.org/files/chbmit/1.0.0/chb08/index.html’ saved [3333]

Loading robots.txt; please ignore errors.
--2022-02-06 16:11:57--  https://physionet.org/robots.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK

    The file is already fully retrieved; nothing to do.

--2022-02-06 16:11:57--  https://physionet.org/files/chbmit/1.0.0/chb08/chb08-summary.txt
Reusing existing connection to physionet.org:443.
HTTP request sent, awai

In [None]:
# Directory with the chb02 recordings (assumes the wget cells ran with /content
# as the working directory).
folder = "/content/physionet.org/files/chbmit/1.0.0/chb02"
# os.listdir returns entries in arbitrary order; sort so that every run
# processes the recordings in the same sequence.
files = sorted(file for file in os.listdir(folder) if file.endswith(".edf"))
print(files)

['chb02_19.edf', 'chb02_30.edf', 'chb02_18.edf', 'chb02_31.edf', 'chb02_16.edf', 'chb02_26.edf', 'chb02_22.edf', 'chb02_21.edf', 'chb02_27.edf', 'chb02_01.edf', 'chb02_15.edf', 'chb02_13.edf', 'chb02_04.edf', 'chb02_24.edf', 'chb02_32.edf', 'chb02_28.edf', 'chb02_17.edf', 'chb02_20.edf', 'chb02_16+.edf', 'chb02_33.edf', 'chb02_14.edf', 'chb02_35.edf', 'chb02_02.edf', 'chb02_12.edf', 'chb02_11.edf', 'chb02_25.edf', 'chb02_29.edf', 'chb02_09.edf', 'chb02_08.edf', 'chb02_03.edf', 'chb02_07.edf', 'chb02_10.edf', 'chb02_05.edf', 'chb02_06.edf', 'chb02_34.edf', 'chb02_23.edf']


In [None]:
# Annotated seizures: recording base name -> list of [onset, offset] pairs
# (seconds from the start of that recording).
# NOTE(review): only one chb02 recording is listed and none for chb01, although
# chb01 is downloaded above -- verify against the downloaded *-summary.txt files.
seizures = dict([
    # chb02
    ("chb02_16", [[130, 212]]),
    # chb05
    ("chb05_06", [[417, 532]]),
    ("chb05_13", [[1086, 1196]]),
    ("chb05_16", [[2317, 2413]]),
    ("chb05_17", [[2451, 2571]]),
    ("chb05_22", [[2348, 2465]]),
    # chb08
    ("chb08_02", [[2670, 2841]]),
    ("chb08_05", [[2856, 3046]]),
    ("chb08_11", [[2988, 3211]]),
    ("chb08_13", [[2417, 2577]]),
    ("chb08_21", [[2083, 2347]]),
])

In [None]:
# Extract features for every recording in `files` and write one CSV per recording.
output_dir = "data"
# DataFrame.to_csv does not create missing directories, so make sure the target
# exists before the (long) first recording finishes processing.
os.makedirs(output_dir, exist_ok=True)

for edf_name in files:
    file = os.path.join(folder, edf_name)
    # Base name without ".edf"; this is the form used for the keys of `seizures`.
    filename = os.path.splitext(edf_name)[0]
    res = eeg_preprocessing(file, seizures)
    res.to_csv(os.path.join(output_dir, filename + '.csv'), index=False)

print("done")

Extracting EDF parameters from /content/physionet.org/files/chbmit/1.0.0/chb02/chb02_19.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw = mne.io.read_raw_edf(file)


Reading 0 ... 921599  =      0.000 ...  3599.996 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.25 - 25 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.25
- Lower transition bandwidth: 0.25 Hz (-6 dB cutoff frequency: 0.12 Hz)
- Upper passband edge: 25.00 Hz
- Upper transition bandwidth: 6.25 Hz (-6 dB cutoff frequency: 28.12 Hz)
- Filter length: 3381 samples (13.207 sec)



  (p, _, _, _) = numpy.linalg.lstsq(x, L)
  R_S = R_T / S_T
  [m, c] = numpy.linalg.lstsq(A, R_S)[0]


Section  1 ; start:  0  ; stop:  2560
Section  2 ; start:  256  ; stop:  2816
Section  3 ; start:  512  ; stop:  3072
Section  4 ; start:  768  ; stop:  3328
Section  5 ; start:  1024  ; stop:  3584
Section  6 ; start:  1280  ; stop:  3840
Section  7 ; start:  1536  ; stop:  4096
Section  8 ; start:  1792  ; stop:  4352
Section  9 ; start:  2048  ; stop:  4608
Section  10 ; start:  2304  ; stop:  4864
Section  11 ; start:  2560  ; stop:  5120
Section  12 ; start:  2816  ; stop:  5376
Section  13 ; start:  3072  ; stop:  5632
Section  14 ; start:  3328  ; stop:  5888
Section  15 ; start:  3584  ; stop:  6144
Section  16 ; start:  3840  ; stop:  6400
Section  17 ; start:  4096  ; stop:  6656
Section  18 ; start:  4352  ; stop:  6912
Section  19 ; start:  4608  ; stop:  7168
Section  20 ; start:  4864  ; stop:  7424
Section  21 ; start:  5120  ; stop:  7680
Section  22 ; start:  5376  ; stop:  7936
Section  23 ; start:  5632  ; stop:  8192
Section  24 ; start:  5888  ; stop:  8448
Section