## Import Required Libraries

In [2]:
import os

import mne
import pandas as pd
from glob2 import glob

## Loading normal (non-epilepsy) & abnormal (epilepsy) EDF files from the NMT dataset

In [3]:
# Gather the paths of every training EDF recording, one list per class
# (normal first, abnormal second).
normal_edf_files, abnormal_edf_files = (
    glob('Dataset/normal/train/*.edf'),
    glob('Dataset/abnormal/train/*.edf'),
)

# Extract the normal and abnormal patient IDs and combine them into a single list of all patient IDs

In [4]:
# Derive a patient ID from each file path: take the last '/'-separated
# component and keep the text before its first '.'. Collecting into a set
# removes duplicates, so the order of the resulting lists is arbitrary.
# NOTE: only '/' is treated as a separator; a path that uses '\' keeps its
# directory prefix inside the ID (e.g. 'train\0001127').
normal_patient_id = list(
    {edf_path.rsplit('/', 1)[-1].partition('.')[0] for edf_path in normal_edf_files}
)
abnormal_patient_id = list(
    {edf_path.rsplit('/', 1)[-1].partition('.')[0] for edf_path in abnormal_edf_files}
)

# Combined list: abnormal IDs first, followed by normal IDs.
all_patient_id = [*abnormal_patient_id, *normal_patient_id]

# Sampling frequency is 200.0 Hz according to the NMT dataset. A window length of 20 seconds is chosen

In [5]:
# Windowing configuration.
SAMPLING_FREQ = 200.0          # samples per second used to derive sample indices
WINDOW_LENGTH_SECONDS = 20.0   # duration of one window
# Number of samples in one window (duration * rate, truncated to an int).
WINDOW_LENGTH_SAMPLES = int(SAMPLING_FREQ * WINDOW_LENGTH_SECONDS)

# Accumulator for the index rows (one dict per row) filled by the next cell.
dataset_index_rows = list()


# Information from the EDF files is extracted, labeled and saved in a dictionary

In [6]:
# Build the dataset index: one row per full-length, non-overlapping window
# of every recording.

# Start from an empty list so that re-running this cell does not append
# duplicate rows to the ones collected by a previous run.
dataset_index_rows = []

# Set lookup for deciding which class a patient belongs to.
normal_patient_lookup = set(normal_patient_id)

for idx, patient in enumerate(all_patient_id):
    print(f"{patient}: {idx+1}/{len(all_patient_id)}\n\n")

    # The ID may still carry a directory prefix when the globbed path used
    # '\' as separator (e.g. 'train\0001127'); keep only the bare file stem so
    # the path below is built the same way on every platform.
    patient_stem = patient.replace('\\', '/').split('/')[-1]

    if patient in normal_patient_lookup:
        label = 'no_epilepsy'
        recordings_dir = 'Dataset/normal/train'
    else:
        # all_patient_id only contains normal and abnormal IDs, so every
        # remaining patient is abnormal.
        # NOTE(review): 'epilespy' is a misspelling of 'epilepsy'. It is kept
        # unchanged because the value is written to the CSV; rename it together
        # with any code that consumes the "text_label" column.
        label = 'epilespy'
        recordings_dir = 'Dataset/abnormal/train'
    print(label)

    # Same directory the EDF files were globbed from.
    raw_file_path = f'{recordings_dir}/{patient_stem}.edf'
    print(raw_file_path)
    raw_data = mne.io.read_raw_edf(
        raw_file_path, verbose=False, preload=False)

    # Read once per recording instead of on every window.
    record_length_seconds = raw_data.times[-1]
    n_samples = raw_data.n_times

    # Window starts are spaced one window apart, up to the whole-second
    # length of the recording expressed in SAMPLING_FREQ samples.
    last_start_bound = int(int(record_length_seconds) * SAMPLING_FREQ)
    for start_sample_index in range(0, last_start_bound, WINDOW_LENGTH_SAMPLES):
        # Inclusive index of the last sample in this window.
        end_sample_index = start_sample_index + (WINDOW_LENGTH_SAMPLES - 1)

        # Valid sample indices are 0 .. n_samples - 1, so stop as soon as the
        # window would reach past the end of the recording.
        if end_sample_index >= n_samples:
            break

        # One row per complete window (built inside the window loop).
        dataset_index_rows.append({
            "patient_id": patient_stem,
            "raw_file_path": raw_file_path,
            "record_length_seconds": record_length_seconds,
            # this is the desired SFREQ using which sample indices are derived.
            # CAUTION - this is not the original SFREQ at which the data is recorded.
            "sampling_freq": SAMPLING_FREQ,
            "channel_config": '02_tcp_le',
            "start_sample_index": start_sample_index,
            "end_sample_index": end_sample_index,
            "text_label": label,
            "numeric_label": 0 if label == "no_epilepsy" else 1,
        })


train\0001127: 1/2232


epilespy
Dataset/abnormal/train\0001127.edf
train\0002382: 2/2232


epilespy
Dataset/abnormal/train\0002382.edf
train\0001042: 3/2232


epilespy
Dataset/abnormal/train\0001042.edf
train\0000285: 4/2232


epilespy
Dataset/abnormal/train\0000285.edf
train\0000558: 5/2232


epilespy
Dataset/abnormal/train\0000558.edf
train\0002325: 6/2232


epilespy
Dataset/abnormal/train\0002325.edf
train\0001280: 7/2232


epilespy
Dataset/abnormal/train\0001280.edf
train\0001464: 8/2232


epilespy
Dataset/abnormal/train\0001464.edf
train\0000268: 9/2232


epilespy
Dataset/abnormal/train\0000268.edf
train\0001120: 10/2232


epilespy
Dataset/abnormal/train\0001120.edf
train\0000188: 11/2232


epilespy
Dataset/abnormal/train\0000188.edf
train\0000045: 12/2232


epilespy
Dataset/abnormal/train\0000045.edf
train\0000315: 13/2232


epilespy
Dataset/abnormal/train\0000315.edf
train\0001773: 14/2232


epilespy
Dataset/abnormal/train\0001773.edf
train\0000451: 15/2232


epilespy
Dataset/a

# Patient information is saved as `nmt_data.csv` under the `data` folder

In [7]:
# Assemble the collected index rows into a table with a fixed column order
# and write it to disk as CSV (without the DataFrame index).
index_columns = [
    "patient_id",
    "raw_file_path",
    "record_length_seconds",
    "sampling_freq",
    "channel_config",
    "start_sample_index",
    "end_sample_index",
    "text_label",
    "numeric_label",
]
df = pd.DataFrame(dataset_index_rows, columns=index_columns)

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs("data", exist_ok=True)
df.to_csv("data/nmt_data.csv", index=False)