Import Libraries

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter, welch
from scipy import stats
import mne
from mne.filter import filter_data
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score

Summary file parsing to extract seizure start and end times

In [2]:
def extract_seizure_events_from_txt(folder_path):
    """
    Parses all .txt summary files in a folder to extract seizure start/end times
    for each corresponding .edf file.

    """
    seizure_info = {}

    txt_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]
    if not txt_files:
        raise FileNotFoundError("No .txt summary files found in the folder.")

    for txt_file in txt_files:
        summary_file = os.path.join(folder_path, txt_file)
        current_file = None
        current_start = None

        with open(summary_file, "r") as f:
            for line in f:
                line = line.strip()

                if line.startswith("File Name:"):
                    current_file = line.split(":", 1)[1].strip()
                    if current_file not in seizure_info:
                        seizure_info[current_file] = []

                elif line.startswith("Seizure Start Time:"):
                    current_start = int(line.split(":", 1)[1].strip().split()[0])

                elif line.startswith("Seizure End Time:") and current_start is not None:
                    end_time = int(line.split(":", 1)[1].strip().split()[0])
                    seizure_info[current_file].append((current_start, end_time))
                    current_start = None 

    return seizure_info


Data Loading

In [3]:
def load_edf_with_seizures(edf_path, seizure_times, sampling_rate=256):
    """
    Load EDF file and return data with seizure annotations.
    Keeps only first occurrence of duplicate base channel names.
    """
    raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)

    base_names = [name.split('-')[0] + '-' + name.split('-')[1] if name.count('-') >= 2 else name for name in raw.ch_names]

    seen = set()
    keep = []
    for i, name in enumerate(base_names):
        if name not in seen:
            seen.add(name)
            keep.append(raw.ch_names[i])

    raw.pick_channels(keep)

    seizure_samples = [
        (int(start * sampling_rate), int(end * sampling_rate))
        for start, end in seizure_times
    ]

    return {
        'raw': raw,
        'seizure_samples': seizure_samples
    }


In [4]:
def batch_load_edf_with_seizures(data_folder):
    """
    Loads all .edf files in a folder with their seizure annotations.

    """
    seizure_info = extract_seizure_events_from_txt(data_folder)
    loaded_data = {}

    for fname in os.listdir(data_folder):
        if fname.endswith('.edf') and fname in seizure_info:
            edf_path = os.path.join(data_folder, fname)
            seizure_times = seizure_info[fname]

            try:
                result = load_edf_with_seizures(edf_path, seizure_times)
                loaded_data[fname] = result
                print(f"Loaded {fname} with {len(result['seizure_samples'])} seizure intervals")
            except Exception as e:
                print(f"Failed to load {fname}: {e}")

    return loaded_data

Data Processing

In [5]:
def process_single_file(raw, edf_filename, output_folder, selected_channels, seizure_windows):
    """
    Preprocesses EEG and saves .npz with metadata.
    """
    os.makedirs(output_folder, exist_ok=True)

    raw.pick_channels(selected_channels)
    data = raw.get_data()
    sfreq = raw.info['sfreq']


    data = filter_data(data, sfreq=sfreq, l_freq=0.5, h_freq=25.0, verbose=False)

    base_name = os.path.splitext(edf_filename)[0]
    save_path = os.path.join(output_folder, f"{base_name}_preprocessed.npz")
    np.savez(save_path,
             data=data,
             seizure_windows=np.array(seizure_windows, dtype=np.int32),
             sampling_rate=sfreq,
             channels=np.array(selected_channels),
             file_name=edf_filename)

    return data

In [None]:
def preprocess_and_save(edf_folder):
    """
    Batch preprocesses and saves all EDF files in folder.
    Skips files missing required channels.
    """
    output_folder = os.path.join(edf_folder, 'preprocessed')
    os.makedirs(output_folder, exist_ok=True)

    selected_channels = [
        'FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1',
        'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1',
        'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2',
        'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2',
        'FZ-CZ', 'CZ-PZ'
    ]

    seizure_dict = extract_seizure_events_from_txt(edf_folder)

    for fname in os.listdir(edf_folder):
        if fname.endswith('.edf'):
            edf_path = os.path.join(edf_folder, fname)
            seizure_times = seizure_dict.get(fname, [])

            try:
                # Load with filtering of duplicate channels
                data_obj = load_edf_with_seizures(edf_path, seizure_times)
                raw = data_obj['raw']

                # Check if all selected channels are present (after cleaning)
                available_channels = set(raw.ch_names)
                if not all(chan in available_channels for chan in selected_channels):
                    print(f"Skipping {fname}: missing expected channels.")
                    continue

                # Now process and save
                process_single_file(
                    raw=raw,
                    edf_filename=fname,
                    output_folder=output_folder,
                    selected_channels=selected_channels,
                    seizure_windows=data_obj['seizure_samples']
                )

                base_name = os.path.splitext(fname)[0]
                print(f"Saved to: {os.path.join(output_folder, base_name + '_preprocessed.npz')}\n")

            except Exception as e:
                print(f"Failed to process {fname}: {e}")


In [7]:
def segment_and_label(eeg_data, seizure_windows, window_duration=2.0, sampling_rate=256, overlap=0.5):
    """
    Segments EEG into overlapping windows and labels them
    """
    window_size = int(window_duration*sampling_rate)
    step_size = int(window_size*(1-overlap))
    channels, total_samples = eeg_data.shape
    X,y = [], []
    for start in range(0, total_samples-window_size + 1, step_size):
        end = start + window_size
        window = eeg_data[:, start:end]

        label = 0
        for sz_start, sz_end in seizure_windows:
            if end > sz_start and start < sz_end:
                label = 1
                break
        X.append(window) 
        y.append(label)

    X = np.stack(X)
    y = np.stack(y)

    return X,y

In [8]:
def batch_segment_preprocessed(folder_path):
    """
    Segments all preprocessed .npz files and 2s windows and labels them
    """
    output_folder = os.path.join(folder_path, 'segmented')
    os.makedirs(output_folder, exist_ok=True)

    for fname in os.listdir(folder_path):
        if fname.endswith("_preprocessed.npz"):
            full_path = os.path.join(folder_path, fname)

            try:
                npz = np.load(full_path, allow_pickle=True)
                eeg = npz['data']
                sz_windows = npz["seizure_windows"]
                sfreq = int(npz['sampling_rate'])
                file_name = str(npz['file_name'])
                channels = list(npz['channels'])
                X, y = segment_and_label(eeg_data=eeg,
                         seizure_windows=sz_windows,
                         window_duration=2.0,
                         sampling_rate=sfreq,
                         overlap = 0.5)
                base_name = fname.replace('_preprocessed.npz', '')
                save_path = os.path.join(output_folder, f"{base_name}_segmented.npz")
                np.savez(save_path,
                         X=X,
                         y=y,
                         sampling_rate=sfreq,
                         channels=channels,
                         file_name=file_name)

                print(f"Segmented and saved: {save_path}")

            except Exception as e:
                print(f"Failed on {fname}: {e}")

Concatenate all NPZ files

In [9]:

def load_segmented_dataset(folder_path):
    """
    Loads and concatenates all segmented .npz files in a folder.
    """
    X_list, y_list = [], []

    for fname in os.listdir(folder_path):
        if fname.endswith('_segmented.npz'):
            npz_path = os.path.join(folder_path, fname)
            try:
                npz = np.load(npz_path)
                X = npz['X']
                y = npz['y']
                X_list.append(X)
                y_list.append(y)
            except Exception as e:
                print(f"Could not load {fname}: {e}")

    X_total = np.concatenate(X_list, axis=0)
    y_total = np.concatenate(y_list, axis=0)

    return X_total, y_total

############ Calling The Functions ############

In [10]:
extract_seizure_events_from_txt("/Users/folasewaabdulsalam/Seizure_Onset_Prediction/data")

{'chb06_01.edf': [],
 'chb06_02.edf': [],
 'chb06_03.edf': [],
 'chb06_04.edf': [],
 'chb06_05.edf': [],
 'chb06_06.edf': [],
 'chb06_07.edf': [],
 'chb06_08.edf': [],
 'chb06_09.edf': [],
 'chb06_10.edf': [],
 'chb06_12.edf': [],
 'chb06_13.edf': [],
 'chb06_14.edf': [],
 'chb06_15.edf': [],
 'chb06_16.edf': [],
 'chb06_17.edf': [],
 'chb06_18.edf': [],
 'chb06_24.edf': [],
 'chb05_01.edf': [],
 'chb05_02.edf': [],
 'chb05_03.edf': [],
 'chb05_04.edf': [],
 'chb05_05.edf': [],
 'chb05_06.edf': [(417, 532)],
 'chb05_07.edf': [],
 'chb05_08.edf': [],
 'chb05_09.edf': [],
 'chb05_10.edf': [],
 'chb05_11.edf': [],
 'chb05_12.edf': [],
 'chb05_13.edf': [(1086, 1196)],
 'chb05_14.edf': [],
 'chb05_15.edf': [],
 'chb05_16.edf': [(2317, 2413)],
 'chb05_17.edf': [(2451, 2571)],
 'chb05_18.edf': [],
 'chb05_19.edf': [],
 'chb05_20.edf': [],
 'chb05_21.edf': [],
 'chb05_22.edf': [(2348, 2465)],
 'chb05_23.edf': [],
 'chb05_24.edf': [],
 'chb05_25.edf': [],
 'chb05_26.edf': [],
 'chb05_27.edf': [

In [11]:
batch_load_edf_with_seizures("/Users/folasewaabdulsalam/Seizure_Onset_Prediction/data")

NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Loaded chb01_43.edf with 0 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb02_05.edf with 0 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb02_14.edf with 0 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb05_22.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb04_05.edf with 1 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb01_21.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb06_01.edf with 0 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb01_26.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb04_28.edf with 0 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb01_18.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb06_09.edf with 0 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb01_15.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb01_03.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb01_16.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb06_18.edf with 0 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb06_24.edf with 0 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb01_04.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb04_08.edf with 1 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb05_13.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb05_06.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb02_19.edf with 1 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Loaded chb02_16+.edf with 1 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb02_35.edf with 0 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb05_16.edf with 1 seizure intervals
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
Loaded chb05_17.edf with 1 seizure intervals


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


{'chb01_43.edf': {'raw': <RawEDF | chb01_43.edf, 22 x 921600 (3600.0 s), ~154.7 MiB, data loaded>,
  'seizure_samples': []},
 'chb02_05.edf': {'raw': <RawEDF | chb02_05.edf, 22 x 921600 (3600.0 s), ~154.7 MiB, data loaded>,
  'seizure_samples': []},
 'chb02_14.edf': {'raw': <RawEDF | chb02_14.edf, 22 x 921600 (3600.0 s), ~154.7 MiB, data loaded>,
  'seizure_samples': []},
 'chb05_22.edf': {'raw': <RawEDF | chb05_22.edf, 22 x 921600 (3600.0 s), ~154.7 MiB, data loaded>,
  'seizure_samples': [(601088, 631040)]},
 'chb04_05.edf': {'raw': <RawEDF | chb04_05.edf, 22 x 2441216 (9536.0 s), ~409.8 MiB, data loaded>,
  'seizure_samples': [(1997824, 2010368)]},
 'chb01_21.edf': {'raw': <RawEDF | chb01_21.edf, 22 x 921600 (3600.0 s), ~154.7 MiB, data loaded>,
  'seizure_samples': [(83712, 107520)]},
 'chb06_01.edf': {'raw': <RawEDF | chb06_01.edf, 22 x 3693312 (14427.0 s), ~619.9 MiB, data loaded>,
  'seizure_samples': []},
 'chb01_26.edf': {'raw': <RawEDF | chb01_26.edf, 22 x 595200 (2325.0 s), 

In [21]:
preprocess_and_save("/Users/folasewaabdulsalam/Seizure_Onset_Prediction/data")

Failed to process chb01_43.edf: New channel names are not unique, renaming failed
Failed to process chb02_05.edf: New channel names are not unique, renaming failed
Failed to process chb02_14.edf: New channel names are not unique, renaming failed
Failed to process chb05_22.edf: New channel names are not unique, renaming failed
Failed to process chb04_05.edf: New channel names are not unique, renaming failed
Failed to process chb01_21.edf: New channel names are not unique, renaming failed
Failed to process chb06_01.edf: New channel names are not unique, renaming failed
Failed to process chb03_36.edf: New channel names are not unique, renaming failed
Failed to process chb01_26.edf: New channel names are not unique, renaming failed
Failed to process chb03_35.edf: New channel names are not unique, renaming failed
Failed to process chb04_28.edf: New channel names are not unique, renaming failed
Failed to process chb01_18.edf: New channel names are not unique, renaming failed
Failed to proces

  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=Fa

In [12]:
edf_path = '/Users/folasewaabdulsalam/Seizure_Onset_Prediction/data/chb05_17.edf'  # replace with your actual file
raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)

# Show all channel names
print("Available channels:")
print(raw.ch_names)

Available channels:
['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8-0', 'P8-O2', 'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'T8-P8-1']


  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)


In [24]:
from pyedflib import EdfReader

edf_path = "/Users/folasewaabdulsalam/Seizure_Onset_Prediction/data/chb05_17.edf"
edf = EdfReader(edf_path)
channel_labels = [edf.getLabel(i) for i in range(edf.signals_in_file)]
print(channel_labels)


['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2', 'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'T8-P8']
