In [87]:
import numpy as np
import os
import h5py
import pyedflib
from tqdm import tqdm
import pandas as pd
from scipy.signal import butter, filtfilt, iirnotch, sosfilt, sosfiltfilt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score,  precision_recall_fscore_support
from imblearn.over_sampling import SMOTE
import time

First extract the seizure files from the summary.txt

In [33]:
edf_path = "/Users/folasewaabdulsalam/Seizure_Onset/edf_files"
summary_path= "/Users/folasewaabdulsalam/Seizure_Onset/summary_files"
output_path = "/Users/folasewaabdulsalam/Seizure_Onset/seizure_files"
os.makedirs(output_path , exist_ok=True)

def extract_seizure_segments(edf_path, summary_path, output_path):
    # Process each summary file
    for summary_file in os.listdir(summary_path):
        if summary_file.endswith(".txt"):
            participant_id = summary_file.replace("-summary.txt", "")
            
            # Read the summary file
            with open(os.path.join(summary_path, summary_file), "r") as f:
                current_file = None
                seizure_times = {}
                
                for line in f:
                    # Detect file names
                    if line.startswith("File Name:"):
                        current_file = line.split(":")[1].strip().replace(".edf", "")
                        seizure_times[current_file] = []

                    # Detect seizure start and end times
                    if "Seizure Start Time:" in line:
                        start_time = int(line.split(":")[1].strip().replace(" seconds", ""))
                        seizure_times[current_file].append(("start", start_time))

                    if "Seizure End Time:" in line:
                        end_time = int(line.split(":")[1].strip().replace(" seconds", ""))
                        seizure_times[current_file].append(("end", end_time))

                # Extract and save seizure segments
                for file_name, events in seizure_times.items():
                    edf_file = f"{file_name}.edf"
                    edf_file_path = os.path.join(edf_path, edf_file)

                    if os.path.exists(edf_file_path):
                        with pyedflib.EdfReader(edf_file_path) as f:
                            signals = np.array([f.readSignal(i) for i in range(f.signals_in_file)])
                            fs = f.getSampleFrequency(0)

                            # Extract seizure segments
                            starts = [t[1] for t in events if t[0] == "start"]
                            ends = [t[1] for t in events if t[0] == "end"]

                            for start, end in zip(starts, ends):
                                start_sample = int(start * fs)
                                end_sample = int(end * fs)
                                seizure_segment = signals[:, start_sample:end_sample]

                                # Save the seizure segment
                                output_file = os.path.join(output_path, f"{file_name}_{start}_{end}_seizure.npy")
                                np.save(output_file, seizure_segment)
                                print(f"Saved {output_file}")

print("Seizure extraction complete!")


# Run the extraction
extract_seizure_segments(edf_path, summary_path, output_path)


Seizure extraction complete!
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb03_01_362_414_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb03_02_731_796_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb03_03_432_501_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb03_04_2162_2214_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb03_34_1982_2029_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb03_35_2592_2656_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb03_36_1725_1778_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb24_01_480_505_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb24_01_2451_2476_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb24_03_231_260_seizure.npy
Saved /Users/folasewaabdulsalam/Seizure_Onset/seizure_files/chb24_03_28

Convert to .h5

In [34]:
#convert full files to .h5
edf_path = "/Users/folasewaabdulsalam/Seizure_Onset/edf_files"
h5_path= "/Users/folasewaabdulsalam/Seizure_Onset/h5_files"
os.makedirs(h5_path, exist_ok=True)

def convert_edf_to_h5(edf_path, h5_path):
    for edf_file in os.listdir(edf_path):
        if edf_file.endswith(".edf"):
            edf_file_path = os.path.join(edf_path, edf_file)
            h5_file_path = os.path.join(h5_path, edf_file.replace(".edf", ".h5"))
            
            with pyedflib.EdfReader(edf_file_path) as f:
                signals = np.array([f.readSignal(i) for i in range(f.signals_in_file)])
                channels = f.getSignalLabels()
                sample_rate = f.getSampleFrequency(0)

            # Save as .h5
            with h5py.File(h5_file_path, "w") as hf:
                hf.create_dataset("data", data=signals, compression="gzip")
                hf.attrs["channels"] = channels
                hf.attrs["sample_rate"] = sample_rate
            
            print(f"Converted {edf_file} to {h5_file_path}")

print("Converting full .edf files...")
convert_edf_to_h5(edf_path, h5_path)


Converting full .edf files...
Converted chb24_03.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb24_03.h5
Converted chb24_17.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb24_17.h5
Converted chb24_16.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb24_16.h5
Converted chb24_02.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb24_02.h5
Converted chb01_43.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb01_43.h5
Converted chb24_14.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb24_14.h5
Converted chb23_09.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb23_09.h5
Converted chb23_20.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb23_20.h5
Converted chb24_01.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb24_01.h5
Converted chb23_08.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb23_08.h5
Converted chb24_15.edf to /Users/folasewaabdulsalam/Seizure_Onset/h5_files/chb24_15.h5
Converted chb

***startttttt****

Preprocessing seziures and non seizures

In [44]:
#extracting seizure annotations from summary.txt
summary_path = "/Users/folasewaabdulsalam/Seizure_Onset/summary_files"
annotations_file = "/Users/folasewaabdulsalam/Seizure_Onset/seizure_annotations.txt"

def extract_seizure_annotations(summary_path, output_file):
    seizure_annotations = {}

    for summary_file in os.listdir(summary_path):
        if summary_file.endswith(".txt"):
            with open(os.path.join(summary_path, summary_file), "r") as f:
                current_file = None
                for line in f:
                    # Detect file names
                    if line.startswith("File Name:"):
                        current_file = line.split(":")[1].strip().replace(".edf", "")
                        seizure_annotations[current_file] = []

                    # Detect seizure start and end times
                    if "Seizure Start Time:" in line:
                        start_time = int(line.split(":")[1].strip().replace(" seconds", ""))
                        seizure_annotations[current_file].append(("start", start_time))

                    if "Seizure End Time:" in line:
                        end_time = int(line.split(":")[1].strip().replace(" seconds", ""))
                        seizure_annotations[current_file].append(("end", end_time))

    # Save the annotations as a single file
    with open(output_file, "w") as f:
        for file, events in seizure_annotations.items():
            for event in events:
                f.write(f"{file} {event[0]} {event[1]}\n")
    
    print(f"Annotations saved to {output_file}")

# Run the extraction
extract_seizure_annotations(summary_path, annotations_file)

Annotations saved to /Users/folasewaabdulsalam/Seizure_Onset/seizure_annotations.txt


In [71]:

# Directories
h5_full_path = "/Users/folasewaabdulsalam/Seizure_Onset/h5_files"
annotations_file = "/Users/folasewaabdulsalam/Seizure_Onset/seizure_annotations.txt"
preprocessed_path = "/Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data"
os.makedirs(preprocessed_path, exist_ok=True)

FILTER_BANDS = [
    (0.5, 3.625),
    (3.625, 6.75),
    (6.75, 9.875),
    (9.875, 13.0),
    (13.0, 16.125),
    (16.125, 19.25),
    (19.25, 22.375),
    (22.375, 25.0)
]

SELECTED_CHANNELS = list(range(18))

WINDOW_SIZE = 2 * 256  # 2 seconds at 256 Hz
FS = 256  # Sampling rate

def load_annotations(annotations_file):
    """
    Load seizure annotations from a plain text file.
    """
    seizure_annotations = {}
    with open(annotations_file, "r") as f:
        for line in f:
            # Split by space
            parts = line.strip().split()
            
            # Extract file name (without the _preprocessed suffix)
            file_name = parts[0].strip()
            
            # Extract event and time
            event = parts[1].strip().encode("utf-8")
            time = int(parts[2].strip())
            
            # Store the annotation
            if file_name not in seizure_annotations:
                seizure_annotations[file_name] = []
            seizure_annotations[file_name].append((event, time))
    
    # Convert each list to a structured numpy array
    for key in seizure_annotations.keys():
        seizure_annotations[key] = np.array(seizure_annotations[key], dtype=[("event", "S5"), ("time", "i4")])
    
    return seizure_annotations

def preprocess_h5_files(h5_full_path, preprocessed_path, annotations_file):
    """
    Preprocess .h5 files by applying bandpass filtering and channel selection.
    """
    # Load annotations
    seizure_annotations = load_annotations(annotations_file)

    for file_name in tqdm(os.listdir(h5_full_path), desc="Preprocessing .h5 Files"):
        if file_name.endswith(".h5"):
            file_path = os.path.join(h5_full_path, file_name)
            output_file_path = os.path.join(preprocessed_path, file_name.replace(".h5", "_preprocessed.h5"))

            with h5py.File(file_path, "r") as hf:
                signals = hf["data"][:]

                # Select only the first 18 channels
                if signals.shape[0] < 18:
                    print(f"⚠️ WARNING: Skipping file {file_name} with fewer than 18 channels. Found {signals.shape[0]}")
                    continue

                signals = signals[:18, :]  # Use the first 18 channels

                # Apply bandpass filtering
                sos = butter(4, [0.5, 25.0], btype='band', fs=FS, output='sos')
                filtered_signals = sosfiltfilt(sos, signals, axis=1)

                # Save the preprocessed data
                with h5py.File(output_file_path, "w") as out_hf:
                    out_hf.create_dataset("data", data=filtered_signals, compression="gzip")
                    out_hf.attrs["channels"] = list(range(18))

                    # Attach annotations if available
                    file_base_name = file_name.replace(".h5", "")
                    if file_base_name in seizure_annotations:
                        out_hf.attrs["seizure_annotations"] = seizure_annotations[file_base_name]

                print(f"Saved {output_file_path}")

preprocess_h5_files(h5_full_path, preprocessed_path, annotations_file)
print("\nPreprocessing complete!")



Preprocessing .h5 Files:   1%|▏         | 1/73 [00:04<05:40,  4.73s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_36_preprocessed.h5


Preprocessing .h5 Files:   3%|▎         | 2/73 [00:14<09:14,  7.81s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_06_preprocessed.h5


Preprocessing .h5 Files:   4%|▍         | 3/73 [00:34<15:15, 13.08s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_16_preprocessed.h5


Preprocessing .h5 Files:   5%|▌         | 4/73 [00:38<11:04,  9.63s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_03_preprocessed.h5


Preprocessing .h5 Files:   7%|▋         | 5/73 [00:42<08:45,  7.73s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_19_preprocessed.h5


Preprocessing .h5 Files:   8%|▊         | 6/73 [00:46<07:18,  6.54s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_46_preprocessed.h5


Preprocessing .h5 Files:  10%|▉         | 7/73 [00:51<06:22,  5.80s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_03_preprocessed.h5


Preprocessing .h5 Files:  11%|█         | 8/73 [00:55<05:45,  5.32s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_09_preprocessed.h5


Preprocessing .h5 Files:  12%|█▏        | 9/73 [01:00<05:24,  5.07s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_06_preprocessed.h5


Preprocessing .h5 Files:  14%|█▎        | 10/73 [01:04<05:05,  4.84s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_22_preprocessed.h5


Preprocessing .h5 Files:  15%|█▌        | 11/73 [01:08<04:52,  4.71s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_19_preprocessed.h5


Preprocessing .h5 Files:  16%|█▋        | 12/73 [01:13<04:38,  4.57s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_07_preprocessed.h5


Preprocessing .h5 Files:  18%|█▊        | 13/73 [01:17<04:27,  4.45s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_02_preprocessed.h5


Preprocessing .h5 Files:  19%|█▉        | 14/73 [01:21<04:19,  4.40s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_16_preprocessed.h5


Preprocessing .h5 Files:  21%|██        | 15/73 [01:25<04:13,  4.36s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_08_preprocessed.h5


Preprocessing .h5 Files:  22%|██▏       | 16/73 [01:30<04:07,  4.34s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_36_preprocessed.h5


Preprocessing .h5 Files:  23%|██▎       | 17/73 [01:34<04:03,  4.35s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_17_preprocessed.h5


Preprocessing .h5 Files:  25%|██▍       | 18/73 [01:38<03:58,  4.34s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_18_preprocessed.h5


Preprocessing .h5 Files:  26%|██▌       | 19/73 [01:43<03:53,  4.32s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_06_preprocessed.h5


Preprocessing .h5 Files:  27%|██▋       | 20/73 [01:45<03:25,  3.88s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_26_preprocessed.h5


Preprocessing .h5 Files:  29%|██▉       | 21/73 [01:50<03:26,  3.98s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_43_preprocessed.h5


Preprocessing .h5 Files:  30%|███       | 22/73 [01:53<03:11,  3.75s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_07_preprocessed.h5


Preprocessing .h5 Files:  32%|███▏      | 23/73 [01:57<03:16,  3.93s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_13_preprocessed.h5


Preprocessing .h5 Files:  33%|███▎      | 24/73 [02:13<06:13,  7.61s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_17_preprocessed.h5


Preprocessing .h5 Files:  34%|███▍      | 25/73 [02:18<05:17,  6.61s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_02_preprocessed.h5


Preprocessing .h5 Files:  36%|███▌      | 26/73 [02:22<04:38,  5.92s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_07_preprocessed.h5


Preprocessing .h5 Files:  37%|███▋      | 27/73 [02:26<04:09,  5.42s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_17_preprocessed.h5


Preprocessing .h5 Files:  38%|███▊      | 28/73 [02:31<03:49,  5.10s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_27_preprocessed.h5


Preprocessing .h5 Files:  40%|███▉      | 29/73 [02:35<03:35,  4.89s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_16+_preprocessed.h5


Preprocessing .h5 Files:  41%|████      | 30/73 [02:39<03:23,  4.72s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_38_preprocessed.h5


Preprocessing .h5 Files:  42%|████▏     | 31/73 [02:52<05:03,  7.24s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_08_preprocessed.h5


Preprocessing .h5 Files:  44%|████▍     | 32/73 [02:57<04:20,  6.34s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_33_preprocessed.h5


Preprocessing .h5 Files:  45%|████▌     | 33/73 [03:01<03:51,  5.78s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_03_preprocessed.h5


Preprocessing .h5 Files:  47%|████▋     | 34/73 [03:05<03:28,  5.34s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_13_preprocessed.h5


Preprocessing .h5 Files:  48%|████▊     | 35/73 [03:10<03:11,  5.03s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_39_preprocessed.h5


Preprocessing .h5 Files:  49%|████▉     | 36/73 [03:28<05:37,  9.12s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_09_preprocessed.h5


Preprocessing .h5 Files:  51%|█████     | 37/73 [03:33<04:35,  7.65s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_32_preprocessed.h5


Preprocessing .h5 Files:  52%|█████▏    | 38/73 [03:37<03:53,  6.66s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_02_preprocessed.h5


Preprocessing .h5 Files:  53%|█████▎    | 39/73 [03:41<03:22,  5.96s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_12_preprocessed.h5


Preprocessing .h5 Files:  55%|█████▍    | 40/73 [03:46<03:00,  5.47s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_38_preprocessed.h5


Preprocessing .h5 Files:  56%|█████▌    | 41/73 [03:47<02:16,  4.26s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_16_preprocessed.h5


Preprocessing .h5 Files:  58%|█████▊    | 42/73 [04:07<04:36,  8.91s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_19_preprocessed.h5


Preprocessing .h5 Files:  59%|█████▉    | 43/73 [04:11<03:46,  7.54s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_06_preprocessed.h5


Preprocessing .h5 Files:  60%|██████    | 44/73 [04:15<03:10,  6.56s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_18_preprocessed.h5


Preprocessing .h5 Files:  62%|██████▏   | 45/73 [04:17<02:19,  5.00s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_22_preprocessed.h5


Preprocessing .h5 Files:  63%|██████▎   | 46/73 [04:21<02:08,  4.77s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_08_preprocessed.h5


Preprocessing .h5 Files:  64%|██████▍   | 47/73 [04:25<02:01,  4.65s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_16_preprocessed.h5


Preprocessing .h5 Files:  66%|██████▌   | 48/73 [04:30<01:53,  4.54s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_31_preprocessed.h5


Preprocessing .h5 Files:  67%|██████▋   | 49/73 [04:34<01:47,  4.47s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_01_preprocessed.h5


Preprocessing .h5 Files:  68%|██████▊   | 50/73 [04:38<01:41,  4.40s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_11_preprocessed.h5


Preprocessing .h5 Files:  70%|██████▉   | 51/73 [04:43<01:37,  4.43s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_35_preprocessed.h5


Preprocessing .h5 Files:  71%|███████   | 52/73 [04:47<01:32,  4.40s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_05_preprocessed.h5


Preprocessing .h5 Files:  73%|███████▎  | 53/73 [04:52<01:29,  4.47s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_21_preprocessed.h5


Preprocessing .h5 Files:  74%|███████▍  | 54/73 [04:56<01:24,  4.45s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_15_preprocessed.h5


Preprocessing .h5 Files:  75%|███████▌  | 55/73 [05:00<01:19,  4.42s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb02_34_preprocessed.h5


Preprocessing .h5 Files:  77%|███████▋  | 56/73 [05:05<01:14,  4.39s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_04_preprocessed.h5


Preprocessing .h5 Files:  78%|███████▊  | 57/73 [05:09<01:10,  4.38s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_20_preprocessed.h5


Preprocessing .h5 Files:  79%|███████▉  | 58/73 [05:13<01:05,  4.35s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb24_14_preprocessed.h5


Preprocessing .h5 Files:  81%|████████  | 59/73 [05:18<01:00,  4.32s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_15_preprocessed.h5


Preprocessing .h5 Files:  82%|████████▏ | 60/73 [05:22<00:55,  4.30s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_01_preprocessed.h5


Preprocessing .h5 Files:  84%|████████▎ | 61/73 [05:26<00:51,  4.31s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_35_preprocessed.h5


Preprocessing .h5 Files:  85%|████████▍ | 62/73 [05:31<00:47,  4.31s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_21_preprocessed.h5


Preprocessing .h5 Files:  86%|████████▋ | 63/73 [05:49<01:25,  8.50s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_10_preprocessed.h5


Preprocessing .h5 Files:  88%|████████▊ | 64/73 [05:53<01:05,  7.27s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_05_preprocessed.h5


Preprocessing .h5 Files:  89%|████████▉ | 65/73 [05:57<00:50,  6.33s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_05_preprocessed.h5


Preprocessing .h5 Files:  90%|█████████ | 66/73 [06:02<00:40,  5.72s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_34_preprocessed.h5


Preprocessing .h5 Files:  92%|█████████▏| 67/73 [06:08<00:34,  5.83s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb23_20_preprocessed.h5


Preprocessing .h5 Files:  93%|█████████▎| 68/73 [06:12<00:26,  5.37s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_01_preprocessed.h5


Preprocessing .h5 Files:  95%|█████████▍| 69/73 [06:16<00:19,  4.99s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_04_preprocessed.h5


Preprocessing .h5 Files:  96%|█████████▌| 70/73 [06:20<00:14,  4.76s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_10_preprocessed.h5


Preprocessing .h5 Files:  97%|█████████▋| 71/73 [06:25<00:09,  4.62s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb05_01_preprocessed.h5


Preprocessing .h5 Files:  99%|█████████▊| 72/73 [06:29<00:04,  4.50s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb03_34_preprocessed.h5


Preprocessing .h5 Files: 100%|██████████| 73/73 [06:33<00:00,  5.39s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/preprocessed_data/chb01_04_preprocessed.h5

Preprocessing complete!





In [81]:
#extracting features
feature_path = "/Users/folasewaabdulsalam/Seizure_Onset/feature_path"
os.makedirs(feature_path, exist_ok=True)

def extract_spectral_energy(data, fs=FS):
    """
    Extracts spectral energy features for each 2-second window.
    """
    num_channels = data.shape[0]
    num_samples = data.shape[1]
    num_windows = (num_samples - WINDOW_SIZE) // WINDOW_SIZE + 1

    feature_vectors = []

    for i in range(num_windows):
        start = i * WINDOW_SIZE
        end = start + WINDOW_SIZE
        window = data[:, start:end]

        # Extract 8-band energy per channel
        features = []
        for low_cut, high_cut in FILTER_BANDS:
            nyquist = 0.5 * fs
            low = low_cut / nyquist
            high = high_cut / nyquist
            b, a = butter(4, [low, high], btype='band')
            filtered = filtfilt(b, a, window, axis=1)
            
            # Calculate energy (sum of squares)
            energy = np.sum(filtered ** 2, axis=1)  # Shape: (18,)
            features.append(energy)

        # Flatten to create the final feature vector (8 bands × 18 channels = 144)
        feature_vector = np.concatenate(features)
        feature_vectors.append(feature_vector)

    return np.array(feature_vectors)

def extract_and_save_features(preprocessed_path, feature_path, annotations_file):
    """
    Extract features from all preprocessed files.
    """
    # Load annotations
    seizure_annotations = load_annotations(annotations_file)

    # Get all preprocessed .h5 files
    h5_files = [f for f in os.listdir(preprocessed_path) if f.endswith("_preprocessed.h5")]
    
    for h5_file in tqdm(h5_files, desc="Extracting Features"):
        file_path = os.path.join(preprocessed_path, h5_file)
        output_file = os.path.join(feature_path, h5_file.replace("_preprocessed.h5", "_features.h5"))
        file_base_name = h5_file.replace("_preprocessed.h5", "")

        # Load the preprocessed data
        with h5py.File(file_path, "r") as hf:
            signals = hf["data"][:]
            # Use only the first 18 channels
            signals = signals[SELECTED_CHANNELS, :]
            features = extract_spectral_energy(signals)
        
        # Extract and add annotations
        labels = np.zeros(features.shape[0], dtype=int)  # Default to non-seizure
        if file_base_name in seizure_annotations:
            for event, time in seizure_annotations[file_base_name]:
                if event == b"start":
                    start_idx = time // 2
                elif event == b"end":
                    end_idx = time // 2
                    labels[start_idx:end_idx] = 1  # Mark seizure windows
        
        # Save the features and labels
        with h5py.File(output_file, "w") as out_hf:
            out_hf.create_dataset("features", data=features, compression="gzip")
            out_hf.create_dataset("labels", data=labels, compression="gzip")
            out_hf.attrs["channels"] = SELECTED_CHANNELS
            if file_base_name in seizure_annotations:
                out_hf.attrs["seizure_annotations"] = seizure_annotations[file_base_name]
        
        print(f"Saved {output_file}")

# Run the feature extraction
extract_and_save_features(preprocessed_path, feature_path, annotations_file)

print("\nFeature extraction complete!")

Extracting Features:   1%|▏         | 1/73 [00:05<06:08,  5.12s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_46_features.h5


Extracting Features:   3%|▎         | 2/73 [00:09<05:52,  4.97s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_01_features.h5


Extracting Features:   4%|▍         | 3/73 [00:15<05:55,  5.08s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_07_features.h5


Extracting Features:   5%|▌         | 4/73 [00:20<05:54,  5.14s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_18_features.h5


Extracting Features:   7%|▋         | 5/73 [00:25<05:57,  5.26s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_11_features.h5


Extracting Features:   8%|▊         | 6/73 [00:31<05:51,  5.24s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_34_features.h5


Extracting Features:  10%|▉         | 7/73 [00:36<05:45,  5.24s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_08_features.h5


Extracting Features:  11%|█         | 8/73 [00:41<05:43,  5.28s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_03_features.h5


Extracting Features:  12%|█▏        | 9/73 [00:46<05:34,  5.23s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_04_features.h5


Extracting Features:  14%|█▎        | 10/73 [00:52<05:31,  5.27s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_13_features.h5


Extracting Features:  15%|█▌        | 11/73 [00:57<05:29,  5.31s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_33_features.h5


Extracting Features:  16%|█▋        | 12/73 [01:02<05:20,  5.26s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_16_features.h5


Extracting Features:  18%|█▊        | 13/73 [01:07<05:05,  5.08s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_10_features.h5


Extracting Features:  19%|█▉        | 14/73 [01:12<04:54,  5.00s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_38_features.h5


Extracting Features:  21%|██        | 15/73 [01:17<04:55,  5.10s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_19_features.h5


Extracting Features:  22%|██▏       | 16/73 [01:24<05:22,  5.66s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_20_features.h5


Extracting Features:  23%|██▎       | 17/73 [01:29<05:03,  5.42s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_36_features.h5


Extracting Features:  25%|██▍       | 18/73 [01:34<04:49,  5.27s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_06_features.h5


Extracting Features:  26%|██▌       | 19/73 [01:38<04:35,  5.10s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_32_features.h5


Extracting Features:  27%|██▋       | 20/73 [01:43<04:27,  5.04s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_05_features.h5


Extracting Features:  29%|██▉       | 21/73 [01:48<04:20,  5.01s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_08_features.h5


Extracting Features:  30%|███       | 22/73 [01:53<04:16,  5.03s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_17_features.h5


Extracting Features:  32%|███▏      | 23/73 [01:55<03:15,  3.91s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_16_features.h5


Extracting Features:  33%|███▎      | 24/73 [01:59<03:23,  4.15s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_36_features.h5


Extracting Features:  34%|███▍      | 25/73 [02:04<03:29,  4.36s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_18_features.h5


Extracting Features:  36%|███▌      | 26/73 [02:09<03:34,  4.57s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_39_features.h5


Extracting Features:  37%|███▋      | 27/73 [02:14<03:35,  4.69s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_19_features.h5


Extracting Features:  38%|███▊      | 28/73 [02:19<03:32,  4.73s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_01_features.h5


Extracting Features:  40%|███▉      | 29/73 [02:24<03:29,  4.75s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_07_features.h5


Extracting Features:  41%|████      | 30/73 [02:29<03:26,  4.80s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_06_features.h5


Extracting Features:  42%|████▏     | 31/73 [02:30<02:39,  3.79s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_22_features.h5


Extracting Features:  44%|████▍     | 32/73 [02:35<02:49,  4.12s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_16+_features.h5


Extracting Features:  45%|████▌     | 33/73 [02:40<02:54,  4.37s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_16_features.h5


Extracting Features:  47%|████▋     | 34/73 [02:45<02:55,  4.51s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_09_features.h5


Extracting Features:  48%|████▊     | 35/73 [02:50<02:54,  4.60s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_02_features.h5


Extracting Features:  49%|████▉     | 36/73 [02:55<02:52,  4.67s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_35_features.h5


Extracting Features:  51%|█████     | 37/73 [03:09<04:30,  7.50s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_08_features.h5


Extracting Features:  52%|█████▏    | 38/73 [03:13<03:54,  6.69s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_15_features.h5


Extracting Features:  53%|█████▎    | 39/73 [03:18<03:29,  6.15s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_34_features.h5


Extracting Features:  55%|█████▍    | 40/73 [03:23<03:12,  5.83s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_22_features.h5


Extracting Features:  56%|█████▌    | 41/73 [03:29<02:59,  5.62s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_03_features.h5


Extracting Features:  58%|█████▊    | 42/73 [03:33<02:47,  5.39s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_38_features.h5


Extracting Features:  59%|█████▉    | 43/73 [03:38<02:35,  5.18s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_05_features.h5


Extracting Features:  60%|██████    | 44/73 [03:43<02:27,  5.07s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_35_features.h5


Extracting Features:  62%|██████▏   | 45/73 [03:48<02:19,  4.96s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_21_features.h5


Extracting Features:  63%|██████▎   | 46/73 [03:51<02:01,  4.51s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_07_features.h5


Extracting Features:  64%|██████▍   | 47/73 [03:56<01:58,  4.58s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_06_features.h5


Extracting Features:  66%|██████▌   | 48/73 [04:01<01:57,  4.70s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_01_features.h5


Extracting Features:  67%|██████▋   | 49/73 [04:06<01:58,  4.93s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_20_features.h5


Extracting Features:  68%|██████▊   | 50/73 [04:10<01:42,  4.45s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_26_features.h5


Extracting Features:  70%|██████▉   | 51/73 [04:15<01:41,  4.63s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_02_features.h5


Extracting Features:  71%|███████   | 52/73 [04:35<03:16,  9.34s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_19_features.h5


Extracting Features:  73%|███████▎  | 53/73 [04:40<02:40,  8.03s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_04_features.h5


Extracting Features:  74%|███████▍  | 54/73 [05:00<03:39, 11.56s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_16_features.h5


Extracting Features:  75%|███████▌  | 55/73 [05:05<02:52,  9.60s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb05_17_features.h5


Extracting Features:  77%|███████▋  | 56/73 [05:10<02:18,  8.15s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_12_features.h5


Extracting Features:  78%|███████▊  | 57/73 [05:14<01:54,  7.17s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_21_features.h5


Extracting Features:  79%|███████▉  | 58/73 [05:19<01:37,  6.50s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_03_features.h5


Extracting Features:  81%|████████  | 59/73 [05:24<01:24,  6.03s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_05_features.h5


Extracting Features:  82%|████████▏ | 60/73 [05:41<02:00,  9.29s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_17_features.h5


Extracting Features:  84%|████████▎ | 61/73 [05:46<01:35,  7.94s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_01_features.h5


Extracting Features:  85%|████████▍ | 62/73 [05:51<01:17,  7.04s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_15_features.h5


Extracting Features:  86%|████████▋ | 63/73 [05:56<01:03,  6.40s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_13_features.h5


Extracting Features:  88%|████████▊ | 64/73 [06:01<00:53,  5.89s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_14_features.h5


Extracting Features:  89%|████████▉ | 65/73 [06:20<01:19,  9.89s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_09_features.h5


Extracting Features:  90%|█████████ | 66/73 [06:39<01:28, 12.71s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_10_features.h5


Extracting Features:  92%|█████████▏| 67/73 [06:44<01:02, 10.41s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_31_features.h5


Extracting Features:  93%|█████████▎| 68/73 [06:49<00:43,  8.70s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb24_02_features.h5


Extracting Features:  95%|█████████▍| 69/73 [06:54<00:30,  7.51s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_04_features.h5


Extracting Features:  96%|█████████▌| 70/73 [06:58<00:19,  6.67s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb03_34_features.h5


Extracting Features:  97%|█████████▋| 71/73 [07:03<00:12,  6.08s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb01_43_features.h5


Extracting Features:  99%|█████████▊| 72/73 [07:08<00:05,  5.68s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb02_27_features.h5


Extracting Features: 100%|██████████| 73/73 [07:18<00:00,  6.00s/it]

Saved /Users/folasewaabdulsalam/Seizure_Onset/feature_path/chb23_06_features.h5

Feature extraction complete!





In [86]:
#loading features and labels with the annotations
from collections import Counter
def load_features_and_labels(feature_path, annotations_file):
    """
    Load features and labels from all feature files.
    """
    features = []
    labels = []

    # Load annotations
    seizure_annotations = {}
    with open(annotations_file, "r") as f:
        for line in f:
            parts = line.strip().split()
            file_name = parts[0].strip()
            event_type = parts[1].strip().encode("utf-8")
            event_time = int(parts[2].strip())
            
            if file_name not in seizure_annotations:
                seizure_annotations[file_name] = []
            seizure_annotations[file_name].append((event_type, event_time))

    for file_name in os.listdir(feature_path):
        if file_name.endswith("_features.h5"):
            file_base_name = file_name.replace("_features.h5", "")
            file_path = os.path.join(feature_path, file_name)
            
            with h5py.File(file_path, "r") as hf:
                data = hf["features"][:]
                
                # Check for seizure annotations
                file_annotations = seizure_annotations.get(file_base_name, [])
                labels_for_file = np.zeros(data.shape[0], dtype=int)

                # Mark seizure windows
                for event, time in file_annotations:
                    if event == b"start":
                        start_idx = time // 2
                        labels_for_file[start_idx:] = 1
                    elif event == b"end":
                        end_idx = time // 2
                        labels_for_file[end_idx:] = 0

                features.append(data)
                labels.append(labels_for_file)

    # Convert to numpy arrays
    features = np.vstack(features)
    labels = np.concatenate(labels)
    
    return features, labels



In [93]:

#training the model
model_results_path = "/Users/folasewaabdulsalam/Seizure_Onset/model_results"

# Load Data
features, labels = load_features_and_labels(feature_path, annotations_file)

# Preserve Original Seizure-to-Non-Seizure Ratio
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

# Print Ratio
print("\nTraining Set Ratio:", Counter(y_train))
print("Test Set Ratio:", Counter(y_test))

# Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance Training Data
smote = SMOTE(random_state=42, k_neighbors=5)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Print Balanced Ratio
print("\nBalanced Training Set Ratio:", Counter(y_train_balanced))

# Define Models
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Train and Evaluate
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)

    # Metrics
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
    cm = confusion_matrix(y_test, y_pred)

    # Sensitivity and Specificity
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    
    # Latency Calculation (Onset Delay)
    latency = np.mean([max(0, i - j) for i, j in zip(np.where(y_pred == 1)[0], np.where(y_test == 1)[0])]) if np.any(y_test == 1) else 0

    # Save Results
    with open(os.path.join(model_results_path, f"{model_name}_results.txt"), "w") as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"F1 Score: {f1:.4f}\n")
        f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"Latency: {latency:.4f} samples\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred))

    print(f"\nModel: {model_name}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Latency: {latency:.4f} samples")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

print("\nTraining Complete!")


Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Set Ratio: Counter({np.int64(0): 31757, np.int64(1): 200})





Balanced Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 127028})

Training Naive Bayes...

Model: Naive Bayes
F1 Score: 0.9814
Sensitivity (Recall): 0.4800
Specificity: 0.9764
Latency: 0.4000 samples
Confusion Matrix:
[[31008   749]
 [  104    96]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     31757
           1       0.11      0.48      0.18       200

    accuracy                           0.97     31957
   macro avg       0.56      0.73      0.59     31957
weighted avg       0.99      0.97      0.98     31957


Training Random Forest...

Model: Random Forest
F1 Score: 0.9970
Sensitivity (Recall): 0.7200
Specificity: 0.9988
Latency: 1359.5519 samples
Confusion Matrix:
[[31718    39]
 [   56   144]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31757
           1       0.79      0.72      0.75       200

    ac

Parameters: { "use_label_encoder" } are not used.




Model: XGBoost
F1 Score: 0.9975
Sensitivity (Recall): 0.8600
Specificity: 0.9983
Latency: 4.1150 samples
Confusion Matrix:
[[31703    54]
 [   28   172]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31757
           1       0.76      0.86      0.81       200

    accuracy                           1.00     31957
   macro avg       0.88      0.93      0.90     31957
weighted avg       1.00      1.00      1.00     31957


Training Complete!


In [94]:
#training 2
# Paths
model_results_path = "/Users/folasewaabdulsalam/Seizure_Onset/model_results"
os.makedirs(model_results_path, exist_ok=True)

# Load Features and Labels
features, labels = load_features_and_labels(feature_path, annotations_file)

# Preserve Original Seizure-to-Non-Seizure Ratio
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

# Print Ratio
print("\nTraining Set Ratio:", Counter(y_train))
print("Test Set Ratio:", Counter(y_test))

# Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance Training Data
smote = SMOTE(random_state=42, k_neighbors=5)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Print Balanced Ratio
print("\nBalanced Training Set Ratio:", Counter(y_train_balanced))

# Define Models
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Train and Evaluate
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)

    # Metrics
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
    cm = confusion_matrix(y_test, y_pred)

    # Sensitivity and Specificity
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    
    # Improved Latency Calculation (Onset Delay in Seconds)
    latency = 0
    true_onsets = np.where(y_test == 1)[0]
    predicted_onsets = np.where(y_pred == 1)[0]

    if len(true_onsets) > 0 and len(predicted_onsets) > 0:
        latency_list = []
        for pred in predicted_onsets:
            # Find the closest true onset before the predicted onset
            closest_true = true_onsets[true_onsets <= pred]
            if len(closest_true) > 0:
                latency_list.append((pred - closest_true[-1]) * 2)  # Convert to seconds

        # Calculate the average latency if any valid matches were found
        latency = np.mean(latency_list) if latency_list else 0

    # Save Results
    with open(os.path.join(model_results_path, f"{model_name}_results2.txt"), "w") as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"F1 Score: {f1:.4f}\n")
        f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"Latency: {latency:.4f} seconds\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred))

    print(f"\nModel: {model_name}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Latency: {latency:.4f} seconds")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

print("\nTraining Complete!")



Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Set Ratio: Counter({np.int64(0): 31757, np.int64(1): 200})





Balanced Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 127028})

Training Naive Bayes...

Model: Naive Bayes
F1 Score: 0.9814
Sensitivity (Recall): 0.4800
Specificity: 0.9764
Latency: 282.6888 seconds
Confusion Matrix:
[[31008   749]
 [  104    96]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     31757
           1       0.11      0.48      0.18       200

    accuracy                           0.97     31957
   macro avg       0.56      0.73      0.59     31957
weighted avg       0.99      0.97      0.98     31957


Training Random Forest...


KeyboardInterrupt: 

In [97]:
#trainig 3
# Load Features and Labels
features, labels = load_features_and_labels(feature_path, annotations_file)

# Preserve Original Seizure-to-Non-Seizure Ratio
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

# Print Initial Ratio
print("\nTraining Set Ratio:", Counter(y_train))
print("Test Set Ratio:", Counter(y_test))

# Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Downsample the Majority Class
def downsample(X, y):
    """
    Downsamples the majority class to match the size of the minority class.
    """
    # Separate the majority and minority classes
    X_majority = X[y == 0]
    y_majority = y[y == 0]
    X_minority = X[y == 1]
    y_minority = y[y == 1]

    # Downsample the majority class
    np.random.seed(42)
    majority_indices = np.random.choice(len(X_majority), len(X_minority), replace=False)
    X_majority_downsampled = X_majority[majority_indices]
    y_majority_downsampled = y_majority[majority_indices]

    # Combine the downsampled majority class with the minority class
    X_balanced = np.vstack((X_majority_downsampled, X_minority))
    y_balanced = np.hstack((y_majority_downsampled, y_minority))

    # Shuffle the balanced data
    indices = np.arange(len(y_balanced))
    np.random.shuffle(indices)
    return X_balanced[indices], y_balanced[indices]

X_train_balanced, y_train_balanced = downsample(X_train, y_train)

# Print Balanced Ratio
print("\nBalanced Training Set Ratio:", Counter(y_train_balanced))

# Define Models
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Train and Evaluate
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)

    # Metrics
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
    cm = confusion_matrix(y_test, y_pred)

    # Sensitivity and Specificity
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    
    # Improved Latency Calculation (Onset Delay in Seconds)
    latency = 0
    true_onsets = np.where(y_test == 1)[0]
    predicted_onsets = np.where(y_pred == 1)[0]

    if len(true_onsets) > 0 and len(predicted_onsets) > 0:
        latency_list = []
        for pred in predicted_onsets:
            # Find the closest true onset before the predicted onset
            closest_true = true_onsets[true_onsets <= pred]
            if len(closest_true) > 0:
                latency_list.append((pred - closest_true[-1]) * 2)  # Convert to seconds

        # Calculate the average latency if any valid matches were found
        latency = np.mean(latency_list) if latency_list else 0

    # Save Results
    with open(os.path.join(model_results_path, f"{model_name}_results3.txt"), "w") as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"F1 Score: {f1:.4f}\n")
        f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"Latency: {latency:.4f} seconds\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred))

    print(f"\nModel: {model_name}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Latency: {latency:.4f} seconds")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

print("\nTraining Complete!")


Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Set Ratio: Counter({np.int64(0): 31757, np.int64(1): 200})

Balanced Training Set Ratio: Counter({np.int64(0): 798, np.int64(1): 798})

Training Naive Bayes...

Model: Naive Bayes
F1 Score: 0.9684
Sensitivity (Recall): 0.6250
Specificity: 0.9509
Latency: 293.1960 seconds
Confusion Matrix:
[[30197  1560]
 [   75   125]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     31757
           1       0.07      0.62      0.13       200

    accuracy                           0.95     31957
   macro avg       0.54      0.79      0.55     31957
weighted avg       0.99      0.95      0.97     31957


Training Random Forest...

Model: Random Forest
F1 Score: 0.9714
Sensitivity (Recall): 0.9350
Specificity: 0.9539
Latency: 279.2515 seconds
Confusion Matrix:
[[30293  1464]
 [   13   187]]
Classification Report:
              precision    recall  f1-sco

Parameters: { "use_label_encoder" } are not used.




Model: XGBoost
F1 Score: 0.9723
Sensitivity (Recall): 0.9450
Specificity: 0.9555
Latency: 285.5241 seconds
Confusion Matrix:
[[30344  1413]
 [   11   189]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.96      0.98     31757
           1       0.12      0.94      0.21       200

    accuracy                           0.96     31957
   macro avg       0.56      0.95      0.59     31957
weighted avg       0.99      0.96      0.97     31957


Training Complete!


In [None]:
from imblearn.under_sampling import RandomUnderSampler
# Load Data
features, labels = load_features_and_labels(feature_path, annotations_file)

# Preserve Original Seizure-to-Non-Seizure Ratio
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

# Print Ratio
print("\nTraining Set Ratio:", Counter(y_train))
print("Test Set Ratio:", Counter(y_test))

# Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance Training Data (Downsampling)
rus = RandomUnderSampler(random_state=42)
X_train_balanced, y_train_balanced = rus.fit_resample(X_train, y_train)

# Print Balanced Ratio
print("\nBalanced Training Set Ratio:", Counter(y_train_balanced))

# Define Models
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Train and Evaluate
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)

    # Metrics
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
    cm = confusion_matrix(y_test, y_pred)

    # Sensitivity and Specificity
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    
        # Latency Calculation (Onset Delay)
    
    latency = np.mean([max(0, i - j) for i, j in zip(np.where(y_pred == 1)[0], np.where(y_test == 1)[0])]) if np.any(y_test == 1) else 0
    
    # Save Results
    with open(os.path.join(model_results_path, f"{model_name}_results4.txt"), "w") as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"F1 Score: {f1:.4f}\n")
        f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"Latency: {latency:.4f} seconds\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred))

    print(f"\nModel: {model_name}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Latency: {latency:.4f} seconds")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

print("\nTraining Complete!")


Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Set Ratio: Counter({np.int64(0): 31757, np.int64(1): 200})

Balanced Training Set Ratio: Counter({np.int64(0): 798, np.int64(1): 798})

Training Naive Bayes...

Model: Naive Bayes
F1 Score: 0.9684
Sensitivity (Recall): 0.6250
Specificity: 0.9509
Latency: 0.0000 seconds
Confusion Matrix:
[[30197  1560]
 [   75   125]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     31757
           1       0.07      0.62      0.13       200

    accuracy                           0.95     31957
   macro avg       0.54      0.79      0.55     31957
weighted avg       0.99      0.95      0.97     31957


Training Random Forest...





Model: Random Forest
F1 Score: 0.9705
Sensitivity (Recall): 0.9350
Specificity: 0.9524
Latency: 0.0000 seconds
Confusion Matrix:
[[30244  1513]
 [   13   187]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.98     31757
           1       0.11      0.94      0.20       200

    accuracy                           0.95     31957
   macro avg       0.55      0.94      0.59     31957
weighted avg       0.99      0.95      0.97     31957


Training XGBoost...


Parameters: { "use_label_encoder" } are not used.




Model: XGBoost
F1 Score: 0.9723
Sensitivity (Recall): 0.9450
Specificity: 0.9555
Latency: 0.0000 seconds
Confusion Matrix:
[[30344  1413]
 [   11   189]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.96      0.98     31757
           1       0.12      0.94      0.21       200

    accuracy                           0.96     31957
   macro avg       0.56      0.95      0.59     31957
weighted avg       0.99      0.96      0.97     31957


Training Complete!


In [103]:
#training 5
# Load Data
features, labels = load_features_and_labels(feature_path, annotations_file)

# Preserve Original Seizure-to-Non-Seizure Ratio
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

# Print Ratio
print("\nTraining Set Ratio:", Counter(y_train))
print("Test Set Ratio:", Counter(y_test))

# Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance Training Data
smote = SMOTE(random_state=42, k_neighbors=5)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Print Balanced Ratio
print("\nBalanced Training Set Ratio:", Counter(y_train_balanced))

# Define Models
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Train and Evaluate
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)

    # Metrics
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
    cm = confusion_matrix(y_test, y_pred)

    # Sensitivity and Specificity
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    
    # Latency Calculation (Onset Delay)
    latency = np.mean([max(0, i - j) for i, j in zip(np.where(y_pred == 1)[0], np.where(y_test == 1)[0])]) if np.any(y_test == 1) else 0

    # Save Results
    with open(os.path.join(model_results_path, f"{model_name}_results5.txt"), "w") as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"F1 Score: {f1:.4f}\n")
        f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"Latency: {latency:.4f} samples\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred))

    print(f"\nModel: {model_name}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Latency: {latency:.4f} samples")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

print("\nTraining Complete!")


Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Set Ratio: Counter({np.int64(0): 31757, np.int64(1): 200})





Balanced Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 127028})

Training Naive Bayes...

Model: Naive Bayes
F1 Score: 0.9814
Sensitivity (Recall): 0.4800
Specificity: 0.9764
Latency: 0.4000 samples
Confusion Matrix:
[[31008   749]
 [  104    96]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     31757
           1       0.11      0.48      0.18       200

    accuracy                           0.97     31957
   macro avg       0.56      0.73      0.59     31957
weighted avg       0.99      0.97      0.98     31957


Training Random Forest...

Model: Random Forest
F1 Score: 0.9970
Sensitivity (Recall): 0.7200
Specificity: 0.9988
Latency: 1359.5519 samples
Confusion Matrix:
[[31718    39]
 [   56   144]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31757
           1       0.79      0.72      0.75       200

    ac

Parameters: { "use_label_encoder" } are not used.




Model: XGBoost
F1 Score: 0.9975
Sensitivity (Recall): 0.8600
Specificity: 0.9983
Latency: 4.1150 samples
Confusion Matrix:
[[31703    54]
 [   28   172]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31757
           1       0.76      0.86      0.81       200

    accuracy                           1.00     31957
   macro avg       0.88      0.93      0.90     31957
weighted avg       1.00      1.00      1.00     31957


Training Complete!


In [104]:
#training 6 with downsample
# Load Data
features, labels = load_features_and_labels(feature_path, annotations_file)

# Preserve Original Seizure-to-Non-Seizure Ratio
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

# Print Ratio
print("\nTraining Set Ratio:", Counter(y_train))
print("Test Set Ratio:", Counter(y_test))

# Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance Training Data with Downsampling
rus = RandomUnderSampler(random_state=42)
X_train_balanced, y_train_balanced = rus.fit_resample(X_train, y_train)

# Print Balanced Ratio
print("\nBalanced Training Set Ratio:", Counter(y_train_balanced))

# Define Models
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=200, max_depth=15, random_state=42),
    "XGBoost": XGBClassifier(n_estimators=150, max_depth=10, learning_rate=0.05, 
                             subsample=0.8, colsample_bytree=0.8, eval_metric='logloss', random_state=42)
}

# Train and Evaluate
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)

    # Metrics
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
    cm = confusion_matrix(y_test, y_pred)

    # Sensitivity and Specificity
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    
    # Latency Calculation (Onset Delay)
    latency = np.mean([max(0, i - j) for i, j in zip(np.where(y_pred == 1)[0], np.where(y_test == 1)[0])]) if np.any(y_test == 1) else 0

    # Save Results
    with open(os.path.join(model_results_path, f"{model_name}_results6.txt"), "w") as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"F1 Score: {f1:.4f}\n")
        f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"Latency: {latency:.4f} samples\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred))

    print(f"\nModel: {model_name}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Latency: {latency:.4f} samples")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

print("\nTraining Complete!")


Training Set Ratio: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Set Ratio: Counter({np.int64(0): 31757, np.int64(1): 200})

Balanced Training Set Ratio: Counter({np.int64(0): 798, np.int64(1): 798})

Training Naive Bayes...





Model: Naive Bayes
F1 Score: 0.9684
Sensitivity (Recall): 0.6250
Specificity: 0.9509
Latency: 0.0000 samples
Confusion Matrix:
[[30197  1560]
 [   75   125]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     31757
           1       0.07      0.62      0.13       200

    accuracy                           0.95     31957
   macro avg       0.54      0.79      0.55     31957
weighted avg       0.99      0.95      0.97     31957


Training Random Forest...

Model: Random Forest
F1 Score: 0.9706
Sensitivity (Recall): 0.9350
Specificity: 0.9525
Latency: 0.0000 samples
Confusion Matrix:
[[30248  1509]
 [   13   187]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.98     31757
           1       0.11      0.94      0.20       200

    accuracy                           0.95     31957
   macro avg       0.55      0.94      0.59     31957
weighted

In [108]:
# Training 7 - Corrected Latency and Balanced Downsampling

# Load Features and Labels
features, labels = load_features_and_labels(feature_path, annotations_file)

print("\nTotal Features:", features.shape)
print("Total Labels:", labels.shape)
print("Initial Label Distribution:", Counter(labels))

# Stratified Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

print("\nTraining Set Shape:", X_train.shape)
print("Test Set Shape:", X_test.shape)
print("Training Labels Distribution:", Counter(y_train))
print("Test Labels Distribution:", Counter(y_test))

# Downsampling the Majority Class
rus = RandomUnderSampler(random_state=42, sampling_strategy={0: 5000, 1: 798})
X_train_balanced, y_train_balanced = rus.fit_resample(X_train, y_train)

print("\nBalanced Training Labels Distribution:", Counter(y_train_balanced))

# Standardize Features
scaler = StandardScaler()
X_train_balanced = scaler.fit_transform(X_train_balanced)
X_test = scaler.transform(X_test)

# Model Definitions
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Helper function to calculate latency, sensitivity, and specificity
def evaluate_onset_detection(y_true, y_pred, window_size=2, fs=256):
    """
    Calculate latency, sensitivity, and specificity.
    """
    # Convert window indices to time
    onset_times_true = np.where(y_true == 1)[0]
    onset_times_pred = np.where(y_pred == 1)[0]

    # Calculate latency
    latencies = []
    for true_onset in onset_times_true:
        # Find the first correct prediction after the actual onset
        valid_preds = onset_times_pred[onset_times_pred >= true_onset]
        if len(valid_preds) > 0:
            latency = (valid_preds[0] - true_onset) * (window_size / fs)
            latencies.append(latency)

    # Average latency across all seizure events
    avg_latency = np.mean(latencies) if len(latencies) > 0 else float('inf')

    # Calculate sensitivity (true positive rate)
    sensitivity = len(latencies) / len(onset_times_true) if len(onset_times_true) > 0 else 0

    # Calculate specificity (true negative rate)
    non_seizure_true = np.sum(y_true == 0)
    non_seizure_pred_correct = np.sum((y_pred == 0) & (y_true == 0))
    specificity = non_seizure_pred_correct / non_seizure_true if non_seizure_true > 0 else 0

    return avg_latency, sensitivity, specificity

# Train and Evaluate Models
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train_balanced, y_train_balanced)
    y_pred = model.predict(X_test)

    # Metrics
    f1 = f1_score(y_test, y_pred, average="weighted")
    precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
    cm = confusion_matrix(y_test, y_pred)

    # Sensitivity, Specificity, and Latency
    latency, sensitivity, specificity = evaluate_onset_detection(y_test, y_pred)
    
    # Save Results
    results_file = os.path.join(model_results_path, f"{model_name}_results7.txt")
    with open(results_file, "w") as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"F1 Score: {f1:.4f}\n")
        f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
        f.write(f"Specificity: {specificity:.4f}\n")
        f.write(f"Latency: {latency:.4f} seconds\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred))

    print(f"\nModel: {model_name}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Latency: {latency:.4f} seconds")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

print("\nModel training and evaluation complete!")



Total Features: (159783, 144)
Total Labels: (159783,)
Initial Label Distribution: Counter({np.int64(0): 158785, np.int64(1): 998})

Training Set Shape: (127826, 144)
Test Set Shape: (31957, 144)
Training Labels Distribution: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Labels Distribution: Counter({np.int64(0): 31757, np.int64(1): 200})

Balanced Training Labels Distribution: Counter({np.int64(0): 5000, np.int64(1): 798})

Training Naive Bayes...





Model: Naive Bayes
F1 Score: 0.9808
Sensitivity (Recall): 1.0000
Specificity: 0.9752
Latency: 0.1348 seconds
Confusion Matrix:
[[30971   786]
 [  102    98]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     31757
           1       0.11      0.49      0.18       200

    accuracy                           0.97     31957
   macro avg       0.55      0.73      0.58     31957
weighted avg       0.99      0.97      0.98     31957


Training Random Forest...

Model: Random Forest
F1 Score: 0.9917
Sensitivity (Recall): 1.0000
Specificity: 0.9908
Latency: 0.1063 seconds
Confusion Matrix:
[[31466   291]
 [   36   164]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99     31757
           1       0.36      0.82      0.50       200

    accuracy                           0.99     31957
   macro avg       0.68      0.91      0.75     31957
weighted

Parameters: { "use_label_encoder" } are not used.




Model: XGBoost
F1 Score: 0.9927
Sensitivity (Recall): 1.0000
Specificity: 0.9916
Latency: 0.0654 seconds
Confusion Matrix:
[[31489   268]
 [   20   180]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00     31757
           1       0.40      0.90      0.56       200

    accuracy                           0.99     31957
   macro avg       0.70      0.95      0.78     31957
weighted avg       1.00      0.99      0.99     31957


Model training and evaluation complete!


In [109]:
#training 8 lstm
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler
from collections import Counter
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

# Load Features and Labels
features, labels = load_features_and_labels(feature_path, annotations_file)

print("\nTotal Features:", features.shape)
print("Total Labels:", labels.shape)
print("Initial Label Distribution:", Counter(labels))

# Stratified Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

print("\nTraining Set Shape:", X_train.shape)
print("Test Set Shape:", X_test.shape)
print("Training Labels Distribution:", Counter(y_train))
print("Test Labels Distribution:", Counter(y_test))

# Standardize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape for LSTM (samples, time steps, features)
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

# LSTM Model Definition
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(1, X_train.shape[2])),
    Dropout(0.3),
    LSTM(64),
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dense(1, activation="sigmoid")
])

# Compile the Model
model.compile(optimizer=Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])

# Early Stopping
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Train the Model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=64,
    callbacks=[early_stopping]
)

# Evaluate the Model
y_pred = (model.predict(X_test) > 0.5).astype("int32").flatten()

# Metrics
precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
cm = confusion_matrix(y_test, y_pred)

# Sensitivity and Specificity
tn, fp, fn, tp = cm.ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

# Latency Calculation (Samples to Seconds)
latency = np.mean([max(0, i - j) for i, j in zip(np.where(y_pred == 1)[0], np.where(y_test == 1)[0])]) * 2

# Save Results
model_results_path = "/Users/folasewaabdulsalam/Seizure_Onset/model_results"
os.makedirs(model_results_path, exist_ok=True)
results_file = os.path.join(model_results_path, "LSTM_results.txt")
with open(results_file, "w") as f:
    f.write(f"F1 Score: {f1_scores:.4f}\n")
    f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
    f.write(f"Specificity: {specificity:.4f}\n")
    f.write(f"Latency: {latency:.4f} seconds\n")
    f.write("Confusion Matrix:\n")
    f.write(f"{cm}\n")
    f.write("Classification Report:\n")
    f.write(classification_report(y_test, y_pred))

print(f"\nF1 Score: {f1_scores:.4f}")
print(f"Sensitivity (Recall): {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"Latency: {latency:.4f} seconds")
print("Confusion Matrix:")
print(cm)
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("\nLSTM Training and Evaluation Complete!")





Total Features: (159783, 144)
Total Labels: (159783,)
Initial Label Distribution: Counter({np.int64(0): 158785, np.int64(1): 998})

Training Set Shape: (127826, 144)
Test Set Shape: (31957, 144)
Training Labels Distribution: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Labels Distribution: Counter({np.int64(0): 31757, np.int64(1): 200})
Epoch 1/30


  super().__init__(**kwargs)


[1m1998/1998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9937 - loss: 0.0686 - val_accuracy: 0.9966 - val_loss: 0.0114
Epoch 2/30
[1m1998/1998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9966 - loss: 0.0127 - val_accuracy: 0.9973 - val_loss: 0.0094
Epoch 3/30
[1m1998/1998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9973 - loss: 0.0109 - val_accuracy: 0.9975 - val_loss: 0.0089
Epoch 4/30
[1m1998/1998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9977 - loss: 0.0082 - val_accuracy: 0.9978 - val_loss: 0.0082
Epoch 5/30
[1m1998/1998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9981 - loss: 0.0076 - val_accuracy: 0.9977 - val_loss: 0.0078
Epoch 6/30
[1m1998/1998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9980 - loss: 0.0075 - val_accuracy: 0.9975 - val_loss: 0.0088
Epoch 7/30
[1m1998/1998[0

In [110]:
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, precision_recall_fscore_support, confusion_matrix, classification_report
from imblearn.under_sampling import RandomUnderSampler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
import numpy as np
import os


# Load Features and Labels
features, labels = load_features_and_labels(feature_path, annotations_file)

print("\nTotal Features:", features.shape)
print("Total Labels:", labels.shape)
print("Initial Label Distribution:", Counter(labels))

# Stratified Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)

print("\nTraining Set Shape:", X_train.shape)
print("Test Set Shape:", X_test.shape)
print("Training Labels Distribution:", Counter(y_train))
print("Test Labels Distribution:", Counter(y_test))

# Downsampling the Majority Class
rus = RandomUnderSampler(random_state=42)
X_train_balanced, y_train_balanced = rus.fit_resample(X_train, y_train)

print("\nBalanced Training Labels Distribution:", Counter(y_train_balanced))

# Standardize Features
scaler = StandardScaler()
X_train_balanced = scaler.fit_transform(X_train_balanced)
X_test = scaler.transform(X_test)

# Reshape for LSTM (samples, timesteps, features)
X_train_balanced = X_train_balanced.reshape(-1, 1, X_train_balanced.shape[1])
X_test = X_test.reshape(-1, 1, X_test.shape[1])

# LSTM Model Definition
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(1, 144)),
    BatchNormalization(),
    Dropout(0.5),
    LSTM(64),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train the LSTM Model
model.fit(X_train_balanced, y_train_balanced, epochs=20, batch_size=32, validation_split=0.2)

# Predict on Test Set
y_pred = (model.predict(X_test) > 0.5).astype(int).flatten()

# Metrics
f1 = f1_score(y_test, y_pred, average="weighted")
precision, recall, f1_scores, _ = precision_recall_fscore_support(y_test, y_pred, average="binary")
cm = confusion_matrix(y_test, y_pred)

# Sensitivity, Specificity, and Latency
tn, fp, fn, tp = cm.ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

# Overlapping Window Latency Calculation
window_size = 2  # seconds
fs = 256  # sampling rate
latencies = []
onset_times_true = np.where(y_test == 1)[0]
onset_times_pred = np.where(y_pred == 1)[0]

for true_onset in onset_times_true:
    valid_preds = onset_times_pred[onset_times_pred >= true_onset]
    if len(valid_preds) > 0:
        latency = (valid_preds[0] - true_onset) * (window_size / 2)  # account for 50% overlap
        latencies.append(latency)

avg_latency = np.mean(latencies) if len(latencies) > 0 else float('inf')

# Save Results
results_file = os.path.join(model_results_path, "LSTM_results2.txt")
with open(results_file, "w") as f:
    f.write(f"F1 Score: {f1:.4f}\n")
    f.write(f"Sensitivity (Recall): {sensitivity:.4f}\n")
    f.write(f"Specificity: {specificity:.4f}\n")
    f.write(f"Latency: {avg_latency:.4f} seconds\n")
    f.write("Confusion Matrix:\n")
    f.write(f"{cm}\n")
    f.write("Classification Report:\n")
    f.write(classification_report(y_test, y_pred))

print("\nLSTM Model Results")
print(f"F1 Score: {f1:.4f}")
print(f"Sensitivity (Recall): {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"Latency: {avg_latency:.4f} seconds")
print("Confusion Matrix:")
print(cm)
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("\nLSTM training and evaluation complete!")



Total Features: (159783, 144)
Total Labels: (159783,)
Initial Label Distribution: Counter({np.int64(0): 158785, np.int64(1): 998})

Training Set Shape: (127826, 144)
Test Set Shape: (31957, 144)
Training Labels Distribution: Counter({np.int64(0): 127028, np.int64(1): 798})
Test Labels Distribution: Counter({np.int64(0): 31757, np.int64(1): 200})

Balanced Training Labels Distribution: Counter({np.int64(0): 798, np.int64(1): 798})


  super().__init__(**kwargs)


Epoch 1/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7326 - loss: 0.6275 - val_accuracy: 0.4187 - val_loss: 0.7127
Epoch 2/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8643 - loss: 0.3877 - val_accuracy: 0.5469 - val_loss: 0.6463
Epoch 3/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8652 - loss: 0.3711 - val_accuracy: 0.6406 - val_loss: 0.5881
Epoch 4/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8844 - loss: 0.3111 - val_accuracy: 0.7000 - val_loss: 0.5228
Epoch 5/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8693 - loss: 0.3201 - val_accuracy: 0.8031 - val_loss: 0.4521
Epoch 6/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9071 - loss: 0.2732 - val_accuracy: 0.8250 - val_loss: 0.3968
Epoch 7/20
[1m40/40[0m [32m━━━━━━━━━━