In [3]:
import numpy as np
import os
import h5py
import pyedflib
from tqdm import tqdm
import pandas as pd

In [None]:
# Hard-coded, machine-specific absolute paths; edit these before running on
# another machine.
# NOTE(review): presumably these are the arguments meant for
# convert_edf_to_h5(source_dir, output_dir) — no call is visible here; confirm.
source_dir = "/Users/folasewaabdulsalam/Seizure_Onset/Dataset"
output_dir = "/Users/folasewaabdulsalam/Seizure_Onset/h5_files"

def convert_edf_to_h5(source_dir, output_dir):
    """
    Convert every .edf file found in the patient folders of ``source_dir``
    to an .h5 file under ``output_dir``.

    Each immediate sub-directory of ``source_dir`` is treated as one patient
    folder and mirrored under ``output_dir``; non-directory entries are
    ignored. Files whose .h5 counterpart already exists are skipped, so the
    function can be re-run to resume an interrupted conversion.

    Every output file contains:
      * dataset ``"data"``  - the signals read with ``readSignal`` stacked
        into one array (gzip-compressed),
      * attribute ``"channels"`` - the signal labels,
      * attribute ``"sampling_rate"`` - the sample frequency of signal 0 only,
      * attribute ``"seizure_annotations"`` - the stripped lines of
        ``<name>.edf.seizures``, written only when that sidecar file exists.

    Args:
        source_dir: Directory containing one sub-directory per patient.
        output_dir: Directory that receives the converted files; created if
            it does not exist.

    Returns:
        None. Progress is reported through ``tqdm`` and ``print``.
    """

    os.makedirs(output_dir, exist_ok=True)
    for patient_folder in tqdm(os.listdir(source_dir), desc="Processing patient data"):
        patient_path = os.path.join(source_dir, patient_folder)

        if not os.path.isdir(patient_path):
            continue

        patient_output_dir = os.path.join(output_dir, patient_folder)
        os.makedirs(patient_output_dir, exist_ok=True)

        edf_files = [f for f in os.listdir(patient_path) if f.endswith(".edf")]
        # Swap only the extension; str.replace would also rewrite any ".edf"
        # that happens to appear earlier in the file name.
        h5_files = [os.path.splitext(f)[0] + ".h5" for f in edf_files]
        already_processed = all(
            os.path.exists(os.path.join(patient_output_dir, h5_name))
            for h5_name in h5_files
        )

        if already_processed:
            print(f"Skipping {patient_folder} (all sessions converted)")
            continue

        for edf_file, h5_name in zip(edf_files, h5_files):
            edf_path = os.path.join(patient_path, edf_file)
            h5_file_path = os.path.join(patient_output_dir, h5_name)

            if os.path.exists(h5_file_path):
                print(f"Skipping {edf_file} already converted")
                continue

            # Extract raw signals and metadata.
            # NOTE(review): stacking with np.array assumes every signal has
            # the same number of samples, and only signal 0's sample
            # frequency is stored — confirm this holds for the dataset.
            with pyedflib.EdfReader(edf_path) as reader:
                signals = np.array(
                    [reader.readSignal(i) for i in range(reader.signals_in_file)]
                )
                channels = reader.getSignalLabels()
                sampling_rate = reader.getSampleFrequency(0)

            # Read the optional sidecar BEFORE creating the output file: if
            # reading fails, no partial .h5 is left behind to be mistaken
            # for a finished conversion on the next run.
            # NOTE(review): the sidecar is read as text; if these files are
            # binary in this dataset the read may fail — confirm the format.
            annotations = None
            seizure_file = edf_path + ".seizures"
            if os.path.exists(seizure_file):
                with open(seizure_file, "r") as sf:
                    annotations = [line.strip() for line in sf]

            with h5py.File(h5_file_path, "w") as h5_out:
                h5_out.create_dataset("data", data=signals, compression="gzip")
                h5_out.attrs["channels"] = channels
                h5_out.attrs["sampling_rate"] = sampling_rate
                if annotations is not None:
                    h5_out.attrs["seizure_annotations"] = annotations

            print(f"Converted: {edf_file}")
    print("Yehhh!! Conversion Complete")


    