In [None]:
import os
import gc
import numpy as np
import mne
from mne.preprocessing import ICA, create_eog_epochs

In [None]:
# Dataset locations -- edit both entries before running the notebook.
# RAW_PATH  : root folder that is walked recursively for .edf recordings.
# SAVE_PATH : folder that receives one .npz file per recording.
RAW_PATH, SAVE_PATH = "/path/to/SIENA_EEG", "/path/to/SIENA_PREPROCESSED"

# Create the output folder up front; a no-op when it already exists.
os.makedirs(name=SAVE_PATH, exist_ok=True)

In [None]:
# ---- Preprocessing parameters ----

# Band-pass edges handed to raw.filter (low cut, high cut).
LOW_F, HIGH_F = 0.5, 40

# Frequency handed to raw.notch_filter.
NOTCH = 50

# Length of each fixed-length epoch (the `duration` argument, in seconds).
EPOCH_LEN = 5.0

# Target sampling rate handed to raw.resample.
RESAMPLE_SFREQ = 256

# ---- ICA settings ----
# NOTE(review): the original note reads "ICA-32GB RAM"; presumably these values
# were sized for a machine with 32 GB of memory -- confirm.
ICA_COMPONENTS = 20

# Only the first 20 minutes of a recording (expressed in seconds) are used to fit the ICA.
ICA_MAX_DURATION = 60 * 20

# Seed passed to the ICA as random_state.
ICA_RANDOM_STATE = 97

In [None]:
def _flag_artifact_components(ica, raw_ica, muscle_threshold=0.7):
    """Return the sorted ICA component indices flagged as ocular or muscle artifacts.

    Parameters
    ----------
    ica : fitted ICA instance.
    raw_ica : the recording segment the ICA was fitted on.
    muscle_threshold : threshold forwarded to ``ica.find_bads_muscle``.
    """
    eog_inds = []
    try:
        eog_epochs = create_eog_epochs(raw_ica, verbose=False)
        eog_inds, _ = ica.find_bads_eog(eog_epochs)
    except Exception as err:
        # Ocular detection stays best-effort (it typically fails when no EOG
        # channel survives the EEG-only pick), but the reason is reported so a
        # run that never removes eye components is not mistaken for a clean one.
        print("EOG detection skipped:", err)

    muscle_inds, _ = ica.find_bads_muscle(raw_ica, threshold=muscle_threshold)

    # De-duplicate and sort so the exclusion list is deterministic.
    return sorted({int(i) for i in list(eog_inds) + list(muscle_inds)})


def _save_npz_atomic(save_path, **arrays):
    """Write ``arrays`` as a compressed .npz at ``save_path`` without exposing partial files.

    The archive is written to a temporary sibling file and moved into place
    with ``os.replace``. The main loop treats an existing ``save_path`` as
    "already done", so a truncated file left by an interrupted run must never
    appear under the final name.
    """
    tmp_path = save_path + ".part"
    try:
        # A file handle is passed so numpy does not append ".npz" to the temp name.
        with open(tmp_path, "wb") as handle:
            np.savez_compressed(handle, **arrays)
        os.replace(tmp_path, save_path)
    finally:
        # Only present if the write or the move failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def _preprocess_recording(edf_path, save_path, file):
    """Preprocess one EDF recording and save its normalized epochs to ``save_path``.

    Steps: load -> keep EEG channels -> band-pass + notch -> average reference
    -> resample -> ICA artifact removal -> fixed-length epochs -> per-epoch,
    per-channel z-score -> compressed .npz with arrays ``X`` and ``y``.

    Parameters
    ----------
    edf_path : path of the .edf file to read.
    save_path : destination .npz path.
    file : bare file name, used only in progress messages.

    Nothing is written when the recording is empty or yields no epochs.
    Exceptions raised by MNE/NumPy propagate to the caller.
    """
    # Load EDF file
    raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)

    if raw.n_times < 1:
        print("Empty file, skipping:", file)
        return

    raw.pick_types(eeg=True)

    # Band-pass and notch filtering
    raw.filter(LOW_F, HIGH_F, fir_design="firwin", verbose=False)
    raw.notch_filter(NOTCH, verbose=False)

    # Average reference, then resample to a common rate (also shrinks the data)
    raw.set_eeg_reference("average", verbose=False)
    raw.resample(RESAMPLE_SFREQ, verbose=False)

    # ICA is fitted on the leading part of the recording only, to bound memory/time.
    raw_ica = raw.copy().crop(tmax=min(raw.times[-1], ICA_MAX_DURATION))

    # Average referencing removes one degree of freedom, so at most
    # n_channels - 1 components can be estimated; clamp instead of failing the
    # whole file when fewer channels than ICA_COMPONENTS are available.
    n_components = min(ICA_COMPONENTS, len(raw.ch_names) - 1)

    ica = ICA(
        n_components=n_components,
        random_state=ICA_RANDOM_STATE,
        method="fastica",
        max_iter="auto",
    )
    ica.fit(raw_ica)

    ica.exclude = _flag_artifact_components(ica, raw_ica)
    print("Removing ICA components:", ica.exclude)

    # The unmixing learned on the cropped segment is applied to the full recording.
    raw = ica.apply(raw)

    del raw_ica, ica
    gc.collect()

    # Cut the cleaned recording into fixed-length epochs
    epochs = mne.make_fixed_length_epochs(
        raw,
        duration=EPOCH_LEN,
        preload=True,
        verbose=False,
    )

    if len(epochs) == 0:
        print("No epochs created:", file)
        return

    X = epochs.get_data().astype(np.float32)

    # MNE returns EEG in volts, where a fixed 1e-6 stabiliser is not negligible
    # next to microvolt-scale standard deviations. Z-scoring is scale-invariant,
    # so rescale to microvolts first; the stabiliser then only guards against
    # division by zero on flat segments.
    X *= 1e6

    # Normalize every (epoch, channel) time series to zero mean / unit variance
    mean = X.mean(axis=-1, keepdims=True)
    std = X.std(axis=-1, keepdims=True) + 1e-6
    X = (X - mean) / std

    # Placeholder labels: every epoch is written with label 0.
    # NOTE(review): real class labels presumably get attached downstream -- confirm.
    y = np.zeros(len(X), dtype=np.int8)

    # Save as .npz for later model training
    _save_npz_atomic(save_path, X=X, y=y)
    print("Saved:", save_path)


# Walk the raw dataset and preprocess every EDF recording that has no output yet.
# Outputs are keyed by bare file name, so two recordings with the same name in
# different sub-folders map to the same .npz (the second one is skipped).
for root, _, files in os.walk(RAW_PATH):
    for file in sorted(files):
        stem, ext = os.path.splitext(file)
        if ext.lower() != ".edf":
            continue

        edf_path = os.path.join(root, file)
        save_path = os.path.join(SAVE_PATH, stem + ".npz")

        if os.path.exists(save_path):
            print("Skipping (already done):", file)
            continue

        print("\nProcessing:", file)

        try:
            _preprocess_recording(edf_path, save_path, file)
        except Exception as e:
            # One bad recording must not abort the whole batch.
            print("Error processing", file, ":", e)
        finally:
            # The helper's large locals are unreachable once it returns or
            # raises; collect now so the next recording starts from a low
            # memory footprint.
            gc.collect()