## Extracted sampling frequency (fs) from EEG .mat files and stored it in a CSV

In [None]:
import os
import scipy.io as sio
import pandas as pd
from tqdm import tqdm

# Input reference table, directory holding the .mat recordings, and output path.
REFERENCE_CSV = "/shared_data/training/REFERENCE.csv"
MAT_DIR = "/shared_data/training"
OUTPUT_CSV = "reference_with_fs.csv"

# The reference CSV is read without a header row; its four columns are
# filename, seizure flag, onset and offset.
df = pd.read_csv(REFERENCE_CSV, header=None, names=["filename", "seizure", "onset", "offset"])

# One [filename, seizure, fs, onset, offset] row per reference entry.
records = []

for _, row in tqdm(df.iterrows(), total=len(df)):
    filename, seizure, onset, offset = row["filename"], row["seizure"], row["onset"], row["offset"]
    # The filename is joined as-is; scipy's loadmat appends ".mat" when the
    # extension is absent (appendmat defaults to True).
    mat_path = os.path.join(MAT_DIR, filename)

    try:
        # Only "fs" is needed, so ask loadmat to skip every other variable
        # instead of decoding the whole recording.
        mat_data = sio.loadmat(mat_path, simplify_cells=True, variable_names=["fs"])
        fs = int(mat_data["fs"])
    except Exception as e:
        # Best effort: report the failure and keep the row with a missing fs
        # so the output stays aligned with the reference table.
        print(f"Failed to read {filename}: {e}")
        fs = None

    records.append([filename, seizure, fs, onset, offset])

# Write the reference table, now including the 'fs' column, to a new CSV.
df_out = pd.DataFrame(records, columns=["filename", "seizure", "fs", "onset", "offset"])
df_out.to_csv(OUTPUT_CSV, index=False)
print(f" Sampling frequencies saved to: {OUTPUT_CSV}")


## Processed seizure EEG files and prepared filtered data arrays

In [None]:
import os
import numpy as np
import scipy.io as sio
from scipy.signal import resample_poly
import pandas as pd
from wettbewerb import get_6montages
import mne

# Configuration
REFERENCE_CSV = "reference_with_fs.csv"
MAT_DIR = "/shared_data/training"
SAVE_X = "x_filtered_seizures.npy"
SAVE_Y = "y_filtered_seizures.npy"
SAVE_META = "meta_filtered_seizures.npy"

TARGET_FS = 250        # Sampling rate (Hz) every recording is brought to
DURATION_SEC = 300     # Fixed recording length in seconds (5 minutes)
MAX_SAMPLES = TARGET_FS * DURATION_SEC

# Keep seizure recordings sampled at 250 or 256 Hz whose offset (treated as
# seconds) lies within the fixed window that recordings are trimmed to below.
df = pd.read_csv(REFERENCE_CSV)
df = df[(df["seizure"] == 1) & (df["fs"].isin([250, 256])) & (df["offset"] <= DURATION_SEC)]

# Accumulators for the signal arrays, the labels and the per-file metadata.
x_all, y_all, meta_all = [], [], []

for _, row in df.iterrows():
    file_id = row["filename"]
    mat_path = os.path.join(MAT_DIR, file_id + ".mat")
    if not os.path.exists(mat_path):
        continue

    try:
        eeg_data = sio.loadmat(mat_path, simplify_cells=True)
        data = eeg_data["data"]
        fs = int(eeg_data["fs"])
        ch_names = [ch.strip() for ch in eeg_data["channels"]]

        # Build the montage channels; recordings flagged by get_6montages as
        # missing (presumably lacking a required channel) are skipped.
        _, montage_data, is_missing = get_6montages(ch_names, data)
        if is_missing:
            continue

        # Per channel: notch at 50 and 100 Hz, then band-pass 0.5-70 Hz.
        for i in range(montage_data.shape[0]):
            montage_data[i] = mne.filter.notch_filter(montage_data[i], Fs=fs, freqs=[50.0, 100.0], verbose=False)
            montage_data[i] = mne.filter.filter_data(montage_data[i], sfreq=fs, l_freq=0.5, h_freq=70.0, verbose=False)

        # Onset/offset are times in seconds. Resampling changes how many
        # samples represent one second, not the time axis itself, so the
        # annotations must NOT be rescaled together with the signal.
        onset_sec, offset_sec = row["onset"], row["offset"]
        if fs != TARGET_FS:
            montage_data = resample_poly(montage_data, up=TARGET_FS, down=fs, axis=1)

        # Zero-pad at the end or truncate so every recording has MAX_SAMPLES columns.
        total_samples = montage_data.shape[1]
        if total_samples < MAX_SAMPLES:
            pad_width = MAX_SAMPLES - total_samples
            montage_data = np.pad(montage_data, ((0, 0), (0, pad_width)), mode='constant')
        else:
            montage_data = montage_data[:, :MAX_SAMPLES]

        x_all.append(montage_data)
        y_all.append(1)  # Seizure label
        meta_all.append((file_id, onset_sec, offset_sec))

    except Exception as e:
        # Best effort: a recording that fails anywhere above is reported and dropped.
        print(f"Skipping {file_id} due to error: {e}")
        continue

# Stack the per-recording results and persist them.
x_all = np.stack(x_all)
y_all = np.array(y_all)
meta_all = np.array(meta_all, dtype=object)

np.save(SAVE_X, x_all)
np.save(SAVE_Y, y_all)
np.save(SAVE_META, meta_all)

print(" Seizure data saved. Shapes:", x_all.shape, y_all.shape, meta_all.shape)


## Processed non-seizure EEG files (limited to 1711 samples)

In [None]:
import os
import numpy as np
import scipy.io as sio
from scipy.signal import resample_poly
import pandas as pd
from wettbewerb import get_6montages
import mne

# Configuration
REFERENCE_CSV = "reference_with_fs.csv"
MAT_DIR = "/shared_data/training"
SAVE_X = "x_1711_nonseizures.npy"
SAVE_Y = "y_1711_nonseizures.npy"
SAVE_META = "meta_1711_nonseizures.npy"

TARGET_FS = 250        # Sampling rate (Hz) every recording is brought to
DURATION_SEC = 300     # Fixed recording length in seconds
MAX_SAMPLES = TARGET_FS * DURATION_SEC
TARGET_COUNT = 1711    # Stop once this many non-seizure recordings are collected

# Keep non-seizure recordings sampled at 250 or 256 Hz.
df = pd.read_csv(REFERENCE_CSV)
df = df[(df["seizure"] == 0) & (df["fs"].isin([250, 256]))]

# Accumulators for the signal arrays, the labels and the per-file metadata.
x_all, y_all, meta_all = [], [], []

for _, row in df.iterrows():
    # Only successfully processed recordings count towards the limit.
    if len(x_all) >= TARGET_COUNT:
        break

    file_id = row["filename"]
    mat_path = os.path.join(MAT_DIR, file_id + ".mat")
    if not os.path.exists(mat_path):
        continue

    try:
        eeg_data = sio.loadmat(mat_path, simplify_cells=True)
        data = eeg_data["data"]
        fs = int(eeg_data["fs"])
        ch_names = [ch.strip() for ch in eeg_data["channels"]]

        # Build the montage channels; recordings flagged by get_6montages as
        # missing (presumably lacking a required channel) are skipped.
        _, montage_data, is_missing = get_6montages(ch_names, data)
        if is_missing:
            continue

        # Per channel: notch at 50 and 100 Hz, then band-pass 0.5-70 Hz.
        for i in range(montage_data.shape[0]):
            montage_data[i] = mne.filter.notch_filter(montage_data[i], Fs=fs, freqs=[50.0, 100.0], verbose=False)
            montage_data[i] = mne.filter.filter_data(montage_data[i], sfreq=fs, l_freq=0.5, h_freq=70.0, verbose=False)

        if fs != TARGET_FS:
            montage_data = resample_poly(montage_data, up=TARGET_FS, down=fs, axis=1)

        # Zero-pad at the end or truncate so every recording has MAX_SAMPLES columns.
        total_samples = montage_data.shape[1]
        if total_samples < MAX_SAMPLES:
            montage_data = np.pad(montage_data, ((0, 0), (0, MAX_SAMPLES - total_samples)), mode='constant')
        else:
            montage_data = montage_data[:, :MAX_SAMPLES]

        x_all.append(montage_data)
        y_all.append(0)  # Non-seizure label
        # No seizure interval exists, so onset/offset are stored as 0.0.
        meta_all.append((file_id, 0.0, 0.0))

    except Exception as e:
        # Best effort: a recording that fails anywhere above is reported and dropped.
        print(f"Skipping {file_id} due to error: {e}")
        continue

# Stack the per-recording results and persist them.
x_all = np.stack(x_all)
y_all = np.array(y_all)
meta_all = np.array(meta_all, dtype=object)

np.save(SAVE_X, x_all)
np.save(SAVE_Y, y_all)
np.save(SAVE_META, meta_all)

print(" Non-seizure data saved. Shapes:", x_all.shape, y_all.shape, meta_all.shape)
