In [None]:
from google.colab import drive
drive.flush_and_unmount()

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
with open("/content/drive/MyDrive/Colab Notebooks/Purdue Coursework/ECE570_Project/chb03_04.edf.seizures", "r", encoding="utf-8", errors="ignore") as f:
    print(f.read())


 X## time resolution: 256      r     4   


In [4]:
!pip install mne

Collecting mne
  Downloading mne-1.9.0-py3-none-any.whl.metadata (20 kB)
Downloading mne-1.9.0-py3-none-any.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m82.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mne
Successfully installed mne-1.9.0


In [5]:
import os
import pickle
import numpy as np
import mne
from pathlib import Path

In [6]:
# Set up EDF and seizure directory paths
edf_dir = "/content/drive/MyDrive/Colab Notebooks/Purdue Coursework/ECE570_Project"
seizure_dir = Path(edf_dir)

# List of EDF files to include in dataset
edf_files_train_val = [os.path.join(edf_dir, fname) for fname in [
    "chb01_03.edf", "chb01_04.edf", "chb01_15.edf", "chb01_16.edf", "chb01_18.edf",
    "chb03_01.edf", "chb03_02.edf", "chb03_03.edf", "chb03_04.edf"
]]

edf_files_test = [os.path.join(edf_dir, fname) for fname in [
    "chb05_06.edf", "chb05_13.edf", "chb05_16.edf", "chb05_17.edf"
]]

In [7]:
seizure_intervals = {
 'chb01_03.edf': [(2996, 3036)],
 'chb01_04.edf': [(1467, 1494)],
 'chb01_15.edf': [(1732, 1772)],
 'chb01_16.edf': [(1015, 1066)],
 'chb01_18.edf': [(1720, 1810)],
 'chb01_21.edf': [(327, 420)],
 'chb01_26.edf': [(1862, 1963)],
 'chb03_01.edf': [(362, 414)],
 'chb03_02.edf': [(731, 796)],
 'chb03_03.edf': [(432, 501)],
 'chb03_04.edf': [(2162, 2214)],
 'chb03_34.edf': [(1982, 2029)],
 'chb03_35.edf': [(2592, 2656)],
 'chb03_36.edf': [(1725, 1778)],
 'chb05_06.edf': [(417, 532)],
 'chb05_13.edf': [(1086, 1196)],
 'chb05_16.edf': [(2317, 2413)],
 'chb05_17.edf': [(2451, 2571)],
 'chb05_22.edf': [(2348, 2465)]
}


In [8]:
print(seizure_intervals)

{'chb01_03.edf': [(2996, 3036)], 'chb01_04.edf': [(1467, 1494)], 'chb01_15.edf': [(1732, 1772)], 'chb01_16.edf': [(1015, 1066)], 'chb01_18.edf': [(1720, 1810)], 'chb01_21.edf': [(327, 420)], 'chb01_26.edf': [(1862, 1963)], 'chb03_01.edf': [(362, 414)], 'chb03_02.edf': [(731, 796)], 'chb03_03.edf': [(432, 501)], 'chb03_04.edf': [(2162, 2214)], 'chb03_34.edf': [(1982, 2029)], 'chb03_35.edf': [(2592, 2656)], 'chb03_36.edf': [(1725, 1778)], 'chb05_06.edf': [(417, 532)], 'chb05_13.edf': [(1086, 1196)], 'chb05_16.edf': [(2317, 2413)], 'chb05_17.edf': [(2451, 2571)], 'chb05_22.edf': [(2348, 2465)]}


In [9]:
sampling_rate = 256
window_sec = 5
window_size = sampling_rate * window_sec
target_seizure = 80
target_nonseizure = 400

In [10]:
def extract_segments(edf_files, seizure_intervals, target_seizure=80, target_nonseizure=400):
    X_final, y_final = [], []
    for edf_path in edf_files:
        edf_name = os.path.basename(edf_path)
        raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
        seizure_ranges = seizure_intervals.get(edf_name, [])
        n_samples = int(raw.n_times)
        n_channels = len(raw.info['ch_names'])

        for start_idx in range(0, n_samples - window_size + 1, sampling_rate):
            end_idx = start_idx + window_size
            center_sec = (start_idx + end_idx) / 2 / sampling_rate
            is_seizure = any(sz_start <= center_sec <= sz_end for sz_start, sz_end in seizure_ranges)

            if not is_seizure and (start_idx % (5 * sampling_rate) != 0):
                continue
            if is_seizure and y_final.count(1) >= target_seizure:
                continue
            if not is_seizure and y_final.count(0) >= target_nonseizure:
                continue

            try:
                segment = raw.get_data(start=start_idx, stop=end_idx)
                if segment.shape != (n_channels, window_size):
                    continue
                X_final.append(segment.astype(np.float32))
                y_final.append(1 if is_seizure else 0)
            except Exception:
                continue
    return np.stack(X_final), np.array(y_final)


In [13]:
# Train/Val
X_trainval, y_trainval = extract_segments(edf_files_train_val, seizure_intervals)
with open(os.path.join(edf_dir, "trainval.pkl"), "wb") as f:
    pickle.dump({"data": X_trainval, "labels": y_trainval}, f)

# Test
X_test, y_test = extract_segments(edf_files_test, seizure_intervals)
with open(os.path.join(edf_dir, "test.pkl"), "wb") as f:
    pickle.dump({"data": X_test, "labels": y_test}, f)


  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)


In [11]:
X_np = np.stack(X_final)
y_np = np.array(y_final)
idx = np.random.permutation(len(X_np))
X_np = X_np[idx]
y_np = y_np[idx]

NameError: name 'X_final' is not defined

In [49]:
output_path = os.path.join(edf_dir, "mini_chbmit_balanced.pkl")
with open(output_path, "wb") as f:
    pickle.dump({"data": X_np, "labels": y_np}, f)

In [50]:
print("Saved to:", output_path)
print("Label counts:", np.bincount(y_np))
print("Shape:", X_np.shape)

Saved to: /content/drive/MyDrive/Colab Notebooks/Purdue Coursework/ECE570_Project/mini_chbmit_balanced.pkl
Label counts: [400  80]
Shape: (480, 23, 1280)


In [51]:
import pickle

# Load the dataset
with open("/content/drive/MyDrive/Colab Notebooks/Purdue Coursework/ECE570_Project/mini_chbmit_balanced.pkl", "rb") as f:
    data = pickle.load(f)

X = data["data"]       # shape: (N, C, T)
y = data["labels"]     # shape: (N,)

# Print results
print("Data shape:", X.shape)
print("Label distribution:", np.bincount(y))


Data shape: (480, 23, 1280)
Label distribution: [400  80]


In [52]:
np.unique(y, return_counts=True)

(array([0, 1]), array([400,  80]))

In [46]:
X_final, y_final = [], []

# Process each EDF file
for edf_path in edf_files:
    edf_name = os.path.basename(edf_path)
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
    seizure_ranges = seizure_intervals.get(edf_name, [])
    n_samples = int(raw.n_times)
    n_channels = len(raw.info['ch_names'])

    # Loop with 1-sec stride for seizure, 5-sec stride for non-seizure
    for start_idx in range(0, n_samples - window_size + 1, sampling_rate):
        end_idx = start_idx + window_size
        center_sec = (start_idx + end_idx) / 2 / sampling_rate

        is_seizure = any(sz_start <= center_sec <= sz_end for sz_start, sz_end in seizure_ranges)

        # Only keep non-seizure windows every 5 seconds
        if not is_seizure and (start_idx % (5 * sampling_rate) != 0):
            continue
        if is_seizure and y_final.count(1) >= target_seizure:
            continue
        if not is_seizure and y_final.count(0) >= target_nonseizure:
            continue

        try:
            segment = raw.get_data(start=start_idx, stop=end_idx)
            if segment.shape != (n_channels, window_size):
                continue
            X_final.append(segment.astype(np.float32))
            y_final.append(1 if is_seizure else 0)
        except Exception:
            continue

  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
  raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)
