# Sleep-EDF Dataset Exploration Notebook (Updated)

This notebook shows where the data lives and maps numeric labels to sleep stages.

In [1]:
import os
import sys

import mne

# Make the parent of the current working directory importable so the
# project-local loader package resolves. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
# NOTE(review): assumes the notebook is launched from a directory directly
# below the repository root — confirm for your checkout.
repo_root = os.path.dirname(os.getcwd())
if repo_root not in sys.path:
    sys.path.append(repo_root)

from time_series_datasets.sleep_edf.sleepedf_loader import SleepEDFDataset, get_sleepedf_data

# Resolve the Sleep-EDF data directory, show it, and build the dataset on it.
data_path = get_sleepedf_data()
print(f"Sleep-EDF data directory: {data_path}")
dataset = SleepEDFDataset(data_dir=data_path)

Sleep-EDF data directory: /Users/planger/Development/EmbedHealth/time_series_datasets/raw_data/sleep-edf-database-1.0.0/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette


## Mapping Numeric Labels to Sleep Stages

In [2]:
# Load the first recording listed by the dataset.
psg_file = dataset.data_files[0]
print(f"First PSG file: {psg_file}")
raw = mne.io.read_raw_edf(os.path.join(data_path, psg_file), preload=True)

# Find the matching hypnogram file. The PSG file and its hypnogram share the
# first six characters of the name (e.g. "SC4002E0-PSG.edf" and
# "SC4002EC-Hypnogram.edf"). Sorting makes the pick independent of the
# arbitrary order returned by os.listdir.
prefix = psg_file.split('-')[0][:6]
hyp_files = sorted(
    f for f in os.listdir(data_path)
    if f.startswith(prefix) and f.endswith('Hypnogram.edf')
)
if not hyp_files:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError(
        f"No hypnogram file starting with {prefix!r} found in {data_path}"
    )
hyp_file = hyp_files[0]
print(f"Matched Hypnogram file: {hyp_file}")
ann = mne.read_annotations(os.path.join(data_path, hyp_file))
raw.set_annotations(ann)

# Extract events and invert MNE's description -> code mapping so that a
# numeric code can be looked up to get its annotation description.
events, event_id = mne.events_from_annotations(raw)
code_to_stage = {code: desc for desc, code in event_id.items()}

print("\nAnnotation code → Sleep stage mapping:")
for code, desc in code_to_stage.items():
    print(f"  {code}: {desc}")

# Display the first few epochs and their stages.
# NOTE(review): this assumes the loader encodes labels with the same integer
# codes MNE derives above — confirm against the loader.
# NOTE(review): the captured loader log reports 151 events for roughly
# 84,900 s of signal, which suggests one epoch per annotation rather than one
# per 30 s window — confirm in the loader.
data, labels = dataset[0]
n_preview = min(5, len(labels))  # do not index past short recordings
print(f"\nFirst {n_preview} epochs and their stages:")
for i in range(n_preview):
    # Normalise to a plain int so the dict lookup does not depend on the
    # scalar type the loader returns.
    code = int(labels[i])
    stage = code_to_stage.get(code, "<no matching annotation code>")
    print(f"  Epoch {i}: code={code} → {stage}")

First PSG file: SC4002E0-PSG.edf
Extracting EDF parameters from /Users/planger/Development/EmbedHealth/time_series_datasets/raw_data/sleep-edf-database-1.0.0/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4002E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 8489999  =      0.000 ... 84899.990 secs...


  raw = mne.io.read_raw_edf(os.path.join(data_path, psg_file), preload=True)
  raw = mne.io.read_raw_edf(os.path.join(data_path, psg_file), preload=True)
  raw = mne.io.read_raw_edf(os.path.join(data_path, psg_file), preload=True)


Matched Hypnogram file: SC4002EC-Hypnogram.edf
Used Annotations descriptions: [np.str_('Movement time'), np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage 4'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]

Annotation code → Sleep stage mapping:
  1: Movement time
  2: Sleep stage 1
  3: Sleep stage 2
  4: Sleep stage 3
  5: Sleep stage 4
  6: Sleep stage ?
  7: Sleep stage R
  8: Sleep stage W
Extracting EDF parameters from /Users/planger/Development/EmbedHealth/time_series_datasets/raw_data/sleep-edf-database-1.0.0/physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/SC4002E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 8489999  =      0.000 ... 84899.990 secs...


  raw.set_annotations(ann)
  raw = mne.io.read_raw_edf(psg_path, preload=True)
  raw = mne.io.read_raw_edf(psg_path, preload=True)
  raw = mne.io.read_raw_edf(psg_path, preload=True)


Used Annotations descriptions: [np.str_('Movement time'), np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage 4'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]
Not setting metadata
151 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 151 events and 3000 original time points ...
0 bad epochs dropped

First 5 epochs and their stages:
  Epoch 0: code=8 → Sleep stage W
  Epoch 1: code=2 → Sleep stage 1
  Epoch 2: code=3 → Sleep stage 2
  Epoch 3: code=4 → Sleep stage 3
  Epoch 4: code=5 → Sleep stage 4


  raw.set_annotations(ann)
