#### Thoughts 



In [37]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mne
import json 
import os

In [20]:

# --- Define file paths for one subject ---
# Replace with the actual paths to your files
eeg_file = '/Users/katieoreilly/Desktop/unsw/SCIF_2001_actual2.nosync/SCIF_2001/EEG_data/ds005207/sub-001/ses-001/eeg/sub-001_ses-001_task-sleep_acq-PSG_eeg.set'
scoring_fname = '/Users/katieoreilly/Desktop/unsw/SCIF_2001_actual2.nosync/SCIF_2001/EEG_data/ds005207/sub-001/ses-001/eeg/sub-001_ses-001_task-sleep_acq-PSGScoring_events.tsv'

#This is just for putting the correct labels on the data 
mapping_fname = '/Users/katieoreilly/Desktop/unsw/SCIF_2001_actual2.nosync/SCIF_2001/EEG_data/ds005207/task-sleep_acq-cEEGridScoring_events.json'



# --- Load the raw EEG data ---
# preload=True loads the data into memory, which is necessary for filtering
raw = mne.io.read_raw_eeglab(eeg_file, preload=True)
annots_df = pd.read_csv(scoring_fname, sep='\t')

# --- Load the sleep stage mapping from the JSON file ---
with open(mapping_fname, 'r') as f:
    eeg_json = json.load(f)

# The mapping is nested, so we extract it
sleep_stage_mapping = eeg_json['staging']['Levels']
print("Sleep Stage Mapping:")
print(sleep_stage_mapping)


Reading /Users/katieoreilly/Desktop/unsw/SCIF_2001_actual2.nosync/SCIF_2001/EEG_data/ds005207/sub-001/ses-001/eeg/sub-001_ses-001_task-sleep_acq-psg_eeg.fdt
Reading 0 ... 11026431  =      0.000 ... 43071.996 secs...
Sleep Stage Mapping:
{'1': 'Wake', '2': 'REM', '3': 'N1', '4': 'N2', '5': 'N3', '6': 'A', '7': 'Movement', '8': 'Artefact', '9': 'Unscored'}


In [21]:
# --- Read the scoring data with Pandas ---
scoring_df = pd.read_csv(scoring_fname, sep='\t')
print("\nScoring file preview:")
print(scoring_df.head())

# --- Create MNE Annotations ---
# MNE annotations need three things: onset (start time in seconds),
# duration (length in seconds), and description (the label).

onsets = scoring_df['onset'].to_numpy()
# All sleep stages are 30 seconds long
durations = np.full(len(scoring_df), 30) 
# Map the numeric staging value to its string name (e.g., 1 -> "Wake")
descriptions = scoring_df['staging'].map(str).map(sleep_stage_mapping).to_numpy()

# Create the Annotations object
annotations = mne.Annotations(onset=onsets, duration=durations, description=descriptions)

# Apply the annotations to our raw data
raw.set_annotations(annotations)

# You can visualize the raw data with annotations to check
# raw.plot(start=0, duration=600, n_channels=5, scalings='auto')


Scoring file preview:
   onset  duration  staging
0      0         0        6
1     30         0        1
2     60         0        1
3     90         0        1
4    120         0        1


  raw.set_annotations(annotations)
  raw.set_annotations(annotations)


Unnamed: 0,General,General.1
,Filename(s),sub-001_ses-001_task-sleep_acq-psg_eeg.fdt
,MNE object type,RawEEGLAB
,Measurement date,Unknown
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,11:57:52 (HH:MM:SS)
,Sampling frequency,256.00 Hz
,Time points,11026432
,Channels,Channels


### Create Epochs 


In [34]:
# --- Create events from annotations ---
# This function converts the string descriptions into integer event IDs
# We also get the event_id dictionary, which is the reverse of our mapping
events, event_id = mne.events_from_annotations(raw, event_id=None)

print("\nEvent ID dictionary created by MNE:")
print(event_id)

# --- Create the epochs ---
# tmin=0 and tmax=30 ensures each epoch is exactly 30 seconds long,
# starting from the onset of the annotation.
# baseline=None is important for sleep data as there's no clear "pre-stimulus" period.
epochs = mne.Epochs(
    raw=raw,
    events=events,
    event_id=event_id,
    tmin=0,
    tmax=30,
    preload=True,  # Load epochs into memory for cleaning
    baseline=None
)

print("\nCreated epochs object:")
print(epochs)

Used Annotations descriptions: [np.str_('A'), np.str_('N1'), np.str_('N2'), np.str_('N3'), np.str_('REM'), np.str_('Wake')]

Event ID dictionary created by MNE:
{np.str_('A'): 1, np.str_('N1'): 2, np.str_('N2'): 3, np.str_('N3'): 4, np.str_('REM'): 5, np.str_('Wake'): 6}
Not setting metadata
1436 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 1436 events and 7681 original time points ...
1 bad epochs dropped

Created epochs object:
<Epochs | 1435 events (all good), 0 – 30 s (baseline off), ~1.23 GiB, data loaded,
 np.str_('A'): 1
 np.str_('N1'): 110
 np.str_('N2'): 402
 np.str_('N3'): 142
 np.str_('REM'): 99
 np.str_('Wake'): 681>


In [35]:
# --- 1. Apply a band-pass filter ---
# Common frequencies for sleep analysis are 0.3 Hz to 35 Hz.
epochs.filter(l_freq=0.3, h_freq=35)
print("\nApplied band-pass filter (0.3-35 Hz).")

# --- 2. Apply artifact rejection ---
# This will drop any epoch where the peak-to-peak amplitude in an EEG
# channel exceeds 150 microvolts (150e-6 V).
# You may need to adjust this threshold based on your data.
# reject_criteria = dict(eeg=150e-6) # 150 µV
# epochs.drop_bad(reject=reject_criteria)

print("\nApplied artifact rejection and dropped bad epochs.")
print("Epochs remaining:", len(epochs))

Setting up band-pass filter from 0.3 - 35 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.30
- Lower transition bandwidth: 0.30 Hz (-6 dB cutoff frequency: 0.15 Hz)
- Upper passband edge: 35.00 Hz
- Upper transition bandwidth: 8.75 Hz (-6 dB cutoff frequency: 39.38 Hz)
- Filter length: 2817 samples (11.004 s)


Applied band-pass filter (0.3-35 Hz).

Applied artifact rejection and dropped bad epochs.
Epochs remaining: 1435


In [42]:
# --- Define the output file path ---
output_path = '/Users/katieoreilly/Desktop/unsw/SCIF_2001_actual2.nosync/SCIF_2001/EEG_data'
# if not os.path.exists(output_path):
#     os.makedirs(output_path)

cleaned_epochs_fname = os.path.join(output_path, 'sub-001_cleaned-epo.fif')

# --- Save the epochs object ---
epochs.save(cleaned_epochs_fname, overwrite=True)

print(f"\nSuccessfully saved cleaned epochs to:\n{cleaned_epochs_fname}")

# You can easily load this file back later with:
# loaded_epochs = mne.read_epochs(cleaned_epochs_fname)

Overwriting existing file.
Overwriting existing file.
Overwriting existing file.

Successfully saved cleaned epochs to:
/Users/katieoreilly/Desktop/unsw/SCIF_2001_actual2.nosync/SCIF_2001/EEG_data/sub-001_cleaned-epo.fif


### Code at end to check/visualise data loaded for the patient

In [43]:
# --- Load the cleaned epochs object ---
# set preload=True to load the data into memory
epochs = mne.read_epochs(cleaned_epochs_fname, preload=True)

# --- Convert the epochs object to a pandas DataFrame ---
# This creates a "long-format" table, which is great for inspection and plotting.
df = epochs.to_data_frame()

# --- Display the first few rows of the table ---
print("Cleaned data as a DataFrame:")
print(df.head())

# --- To see the different sleep stages included ---
print("\nSleep stages in the cleaned data:")
print(df['condition'].unique())

Reading /Users/katieoreilly/Desktop/unsw/SCIF_2001_actual2.nosync/SCIF_2001/EEG_data/sub-001_cleaned-epo.fif ...
    Found the data of interest:
        t =       0.00 ...   30000.00 ms
        0 CTF compensation matrices available
Not setting metadata
1435 matching events found
No baseline correction applied
0 projection items activated
Cleaned data as a DataFrame:
       time condition  epoch     ECG II      EMG1      EMG2      EMG3  \
0  0.000000         A      0 -28.441034  0.183931  0.018823  0.159811   
1  0.003906         A      0 -28.245830  0.184985  0.019832  0.159880   
2  0.007812         A      0 -27.951918  0.186356  0.021131  0.159942   
3  0.011719         A      0 -27.622909  0.188057  0.022595  0.160092   
4  0.015625         A      0 -27.330528  0.190041  0.024170  0.160356   

      C4:A1     O2:A1     F4:A1      C3:A2      O1:A2      F3:A2    EOG1:A2  \
0  1.130711  0.386359  1.292040  16.493435  17.517998  13.481441  29.603779   
1  1.115705  0.379626  1.277978  1