In [None]:
!pip install mne
!pip install scikit-learn
!pip install mne numpy scipy sklearn matplotlib pyedflib
!pip install -U mne

In [3]:
import mne
import numpy as np
from scipy.signal import welch
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

# Path to your EEG data and hypnogram file
eeg_file_path = 'SC4022E0-PSG.edf'
hypnogram_file_path = 'SC4022EJ-Hypnogram.edf'

# Load EEG data
raw = mne.io.read_raw_edf(eeg_file_path, preload=True)
annotations = mne.read_annotations(hypnogram_file_path)
raw.set_annotations(annotations)

# Apply a band-pass filter
raw.filter(0.5, 30, fir_design='firwin')

# Mapping of sleep stages to integers, excluding 'Sleep stage ?'
event_id = {'Sleep stage W': 1, 'Sleep stage 1': 2, 'Sleep stage 2': 3,
            'Sleep stage 3': 4, 'Sleep stage 4': 4, 'Sleep stage R': 5}

# Create events from annotations
events, _ = mne.events_from_annotations(raw, event_id=event_id, chunk_duration=30.)

# Create epochs, ignoring annotations that are not found in event_id
epochs = mne.Epochs(raw, events, event_id=event_id, tmin=0., tmax=30., baseline=None, preload=True, on_missing='ignore')

# Convert epochs to data array
data = epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)

# Feature extraction: Power Spectral Density (PSD)
n_fft = int(raw.info['sfreq'])
psds = []
for epoch in data:
    psd = welch(epoch, fs=raw.info['sfreq'], nperseg=n_fft)
    psds.append(psd[1].mean(axis=1))  # Averaging across frequencies
psds = np.array(psds)

# Preparing labels for the classifier
labels = epochs.events[:, -1]

# Data scaling
scaler = StandardScaler()
psds_scaled = scaler.fit_transform(psds)

# Splitting dataset into training and testing set
X_train, X_test, y_train, y_test = train_test_split(psds_scaled, labels, test_size=0.25, random_state=42)

# Training a RandomForest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predictions
y_pred = clf.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))


Extracting EDF parameters from C:\Users\bu23957\Downloads\SC4022E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 8267999  =      0.000 ... 82679.990 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 661 samples (6.610 s)



  raw.set_annotations(annotations)


Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
2755 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 2755 events and 3001 original time points ...
1 bad epochs dropped


  data = epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)


              precision    recall  f1-score   support

           1       1.00      0.99      0.99       476
           2       0.72      0.56      0.63        50
           3       0.84      0.96      0.90        94
           4       0.90      0.90      0.90        21
           5       0.80      0.85      0.83        48

    accuracy                           0.94       689
   macro avg       0.85      0.85      0.85       689
weighted avg       0.94      0.94      0.94       689

