In [1]:
import mne

In [38]:
!pip install numpy scikit-learn mne pyEDFlib



###  Choose Data for Analysis and Data Preprocessing

In [26]:
import numpy as np

In [29]:
import mne

# Replace these file paths with the paths to your downloaded files
psg_file = 'sleep-cassette/SC4001E0-PSG.edf'
hyp_file = 'sleep-cassette/SC4001EC-Hypnogram.edf'

# Read PSG file
raw = mne.io.read_raw_edf(psg_file, preload=True)
# Read and apply annotations from the hypnogram file
annotations = mne.read_annotations(hyp_file)
raw.set_annotations(annotations)


Extracting EDF parameters from C:\Users\bu23957\Downloads\sleep-cassette\SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 7949999  =      0.000 ... 79499.990 secs...


0,1
Measurement date,"April 24, 1989 16:13:00 GMT"
Experimenter,Unknown
Participant,X

0,1
Digitized points,Not available
Good channels,7 EEG
Bad channels,
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,100.00 Hz
Highpass,0.50 Hz
Lowpass,100.00 Hz
Filenames,SC4001E0-PSG.edf
Duration,22:04:60 (HH:MM:SS)


### preprocessing

In [31]:
# Function for data preprocessing (filtering, resampling, and epoching)
def preprocess_data(raw, epoch_length=30):
    # Filter data between 0.5 and 40 Hz
    raw.filter(0.5, 40, fir_design='firwin')
    
    # Resample the data to 100 Hz to reduce data size and computation time
    raw.resample(100)
    
    # Create events from annotations in the raw data
    # Mapping sleep stage annotations to integers for epoching
    annotation_desc_2_event_id = {
        'Sleep stage W': 1,
        'Sleep stage 1': 2,
        'Sleep stage 2': 3,
        'Sleep stage 3': 4,
        'Sleep stage 4': 4,  # Merge stage 3 and stage 4 for AASM classification
        'Sleep stage R': 5
    }
    events, event_id = mne.events_from_annotations(raw, event_id=annotation_desc_2_event_id)
    
    # Pick the EEG channels for epoching
    picks = mne.pick_types(raw.info, meg=False, eeg=True, eog=False, stim=False)
    
    # Epoch the data around event markers
    epochs = mne.Epochs(raw, events, event_id=event_id, picks=picks, tmin=0, tmax=epoch_length - 1 / raw.info['sfreq'], 
                        baseline=None, preload=True)
    
    return epochs

# File paths - replace these with your actual file paths
psg_file = 'sleep-cassette/SC4001E0-PSG.edf'
hyp_file = 'sleep-cassette/SC4001EC-Hypnogram.edf'

# Read the PSG and hypnogram files
raw = read_data(psg_file, hyp_file)

# Preprocess the data (filter, resample, epoch)
epochs = preprocess_data(raw)

# Print summary information about the epochs
print(epochs)


Extracting EDF parameters from C:\Users\bu23957\Downloads\sleep-cassette\SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 7949999  =      0.000 ... 79499.990 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 661 samples (6.610 s)

Sampling frequency of the instance is already 100.0, returning unmodified.
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stag

In [32]:
# Example for directly using the events
labels = epochs.events[:, -1]  # Assuming the last column contains the event/stage ID

In [33]:
X = epochs.get_data()  # Extract epoch data
X = (X - X.mean()) / X.std()  # Normalize
y = to_categorical(labels)  # Assuming labels are already in a numeric format

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

model = Sequential([
    Conv1D(filters=16, kernel_size=3, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(y.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [35]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
history = model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [41]:
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc:.4f}")

1/1 - 0s - loss: 3.7454 - accuracy: 0.2258 - 48ms/epoch - 48ms/step
Test accuracy: 0.2258


In [42]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

print(confusion_matrix(y_test_classes, y_pred_classes))
print(classification_report(y_test_classes, y_pred_classes))

[[0 0 0 0 0]
 [0 0 0 4 0]
 [0 2 1 6 1]
 [1 0 9 6 0]
 [0 0 0 1 0]]
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         4
           3       0.10      0.10      0.10        10
           4       0.35      0.38      0.36        16
           5       0.00      0.00      0.00         1

    accuracy                           0.23        31
   macro avg       0.09      0.10      0.09        31
weighted avg       0.21      0.23      0.22        31

