# Installation of Required Libraries

##### I will need some libraries for EDF reading, signal processing and machine learning.

In [3]:
 pip install mne

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install pyedflib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install numpy pandas scikit-learn matplotlib

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


### 1. Import Libraries

In [285]:
import os
import mne
import numpy as np
from scipy.signal import welch
from scipy.stats import skew, kurtosis
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

### 2. Loading of EDF and Hypnogram files

In [52]:
path = r"C:\Users\Admin\PycharmProjects\SVM + RF"
data_dir = os.path.join(path, "Data001")
files = os.listdir(data_dir)

print("Files in folder:", files)

# Assign the first and second items
edf_file = files[0]
annot_file = files[1]

print("First file:", edf_file)
print("Second file:", annot_file)
#file_path = os.path.join(path, "Data001", "SC4001E0-PSG.edf")

Files in folder: ['SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf']
First file: SC4001E0-PSG.edf
Second file: SC4001EC-Hypnogram.edf


In [79]:
path_edf = os.path.join(data_dir, edf_file)
path_annot = os.path.join(data_dir, annot_file)
data = mne.io.read_raw_edf(path_edf, preload=True)
annots = mne.read_annotations(path_annot)
data.set_annotations(annots)

Extracting EDF parameters from C:\Users\Admin\PycharmProjects\SVM + RF\Data001\SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 7949999  =      0.000 ... 79499.990 secs...


  data = mne.io.read_raw_edf(path_edf, preload=True)
  data = mne.io.read_raw_edf(path_edf, preload=True)
  data = mne.io.read_raw_edf(path_edf, preload=True)
  data.set_annotations(annots)


Unnamed: 0,General,General.1
,Filename(s),SC4001E0-PSG.edf
,MNE object type,RawEDF
,Measurement date,1989-04-24 at 16:13:00 UTC
,Participant,X
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,22:05:00 (HH:MM:SS)
,Sampling frequency,100.00 Hz
,Time points,7950000
,Channels,Channels


### 3. Resampling

In [82]:
raw.resample(50)

Sampling frequency of the instance is already 50.0, returning unmodified.


Unnamed: 0,General,General.1
,Filename(s),SC4001E0-PSG.edf
,MNE object type,RawEDF
,Measurement date,1989-04-24 at 16:13:00 UTC
,Participant,X
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,22:05:00 (HH:MM:SS)
,Sampling frequency,50.00 Hz
,Time points,3975000
,Channels,Channels


### 4. Extraction of Epochs

In [110]:
mapping = {
    "Sleep stage W": 0,   # Wake
    "Sleep stage 1": 1,
    "Sleep stage 2": 2,
    "Sleep stage 3": 3,
    "Sleep stage 4": 3,  # merge stage3/stage4
    "Sleep stage R": 4
}

events, event_id = mne.events_from_annotations(raw, event_id=mapping)

epochs = mne.Epochs(
    raw,
    events,
    event_id=None,
    tmin=0,
    tmax=30,
    baseline=None,
    detrend=1,
    preload=True
)

labels = epochs.events[:, -1]  # numeric stage labels
print(labels)

#unique, counts = np.unique(labels, return_counts=True)
#stage_counts = dict(zip(unique, counts))
#print("Counts per stage (integer labels):", stage_counts)

#id_to_stage = {0: "W", 1: "N1", 2: "N2", 3: "N3", 4: "REM"}
#stage_counts_named = {id_to_stage[k]: v for k, v in stage_counts.items()}
#print("Counts per stage (names):", stage_counts_named)

#print("Total (items): ", len(labels))

Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']
Not setting metadata
153 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 153 events and 1501 original time points ...
0 bad epochs dropped
[0 1 2 3 2 3 3 3 3 3 3 3 3 0 3 2 3 3 3 3 2 3 2 3 1 2 3 2 3 2 3 3 3 2 3 3 3
 3 1 3 2 3 2 3 4 1 2 3 1 2 1 2 3 2 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3
 1 2 1 2 1 2 1 2 4 2 1 0 1 2 1 2 1 2 3 2 3 2 3 2 3 3 3 2 3 2 3 3 3 3 3 3 2
 3 3 2 4 1 4 0 1 0 1 0 1 2 0 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 1 4 0 1 4 0 1
 0 1 0 1 0]


### 5. Extracts the numeric stage codes for each epoch.

In [126]:
# Converts numeric stage codes into binary labels (0/1).
# 0 = Awake (W)
# 1 = Asleep (N1, N2, N3, REM)

stage_map = {
    0: 0,  # Wake -> 0
    1: 1,  # N1 -> Sleep
    2: 1,  # N2 -> Sleep
    3: 1,  # N3 -> Sleep
    4: 1   # REM -> Sleep
}

# y = target labels for classification.
y = np.array([stage_map[item] for item in labels])

print(np.unique(y, return_counts=True))


(array([0, 1]), array([ 12, 141], dtype=int64))


### 6. Feature Extraction (with one channel (e.g. EEG Fpz-Cz))

In [266]:
def extract_features(epoch_data, sfreq):
    # Extracts spectral band powers from one epoch
    f, psd = welch(epoch_data, sfreq, nperseg=sfreq*2)
    bands = {
        "delta": (0.5, 4),
        "theta": (4, 8),
        "alpha": (8, 12),
        "beta": (12, 30)
    }
    feats = []
    for (low, high) in bands.values():
        idx = np.logical_and(f >= low, f <= high)
        feats.append(np.mean(psd[idx]))
        
    # Add time-domain features
    feats.append(np.mean(epoch_data))
    feats.append(np.std(epoch_data))
    return np.array(feats)

X = []
for ep in epochs.get_data():    # shape (n_epochs, n_channels, n_times)
    ep_data = ep[0]             # take only first EEG channel
    feats = extract_features(ep_data, epochs.info['sfreq'])
    X.append(feats)

X = np.vstack(X)
#print(epochs.info['ch_names'][0])
#print(X)

### 7. Splitting of Train and Test (with one channel)

In [249]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

### 8. Support Vector Machines (with one channel)

In [252]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm = SVC(kernel='rbf', C=1, gamma='scale')
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)


### 9. Random Forest (with one channel)

In [255]:
rf = RandomForestClassifier(n_estimators=200, max_depth=None, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)


### 10. Evaluation (with one EEG channel and few features)

In [258]:
print("=== SVM Results ===")
print(classification_report(y_test, y_pred_svm, zero_division=0))
print("Accuracy:", accuracy_score(y_test, y_pred_svm))

print("\n=== Random Forest Results ===")
print(classification_report(y_test, y_pred_rf, zero_division=0))
print("Accuracy:", accuracy_score(y_test, y_pred_rf))


=== SVM Results ===
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.94      1.00      0.97        29

    accuracy                           0.94        31
   macro avg       0.47      0.50      0.48        31
weighted avg       0.88      0.94      0.90        31

Accuracy: 0.9354838709677419

=== Random Forest Results ===
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.93      0.97      0.95        29

    accuracy                           0.90        31
   macro avg       0.47      0.48      0.47        31
weighted avg       0.87      0.90      0.89        31

Accuracy: 0.9032258064516129


### 11. Feature Extraction (with Multi-channel)

In [287]:
def hjorth_parameters(x):
    activity = np.var(x)
    diff1 = np.diff(x)
    mobility = np.sqrt(np.var(diff1)/activity)
    diff2 = np.diff(diff1)
    complexity = np.sqrt(np.var(diff2)/np.var(diff1)) / mobility
    return activity, mobility, complexity

def spectral_entropy(psd):
    psd_norm = psd / np.sum(psd)
    return -np.sum(psd_norm * np.log2(psd_norm + 1e-12))

def multi_extract_features(epoch_data, sfreq):
    feats = []
    for ch in range(epoch_data.shape[0]):  # Loop over channels
        data = epoch_data[ch]
        
        # Frequency features
        f, psd = welch(data, sfreq, nperseg=sfreq*2)
        bands = {"delta":(0.5,4), "theta":(4,8), "alpha":(8,12), "beta":(12,30)}
        band_powers = []
        for (low, high) in bands.values():
            idx = np.logical_and(f >= low, f <= high)
            power = np.mean(psd[idx])
            feats.append(power)
            band_powers.append(power)
        
        # Band ratios
        feats.append(band_powers[1]/band_powers[2])  # theta/alpha
        feats.append(band_powers[0]/band_powers[1])  # delta/theta
        
        # Spectral entropy
        feats.append(spectral_entropy(psd))
        
        # Time-domain features
        feats.append(np.mean(data))
        feats.append(np.std(data))
        feats.append(skew(data))
        feats.append(kurtosis(data))
        
        # Hjorth parameters
        activity, mobility, complexity = hjorth_parameters(data)
        feats.extend([activity, mobility, complexity])
        
    return np.array(feats)

### 12. Compute features for all epochs (with multi-channel)

In [290]:
X = []
for ep in epochs.get_data():  # ep shape = (n_channels, n_times)
    feats = multi_extract_features(ep, epochs.info['sfreq'])
    X.append(feats)
X = np.vstack(X)

### 13. Splitting of Train and Test (with multi-channel)

In [325]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

### 14. Support Vector Machines (with multi-channel)

In [328]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm = SVC(kernel='rbf', C=1, gamma='scale')
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)

### 15. Random Forest (with multi-channel)

In [331]:
rf = RandomForestClassifier(n_estimators=200, max_depth=None, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

### 16. Evaluation (with multi-chanenel)

In [334]:
print("=== SVM Results ===")
print(classification_report(y_test, y_pred_svm, zero_division=0))
print("Accuracy:", accuracy_score(y_test, y_pred_svm))

print("\n=== Random Forest Results ===")
print(classification_report(y_test, y_pred_rf, zero_division=0))
print("Accuracy:", accuracy_score(y_test, y_pred_rf))

=== SVM Results ===
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.94      1.00      0.97        29

    accuracy                           0.94        31
   macro avg       0.47      0.50      0.48        31
weighted avg       0.88      0.94      0.90        31

Accuracy: 0.9354838709677419

=== Random Forest Results ===
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00        29

    accuracy                           1.00        31
   macro avg       1.00      1.00      1.00        31
weighted avg       1.00      1.00      1.00        31

Accuracy: 1.0
