# Sleep stage classification: Random Forest & Hidden Markov Model
____

This model aims to classify sleep stages based on two EEG channels. We will use the features extracted in the `pipeline.ipynb` notebook as the input to a Random Forest. The output of this model will then be used as the input of a HMM. We will implement our HMM in the same way as in this paper (Malafeev et al., « Automatic Human Sleep Stage Scoring Using Deep Neural Networks »).

In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Make the parent folder importable by putting its absolute path on sys.path
# (appended only once, even if the cell is executed again).
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
%matplotlib inline

from itertools import groupby

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import mne
import joblib

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import (StandardScaler)
from sklearn.model_selection import (GridSearchCV, GroupKFold)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score,
                             confusion_matrix,
                             plot_confusion_matrix,
                             classification_report,
                             f1_score,
                             cohen_kappa_score,
                             log_loss,
                             make_scorer)

from scipy.signal import medfilt

from hmmlearn.hmm import MultinomialHMM
from constants import (SLEEP_STAGES_VALUES, N_STAGES, EPOCH_DURATION)
from model_utils import print_hypnogram

## Load the features
___

In [None]:
# Columns of the X matrix that identify a recording: subject id, then night id
SUBJECT_IDX, NIGHT_IDX = 0, 1

# Experiment switches
USE_CONTINUOUS_AGE = False  # picks the "-age-continuous" feature/label files below
DOWNSIZE_SET = False

# Feature and label files matching the selected age encoding
X_file_name, y_file_name = (
    ("../data/x_features-age-continuous.npy", "../data/y_observations-age-continuous.npy")
    if USE_CONTINUOUS_AGE
    else ("../data/x_features.npy", "../data/y_observations.npy")
)

In [None]:
# Load the feature matrix and the labels from the .npy files selected above.
# NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code; only load files that come from a trusted source.
X_init = np.load(X_file_name, allow_pickle=True)
y_init = np.load(y_file_name, allow_pickle=True)


In [None]:
# Merge the loaded pieces: feature rows are stacked vertically, labels are
# concatenated into a single flat vector. Shapes are printed as a sanity check.
X_init, y_init = np.vstack(X_init), np.hstack(y_init)
print(X_init.shape, y_init.shape, sep="\n")


In [None]:
# Some subject indexes are skipped, thus total number is below 83 (as we can see in https://physionet.org/content/sleep-edfx/1.0.0/)
subject_ids = np.unique(X_init[:,SUBJECT_IDX])
print("Number of subjects: ", subject_ids.shape[0])

# A night is identified by its "<subject>-<night>" pair
night_keys = {f"{int(x[0])}-{int(x[1])}" for x in X_init[:,SUBJECT_IDX:NIGHT_IDX+1]}
print("Number of nights: ", len(night_keys))


## Downsizing sets
___

We will use the same set for all experiments. It includes the first 20 subjects, and excludes the 13th, because it only has one night.

The last subject will be put in the test set. 

In [None]:
if not DOWNSIZE_SET:
    # Work on the whole dataset
    X, y = X_init, y_init
else:
    # Filtering to only keep first 20 subjects (ids 0 to 19)
    in_first_20 = np.isin(X_init[:,SUBJECT_IDX], range(20))
    X_20, y_20 = X_init[in_first_20], y_init[in_first_20]

    # Exclude the subject with only one night recording (13th)
    MISSING_NIGHT_SUBJECT = 13
    has_all_nights = X_20[:,SUBJECT_IDX] != MISSING_NIGHT_SUBJECT
    X, y = X_20[has_all_nights], y_20[has_all_nights]

    print(X.shape)
    print(y.shape)

In [None]:
# Some subject indexes are skipped, thus total number is below 83 (as we can see in https://physionet.org/content/sleep-edfx/1.0.0/)
available_subjects = np.unique(X[:,SUBJECT_IDX])
print("Number of subjects: ", available_subjects.shape[0])
print("Subjects available: ", available_subjects)

# A night is identified by its "<subject>-<night>" pair
available_nights = {f"{int(x[0])}-{int(x[1])}" for x in X[:,SUBJECT_IDX:NIGHT_IDX+1]}
print("Number of nights: ", len(available_nights))

## Train, validation and test sets
___

If we downsize the dataset, the test set will only contain the two night recordings of the last subject (no. 19). The rest will form the train and validation sets.

If we did not downsize the dataset, we will randomly pick a subject from each age group to be in the test set. Both nights (if there are two) are placed in the test set so that the classifier does not train on any recordings from a subject placed in the test set.


In [None]:
def train_test_split_one_subject(X, subject_test=19, labels=None, subject_idx=None):
    """Hold out every observation of one subject as the test set.

    Rows are selected with boolean masks, so both resulting sets keep the
    original row order. (Building the train indexes from a ``set`` difference
    does not guarantee any order, while the HMM statistics computed later rely
    on the epochs of a night being in chronological order.)

    Parameters
    ----------
    X : 2-D array
        One row per observation; column ``subject_idx`` holds the subject id.
    subject_test : scalar, default 19
        Id of the subject placed in the test set.
    labels : 1-D array, optional
        Labels aligned with the rows of ``X``. Defaults to the notebook-level
        ``y`` so existing calls keep working.
    subject_idx : int, optional
        Column of ``X`` holding the subject id. Defaults to ``SUBJECT_IDX``.

    Returns
    -------
    X_test, X_train, y_test, y_train

    Raises
    ------
    ValueError
        If ``labels`` and ``X`` do not have the same number of observations.
    """
    if labels is None:
        labels = y
    if subject_idx is None:
        subject_idx = SUBJECT_IDX
    if len(labels) != X.shape[0]:
        raise ValueError("X and labels must have the same number of observations")

    is_test = np.asarray(X[:, subject_idx] == subject_test, dtype=bool)
    is_train = ~is_test

    return X[is_test], X[is_train], labels[is_test], labels[is_train]

def train_test_split_according_to_age(X, subjects_test=None, labels=None, continuous_age=None):
    """Hold out one subject per age group as the test set.

    All the observations of a selected subject go to the test set. Rows are
    selected with boolean masks, so both resulting sets keep the original row
    order (a ``set`` difference of indexes does not guarantee any order).

    Parameters
    ----------
    X : 2-D array
        One row per observation; column 0 holds the subject id and column 3
        the age (an age category, or an age value when ``continuous_age``).
    subjects_test : sequence, optional
        Subject ids to hold out, one per age group. When omitted, one subject
        is drawn at random (``np.random.choice``) in every age group.
    labels : 1-D array, optional
        Labels aligned with the rows of ``X``. Defaults to the notebook-level
        ``y`` so existing calls keep working.
    continuous_age : bool, optional
        Whether column 3 holds a continuous age, grouped with ``AGE_GROUPS``.
        Defaults to ``USE_CONTINUOUS_AGE``.

    Returns
    -------
    X_test, X_train, y_test, y_train

    Raises
    ------
    ValueError
        If ``labels`` does not match ``X``, or if ``subjects_test`` does not
        contain exactly one subject per age group.
    """
    AGE_CATEGORY_COL_IDX = 3
    SUBJECT_COL_IDX = 0
    AGE_GROUPS = [
        [0,49],  # 39 recordings
        [50,59], # 41 recordings
        [60,84], # 41 recordings
        [85,110] # 32 recordings
    ]
    if labels is None:
        labels = y
    if continuous_age is None:
        continuous_age = USE_CONTINUOUS_AGE
    if len(labels) != X.shape[0]:
        raise ValueError("X and labels must have the same number of observations")

    age_categories = np.unique(X[:, AGE_CATEGORY_COL_IDX])
    # With a continuous age the column holds many distinct values, so the
    # number of groups is given by AGE_GROUPS, not by the unique ages.
    nb_age_groups = len(AGE_GROUPS) if continuous_age else len(age_categories)
    if subjects_test is not None and len(subjects_test) != nb_age_groups:
        raise ValueError("If subjects are specified, they must be specified for all age groups")

    if subjects_test is None:
        # One (subject, age) pair per subject, the age being read on the
        # subject's first observation.
        subjects, first_rows = np.unique(X[:, SUBJECT_COL_IDX], return_index=True)
        ages = X[first_rows, AGE_CATEGORY_COL_IDX]

        if continuous_age:
            group_masks = [(ages >= lowest) & (ages <= highest) for lowest, highest in AGE_GROUPS]
        else:
            group_masks = [ages == age for age in age_categories]

        subjects_test = [np.random.choice(subjects[in_group]) for in_group in group_masks]

    print("Selected subjects for the test set are: ", subjects_test)
    is_test = np.asarray(np.isin(X[:, SUBJECT_COL_IDX], subjects_test), dtype=bool)
    is_train = ~is_test

    return X[is_test], X[is_train], labels[is_test], labels[is_train]

# Hold out the test subjects; the remaining observations are used for training
# and validation. Both helpers return (X_test, X_train, y_test, y_train).
if DOWNSIZE_SET:
    _split = train_test_split_one_subject(X)
else:
    _split = train_test_split_according_to_age(X , subjects_test=[0.0, 24.0, 49.0, 71.0])

X_test, X_train_valid, y_test, y_train_valid = _split

print(X_test.shape, X_train_valid.shape, y_test.shape, y_train_valid.shape)

## Random forest validation
___

In [None]:
NB_KFOLDS = 5
NB_CATEGORICAL_FEATURES = 2
NB_FEATURES = 48

CLASSIFIER_PIPELINE_KEY = 'classifier'

def get_random_forest_model():
    """Return a new, unfitted pipeline: column-wise scaling followed by a Random Forest.

    The first NB_CATEGORICAL_FEATURES columns are passed through unchanged and
    the following ones, up to NB_FEATURES, are standardized. The classifier
    step is registered under CLASSIFIER_PIPELINE_KEY so that its
    hyperparameters can be addressed by name.
    """
    categorical_columns = list(range(NB_CATEGORICAL_FEATURES))
    continuous_columns = list(range(NB_CATEGORICAL_FEATURES,NB_FEATURES))

    column_scaling = ColumnTransformer([
        ('pass-through-categorical', 'passthrough', categorical_columns),
        ('scaling-continuous', StandardScaler(copy=False), continuous_columns)
    ])
    random_forest = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        n_jobs=-1
    )

    return Pipeline([
        ('scaling', column_scaling),
        (CLASSIFIER_PIPELINE_KEY, random_forest)
    ])

For the cross validation, we will use the `GroupKFold` technique. For each fold, we make sure to train and validate on different subjects, to avoid overfitting over subjects.

In [None]:
%%time

accuracies = []
macro_f1_scores = []
weighted_f1_scores = []
kappa_agreements = []
# Counts of (scored stage, predicted stage) pairs accumulated over all folds.
# Once row-normalized it gives P(predicted stage | scored stage).
emission_matrix = np.zeros((N_STAGES,N_STAGES))

# Folds are built per subject, so a subject is never both trained and validated on
subject_groups = X_train_valid[:,SUBJECT_IDX]

for train_index, valid_index in GroupKFold(n_splits=NB_KFOLDS).split(X_train_valid, groups=subject_groups):
    # We drop the subject and night indexes
    X_train, X_valid = X_train_valid[train_index, 2:], X_train_valid[valid_index, 2:]
    y_train, y_valid = y_train_valid[train_index], y_train_valid[valid_index]

    # Scaling features and model training
    training_pipeline = get_random_forest_model()
    training_pipeline.fit(X_train, y_train)

    # Validation
    y_valid_pred = training_pipeline.predict(X_valid)

    # Fixed-size confusion matrix (rows: scored stage, columns: predicted
    # stage) so that it can be added to the emission counts as is.
    fold_confusion = confusion_matrix(y_valid, y_valid_pred, labels=np.arange(N_STAGES))

    print("----------------------------- FOLD RESULTS --------------------------------------\n")
    current_kappa = cohen_kappa_score(y_valid, y_valid_pred)

    print("TRAIN:", train_index, "VALID:", valid_index, "\n\n")
    print(fold_confusion, "\n")
    print(classification_report(y_valid, y_valid_pred, target_names=SLEEP_STAGES_VALUES.keys()), "\n")
    print("Agreement score (Cohen Kappa): ", current_kappa, "\n")

    # Keep the unrounded scores: rounding is only applied when displaying the
    # mean and standard deviation, otherwise both are computed on truncated values.
    accuracies.append(accuracy_score(y_valid, y_valid_pred))
    macro_f1_scores.append(f1_score(y_valid, y_valid_pred, average="macro"))
    weighted_f1_scores.append(f1_score(y_valid, y_valid_pred, average="weighted"))
    kappa_agreements.append(current_kappa)

    emission_matrix += fold_confusion

emission_matrix = emission_matrix / emission_matrix.sum(axis=1, keepdims=True)

In [None]:
# Mean and standard deviation of every metric over the cross-validation folds
summary_lines = [
    f"Mean accuracy          : {np.mean(accuracies):0.2f} ± {np.std(accuracies):0.3f}",
    f"Mean macro F1-score    : {np.mean(macro_f1_scores):0.2f} ± {np.std(macro_f1_scores):0.3f}",
    f"Mean weighted F1-score : {np.mean(weighted_f1_scores):0.2f} ± {np.std(weighted_f1_scores):0.3f}",
    f"Mean Kappa's agreement : {np.mean(kappa_agreements):0.2f} ± {np.std(kappa_agreements):0.3f}",
]
print("\n".join(summary_lines))

## Validation results
___

### Random forest hyperparameters
___

We have fixed the same hyperparameters for the other categories:
- `StandardScaler` for all continuous features
- Training on all the dataset (83 subjects)
- Test set contains the following subjects: `[3.0, 24.0, 55.0, 72.0]`
- No postprocessing step
- RF with its default hyperparameters

#### 1. class_weight: balanced vs none
___



In [None]:
%%time

# Hyperparameter grid, addressed through the classifier step of the pipeline
params = {
    f"{CLASSIFIER_PIPELINE_KEY}__class_weight": [None, "balanced"]
}

classifier = get_random_forest_model()
# Pass the splitter itself rather than the generator returned by `.split()`:
# a generator can only be consumed once, so fitting the search a second time
# would find no fold left. The subject groups are given to `fit` instead.
cross_valid_folds = GroupKFold(n_splits=2)

search = GridSearchCV(
    classifier,
    params,
    scoring=make_scorer(cohen_kappa_score),
    cv=cross_valid_folds,
    n_jobs=-1,
    verbose=1
)
# Subject and night columns are dropped from the features; the subject column
# is only used to keep a subject within a single fold.
search.fit(X_train_valid[:,2:], y_train_valid, groups=X_train_valid[:,SUBJECT_IDX])

In [None]:
results = []
cv_results = search.cv_results_

# One line per evaluated parameter set: rank, parameters, mean score ± std over the folds
for rank, current_param, score_mean, score_uncertainty in zip(
        cv_results['rank_test_score'],
        cv_results['params'],
        cv_results['mean_test_score'],
        cv_results['std_test_score']):
    print(f"{rank}. Parameter {current_param} has a score of {score_mean:0.4f} ± {score_uncertainty:0.3f}")


|  Value      |  Score          |
|-------------|-----------------|
| `None`      | 0.6153 ± 0.003  |
|  `balanced` |  0.6114 ± 0.002 |

The chosen value for `class_weight` is then `None`.

## Random forest training and testing
___

In [None]:
# Fit a fresh pipeline on the whole train + validation set
# (the subject and night columns are dropped from the features).
testing_pipeline = get_random_forest_model().fit(X_train_valid[:, 2:], y_train_valid)


In [None]:
# Predict the held-out subjects (subject and night columns dropped) and report
y_test_pred = testing_pipeline.predict(X_test[:,2:])

test_confusion = confusion_matrix(y_test, y_test_pred)
print(test_confusion)

test_report = classification_report(y_test, y_test_pred, target_names=SLEEP_STAGES_VALUES.keys())
print(test_report)

test_kappa = cohen_kappa_score(y_test, y_test_pred)
print("Agreement score (Cohen Kappa): ", test_kappa)

## Hidden Markov Model
___

In [None]:
def compute_hmm_matrices(y, subject_night, n_stages=None):
    """Estimate the HMM transition and start probabilities from scored nights.

    Observations are grouped per (subject, night) while keeping their relative
    order, so a night whose rows are not contiguous is still counted as a
    single sequence (grouping consecutive rows only would split it and count
    several "first" stages for it).

    Parameters
    ----------
    y : 1-D array of int
        Scored stage of every observation, in chronological order within a
        night. Values must lie in ``[0, n_stages)``.
    subject_night : 2-D array
        One row per observation: subject id, then night id.
    n_stages : int, optional
        Number of stages. Defaults to ``N_STAGES``.

    Returns
    -------
    transition_matrix : array of shape (n_stages, n_stages)
        Row ``i`` holds the probabilities of moving from stage ``i`` to every
        stage. A stage that is never left in the data gets a uniform row
        instead of NaNs, so every row sums to one.
    start_matrix : array of shape (n_stages,)
        Probability of every stage being the first stage of a night.

    Raises
    ------
    ValueError
        If the inputs are empty or do not have the same length.
    """
    if n_stages is None:
        n_stages = N_STAGES

    y = np.asarray(y).astype(int)
    subject_night = np.asarray(subject_night)
    if y.shape[0] != subject_night.shape[0]:
        raise ValueError("y and subject_night must have the same number of observations")
    if y.shape[0] == 0:
        raise ValueError("At least one observation is needed to compute the HMM matrices")

    transition_counts = np.zeros((n_stages, n_stages))
    start_counts = np.zeros(n_stages)

    # Row indexes of every night, in order of first appearance
    rows_per_night = {}
    for row_idx, (subject, night) in enumerate(subject_night):
        rows_per_night.setdefault((int(subject), int(night)), []).append(row_idx)

    for (subject, night), row_indexes in rows_per_night.items():
        print(f"Computing file: subject{subject}-night{night}")
        current_y = y[row_indexes]
        start_counts[current_y[0]] += 1

        # add.at accumulates repeated (from, to) pairs, unlike fancy-index `+=`
        np.add.at(transition_counts, (current_y[:-1], current_y[1:]), 1)

    row_totals = transition_counts.sum(axis=1, keepdims=True)
    transition_matrix = np.divide(
        transition_counts,
        row_totals,
        out=np.full_like(transition_counts, 1.0 / n_stages),
        where=row_totals > 0,
    )
    start_matrix = start_counts / start_counts.sum()

    return transition_matrix, start_matrix
    
# Estimate the HMM transition and start probabilities from the scored stages of
# the train/validation set; columns 0 and 1 of X hold the subject and night ids.
transition_matrix, start_matrix = compute_hmm_matrices(y_train_valid, X_train_valid[:,0:2])

In [None]:
# The HMM is not fitted: its parameters are set by hand from statistics computed
# above. Hidden states are the scored sleep stages and the observed symbols are
# the stages predicted by the Random Forest.
# NOTE(review): in recent hmmlearn releases the discrete (one symbol per step)
# emission model is `CategoricalHMM` and `MultinomialHMM` has a different
# meaning — confirm against the installed hmmlearn version.
hmm_model = MultinomialHMM(n_components=N_STAGES)

hmm_model.transmat_ = transition_matrix  # stage-to-stage probabilities from the scored nights
hmm_model.startprob_ = start_matrix  # probabilities of the first stage of a night
hmm_model.emissionprob_ = emission_matrix  # row-normalized (scored, predicted) counts from the validation folds

In [None]:
# Decode every night as its own sequence. Without `lengths`, all the test nights
# are treated as one long recording: the start probabilities are only applied to
# the very first epoch and a transition is assumed between the last epoch of a
# night and the first epoch of the next one.
# Lengths are the sizes of the consecutive runs of rows sharing the same
# (subject, night) pair, so they always add up to the number of test observations.
test_night_keys = [(int(row[SUBJECT_IDX]), int(row[NIGHT_IDX])) for row in X_test]
test_night_lengths = [len(list(night_rows)) for _, night_rows in groupby(test_night_keys)]

y_hmm_pred = hmm_model.predict(y_test_pred.reshape(-1, 1), lengths=test_night_lengths)

In [None]:
# Same report as for the raw Random Forest output, on the HMM-smoothed predictions
hmm_confusion = confusion_matrix(y_test, y_hmm_pred)
print(hmm_confusion)

hmm_report = classification_report(y_test, y_hmm_pred, target_names=SLEEP_STAGES_VALUES.keys())
print(hmm_report)

hmm_kappa = cohen_kappa_score(y_test, y_hmm_pred)
print("Agreement score (Cohen Kappa): ", hmm_kappa)

In [None]:
print("Test subjects are subjects: ", np.unique(X_test[:,0]))
print("BEFORE HMM")
plt.rcParams["figure.figsize"] = (20,5)

# Subject and night id of every test observation (columns 0 and 1 of X)
test_subject_ids, test_night_ids = X_test[:,0], X_test[:,1]

# One figure per test night: scored hypnogram against the Random Forest output
for test_subject in np.unique(test_subject_ids):
    is_test_subject = test_subject_ids == test_subject

    for night_idx in np.unique(test_night_ids[is_test_subject]):
        is_test_night = is_test_subject & (test_night_ids == night_idx)
        hypnograms = [y_test[is_test_night], y_test_pred[is_test_night]]

        print_hypnogram(hypnograms,
                        labels=["scored", "predicted"],
                        subject=test_subject,
                        night=night_idx)

In [None]:
print("Test subjects are subjects: ", np.unique(X_test[:,0]))
print("AFTER HMM")

# Subject and night id of every test observation (columns 0 and 1 of X)
test_subject_ids, test_night_ids = X_test[:,0], X_test[:,1]

# One figure per test night: scored hypnogram against the HMM-smoothed output
for test_subject in np.unique(test_subject_ids):
    is_test_subject = test_subject_ids == test_subject

    for night_idx in np.unique(test_night_ids[is_test_subject]):
        is_test_night = is_test_subject & (test_night_ids == night_idx)
        hypnograms = [y_test[is_test_night], y_hmm_pred[is_test_night]]

        print_hypnogram(hypnograms,
                        labels=["scored", "predicted with HMM"],
                        subject=test_subject,
                        night=night_idx)

## Median filter
___

In order to have a point of comparison for the HMM postprocessing step, we will also apply a median filter to the output of the RF.

We apply a median filter with a 3 element sized kernel. It is applied to each night's sleep separately. We do this because short transitions are not common in sleep patterns:

> The final stage includes median filtration of short-term transitions. It is well known that short-term transitions in human sleep are impossible. Short-term jumps in hypnogram are evidence of transitory stage. Averaging for 2-3 min allows the curve structure to be smoothed and sleep structure to be resolved. 

Source: Doroshenkov, L. G., V. A. Konyshev, et S. V. Selishchev. « Classification of Human Sleep Stages Based on EEG Processing Using Hidden Markov Models ». Biomedical Engineering 41, nᵒ 1 (janvier 2007): 25‑28. https://doi.org/10.1007/s10527-007-0006-5.


In [None]:
KERNEL_SIZE=3
# Filtered stages are stored with the dtype of the Random Forest predictions:
# a float buffer would turn the stage labels into floats, unlike every other
# prediction vector of this notebook. The median of an odd-sized window of
# labels is itself a label, so nothing is lost by the cast.
y_medfilt_pred = np.zeros(y_test.shape[0], dtype=y_test_pred.dtype)

# Subject and night id of every test observation (columns 0 and 1 of X)
test_subject_ids, test_night_ids = X_test[:,0], X_test[:,1]

# Each night is filtered on its own so that the window never spans two recordings.
# NOTE: medfilt pads the signal with zeros, which can affect the first and last
# epochs of a night.
for test_subject in np.unique(test_subject_ids):
    is_test_subject = test_subject_ids == test_subject

    for night_idx in np.unique(test_night_ids[is_test_subject]):
        is_test_night = is_test_subject & (test_night_ids == night_idx)

        y_medfilt_pred[is_test_night] = medfilt(y_test_pred[is_test_night], kernel_size=KERNEL_SIZE)

In [None]:
# Same report as for the raw Random Forest output, on the median-filtered predictions
medfilt_confusion = confusion_matrix(y_test, y_medfilt_pred)
print(medfilt_confusion)

medfilt_report = classification_report(y_test, y_medfilt_pred, target_names=SLEEP_STAGES_VALUES.keys())
print(medfilt_report)

medfilt_kappa = cohen_kappa_score(y_test, y_medfilt_pred)
print("Agreement score (Cohen Kappa): ", medfilt_kappa)

In [None]:
plt.rcParams["figure.figsize"] = (20,5)

print("Test subjects are subjects: ", np.unique(X_test[:,0]))

# Subject and night id of every test observation (columns 0 and 1 of X)
test_subject_ids, test_night_ids = X_test[:,0], X_test[:,1]

# One figure per test night: scored hypnogram against the median-filtered output
for test_subject in np.unique(test_subject_ids):
    is_test_subject = test_subject_ids == test_subject

    for night_idx in np.unique(test_night_ids[is_test_subject]):
        is_test_night = is_test_subject & (test_night_ids == night_idx)
        hypnograms = [y_test[is_test_night], y_medfilt_pred[is_test_night]]

        print_hypnogram(hypnograms,
                        labels=["scored", "predicted with median filter"],
                        subject=test_subject,
                        night=night_idx)

## Saving trained model
___

We save the trained model with the postprocessing step, HMM. We will save only the matrices that define it. We do not need to persist the median filter postprocessing step, because it is stateless.

In [None]:
SAVED_DIR = "../trained_model"

# Create the output folder if needed. `exist_ok=True` replaces the separate
# existence check (no failure if the folder appears in between) and `makedirs`
# also creates missing parent folders.
os.makedirs(SAVED_DIR, exist_ok=True)

In [None]:
# Persist the fitted Random Forest pipeline; the file name depends on the age
# encoding used for the features.
# (The "continous" spelling is part of the file name written here and is left
# untouched.)
if USE_CONTINUOUS_AGE: 
    joblib.dump(testing_pipeline, f"{SAVED_DIR}/classifier_RF_continous_age.joblib")
else:
    joblib.dump(testing_pipeline, f"{SAVED_DIR}/classifier_RF.joblib")

In [None]:
# The HMM is fully described by its three probability matrices: save each one to its own file
for matrix_file, matrix in (
        (f"{SAVED_DIR}/HMM_transmat.npy", hmm_model.transmat_),
        (f"{SAVED_DIR}/HMM_startprob.npy", hmm_model.startprob_),
        (f"{SAVED_DIR}/HMM_emissionprob.npy", hmm_model.emissionprob_)):
    np.save(matrix_file, matrix)