# Gaussian Naïve Bayes
___


In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Make the parent directory importable: the project modules imported
# later in this notebook are looked up through sys.path.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import joblib

from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import (
    GroupKFold,
    cross_validate,
)
from sklearn.metrics import (
    accuracy_score,
     confusion_matrix,
     classification_report,
     f1_score,
     cohen_kappa_score,
     make_scorer,
)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA
from sklearn.feature_selection import (SelectKBest, f_classif)

from constants import (
    SLEEP_STAGES_VALUES,
    N_STAGES,
    EPOCH_DURATION,
)
from model_utils import (
    print_hypnogram,
    train_test_split_one_subject,
    train_test_split_according_to_age,
    evaluate_hyperparams_grid,
    print_results_cv,
    print_results_cv_scores,
    get_pipeline,
    print_hyperparam_tuning_results,
)

## Load the features
___

In [None]:
# Column positions of the subject and night identifiers inside the X matrix
SUBJECT_IDX = 0
NIGHT_IDX = 1
USE_CONTINUOUS_AGE = False
DOWNSIZE_SET = False
TEST_SET_SUBJECTS = [0.0, 24.0, 49.0, 71.0]

# Pick the pair of files matching the requested age encoding
X_file_name, y_file_name = (
    ("../data/x_features-age-continuous.npy", "../data/y_observations-age-continuous.npy")
    if USE_CONTINUOUS_AGE
    else ("../data/x_features.npy", "../data/y_observations.npy")
)

# Each file is loaded (pickled objects allowed) and its entries are stacked
# into one array: rows for X, a flat vector for y.
X_init = np.vstack(np.load(X_file_name, allow_pickle=True))
y_init = np.hstack(np.load(y_file_name, allow_pickle=True))

print(X_init.shape)
print(y_init.shape)

# Some subject indexes are skipped, thus the total number is below 83
# (as we can see in https://physionet.org/content/sleep-edfx/1.0.0/)
unique_subjects = np.unique(X_init[:, SUBJECT_IDX])
print("Number of subjects: ", unique_subjects.shape[0])

# A night is identified by its (subject, night) pair of integer ids
night_keys = {
    f"{int(row[0])}-{int(row[1])}"
    for row in X_init[:, SUBJECT_IDX:NIGHT_IDX + 1]
}
print("Number of nights: ", len(night_keys))
print("Subjects available: ", unique_subjects)

In [None]:
# Split the loaded data into a test part and a train/validation part.
# TEST_SET_SUBJECTS is handed over as `subjects_test` (presumably the
# subjects held out for testing -- confirm in model_utils).
X_test, X_train_valid, y_test, y_train_valid = train_test_split_according_to_age(
    X_init,
    y_init,
    subjects_test=TEST_SET_SUBJECTS,
    use_continuous_age=USE_CONTINUOUS_AGE,
)

print(*(split.shape for split in (X_test, X_train_valid, y_test, y_train_valid)))

## NB validation
___

In [None]:
# Number of folds used for the baseline cross-validation below
NB_KFOLDS = 5
# Number of leading feature columns passed through unscaled by the
# ColumnTransformer of the hand-built pipelines
NB_CATEGORICAL_FEATURES = 2
# Exclusive upper bound of the feature column indexes given to the
# ColumnTransformer (categorical columns included)
NB_FEATURES = 48
# Name of the classifier step inside the hand-built pipelines
CLASSIFIER_PIPELINE_KEY = 'classifier'
# Seed value (presumably meant for reproducible random operations)
RANDOM_STATE = 42 

def get_cv_iterator(n_splits=2):
    """Return a fresh generator of (train, valid) row-index pairs.

    The rows of `X_train_valid` are split with `GroupKFold`, using the
    subject column as the group label, so that a given subject never
    appears on both sides of a fold.

    Args:
        n_splits: number of folds to generate.
    """
    subject_of_each_row = X_train_valid[:, SUBJECT_IDX]
    splitter = GroupKFold(n_splits=n_splits)
    return splitter.split(X_train_valid, groups=subject_of_each_row)
    
def cross_validate_with_confusion_matrix(pipeline, n_fold):
    """Cross-validate `pipeline` by subject and print detailed fold reports.

    For every fold produced by `get_cv_iterator`, a fresh clone of
    `pipeline` is fitted on the training rows and evaluated on the
    validation rows. The confusion matrix, the classification report and
    the Cohen's kappa of each fold are printed, then the metrics of all
    folds are summarised with `print_results_cv`.

    Args:
        pipeline: scikit-learn estimator used as the template for each
            fold. It is cloned, so the object passed in is left unfitted.
        n_fold: number of cross-validation folds.
    """
    # Local import: `clone` is not part of the notebook's import cell.
    from sklearn.base import clone

    accuracies = []
    macro_f1_scores = []
    weighted_f1_scores = []
    kappa_agreements = []

    # classification_report expects a sequence of names, not a dict view
    stage_names = list(SLEEP_STAGES_VALUES.keys())

    for train_index, valid_index in get_cv_iterator(n_splits=n_fold):
        # We drop the subject and night indexes
        X_train, X_valid = X_train_valid[train_index, 2:], X_train_valid[valid_index, 2:]
        y_train, y_valid = y_train_valid[train_index], y_train_valid[valid_index]

        # Scaling features and model training, on an unfitted copy so that
        # no state can leak from one fold to the next
        training_pipeline = clone(pipeline)
        training_pipeline.fit(X_train, y_train)

        # Validation
        y_valid_pred = training_pipeline.predict(X_valid)

        print("----------------------------- FOLD RESULTS --------------------------------------\n")
        current_kappa = cohen_kappa_score(y_valid, y_valid_pred)

        print("TRAIN:", train_index, "VALID:", valid_index, "\n\n")
        print(confusion_matrix(y_valid, y_valid_pred), "\n")
        print(classification_report(y_valid, y_valid_pred, target_names=stage_names), "\n")
        print("Agreement score (Cohen Kappa): ", current_kappa, "\n")

        # Keep full precision: rounding here would bias the mean and the
        # standard deviation computed over the folds
        accuracies.append(accuracy_score(y_valid, y_valid_pred))
        macro_f1_scores.append(f1_score(y_valid, y_valid_pred, average="macro"))
        weighted_f1_scores.append(f1_score(y_valid, y_valid_pred, average="weighted"))
        kappa_agreements.append(current_kappa)

    print_results_cv(accuracies, macro_f1_scores, weighted_f1_scores, kappa_agreements)

In [None]:
%%time

# Baseline: Gaussian naive Bayes without any dimensionality reduction
baseline_pipeline = get_pipeline(classifier=GaussianNB())
cross_validate_with_confusion_matrix(baseline_pipeline, n_fold=NB_KFOLDS)

```
Agreement score (Cohen Kappa):  0.40234998625108576 

Mean accuracy          : 0.55 ± 0.037
Mean macro F1-score    : 0.51 ± 0.039
Mean weighted F1-score : 0.56 ± 0.029
Mean Kappa's agreement : 0.42 ± 0.047
CPU times: user 3.13 s, sys: 1.08 s, total: 4.21 s
Wall time: 5.75 s
```

## Dimensionality reduction
___

In [None]:
def cross_validate_with_dim_reduction(dim_reduction, pipeline=None, n_splits=NB_KFOLDS):
    """Cross-validate a pipeline by subject and print the aggregated scores.

    Args:
        dim_reduction: dimensionality-reduction step given to `get_pipeline`
            together with a `GaussianNB` classifier. Ignored when `pipeline`
            is provided.
        pipeline: optional ready-made estimator to evaluate instead.
        n_splits: number of cross-validation folds (defaults to NB_KFOLDS).
    """
    if pipeline is None:
        pipeline = get_pipeline(
            classifier=GaussianNB(),
            dimension_reduction=dim_reduction
        )

    scores = cross_validate(
        estimator=pipeline,
        # Drop the subject and night indexes, as done for every other
        # fit/predict in this notebook: they are identifiers, not features,
        # and they would shift the column positions the pipeline relies on.
        X=X_train_valid[:, 2:],
        y=y_train_valid,
        groups=X_train_valid[:, SUBJECT_IDX],
        scoring={
            "agreement": make_scorer(cohen_kappa_score),
            "accuracy": 'accuracy',
            "f1-score-macro": 'f1_macro',
            "f1-score-weighted": 'f1_weighted',
        },
        # The folds are row indexes computed on X_train_valid, so they
        # remain valid for the column-sliced matrix above.
        cv=get_cv_iterator(n_splits=n_splits),
        verbose=1,
        n_jobs=-1
    )

    print_results_cv_scores(scores)

In [None]:
%%time

# Gaussian naive Bayes on features projected with LDA
cross_validate_with_dim_reduction(dim_reduction=LinearDiscriminantAnalysis())

In [None]:
# Gaussian naive Bayes on the first 4 principal components
cross_validate_with_dim_reduction(dim_reduction=PCA(n_components=4))

In [None]:
# Gaussian naive Bayes on the first 16 principal components
cross_validate_with_dim_reduction(dim_reduction=PCA(n_components=16))

In [None]:
# Gaussian naive Bayes on the first 30 principal components
cross_validate_with_dim_reduction(dim_reduction=PCA(n_components=30))

LDA gave the best results:

```
Mean accuracy          : 0.68 ± 0.027
Mean macro F1-score    : 0.60 ± 0.023
Mean weighted F1-score : 0.66 ± 0.025
Mean Kappa's agreement : 0.55 ± 0.037
CPU times: user 112 ms, sys: 137 ms, total: 249 ms
Wall time: 14 s
```

## Feature selection
___

In [None]:
# Column groups handled by the scaling step: the first
# NB_CATEGORICAL_FEATURES columns are passed through untouched, the
# remaining ones (up to NB_FEATURES) are standardized.
categorical_columns = list(range(NB_CATEGORICAL_FEATURES))
continuous_columns = list(range(NB_CATEGORICAL_FEATURES, NB_FEATURES))

# Scaling -> keep the 15 best features (ANOVA F-value) -> LDA -> GaussianNB
feature_selection_pipeline = Pipeline([
    ('scaling', ColumnTransformer([
        ('pass-through-categorical', 'passthrough', categorical_columns),
        ('scaling-continuous', StandardScaler(copy=False), continuous_columns),
    ])),
    ('feature_selection', SelectKBest(f_classif, k=15)),
    ('dimension_reduction', LinearDiscriminantAnalysis()),
    (CLASSIFIER_PIPELINE_KEY, GaussianNB()),
])

cross_validate_with_dim_reduction(None, pipeline=feature_selection_pipeline)


In [None]:
%%time

# Candidate numbers of features kept by SelectKBest.
# - k starts at 1: k=0 selects nothing and only produces a NaN score.
# - k goes up to NB_FEATURES included, so "keep everything" can be drawn.
# - candidates are distinct and drawn from a seeded generator, so the
#   search is reproducible from one run to the next.
k_candidates = np.random.RandomState(RANDOM_STATE).choice(
    np.arange(1, NB_FEATURES + 1), size=10, replace=False
)

evaluate_hyperparams_grid(
    params={
        "feature_selection__k": k_candidates,
    },
    estimator=Pipeline([
        ('scaling', ColumnTransformer([
            ('pass-through-categorical', 'passthrough', list(range(NB_CATEGORICAL_FEATURES))),
            ('scaling-continuous', StandardScaler(copy=False), list(range(NB_CATEGORICAL_FEATURES,NB_FEATURES)))
        ])),
        ('feature_selection', SelectKBest(f_classif, k=15)),
        ('dimension_reduction', LinearDiscriminantAnalysis()),
        (CLASSIFIER_PIPELINE_KEY, GaussianNB())
    ]),
    # Drop the subject and night indexes, as done for the final model fit:
    # they are identifiers, not features.
    # NOTE(review): assumes evaluate_hyperparams_grid fits on X as given --
    # confirm in model_utils.
    X=X_train_valid[:, 2:],
    y=y_train_valid,
    cv=get_cv_iterator(n_splits=2),
    use_randomized=True
)

```
1. Parameter {'feature_selection__k': 46} has a score of 0.5633 ± 0.013
2. Parameter {'feature_selection__k': 45} has a score of 0.5628 ± 0.013
3. Parameter {'feature_selection__k': 43} has a score of 0.5624 ± 0.013
4. Parameter {'feature_selection__k': 25} has a score of 0.5368 ± 0.015
5. Parameter {'feature_selection__k': 24} has a score of 0.5357 ± 0.014
6. Parameter {'feature_selection__k': 17} has a score of 0.5090 ± 0.009
7. Parameter {'feature_selection__k': 12} has a score of 0.4797 ± 0.033
8. Parameter {'feature_selection__k': 6} has a score of 0.4361 ± 0.013
9. Parameter {'feature_selection__k': 3} has a score of 0.3439 ± 0.001
10. Parameter {'feature_selection__k': 0} has a score of nan ± nan
CPU times: user 3.28 s, sys: 779 ms, total: 4.06 s
Wall time: 28.2 s

```


The best combination is to use LDA without feature selection.

## Model testing
___

In [None]:
%%time

# Final model: Gaussian naive Bayes preceded by LDA
testing_pipeline = get_pipeline(
    dimension_reduction=LinearDiscriminantAnalysis(),
    classifier=GaussianNB(),
)

# The first two columns (subject and night indexes) are not features
train_valid_features = X_train_valid[:, 2:]

# The trailing semicolon keeps the notebook from echoing the fitted pipeline
testing_pipeline.fit(train_valid_features, y_train_valid);

In [None]:
# Predict the test rows (subject and night columns dropped), then report
# the confusion matrix, the per-stage metrics and the Cohen's kappa.
y_test_pred = testing_pipeline.predict(X_test[:, 2:])

test_confusion = confusion_matrix(y_test, y_test_pred)
test_report = classification_report(
    y_test, y_test_pred, target_names=SLEEP_STAGES_VALUES.keys()
)
test_kappa = cohen_kappa_score(y_test, y_test_pred)

print(test_confusion)
print(test_report)
print("Agreement score (Cohen Kappa): ", test_kappa)

### Test results
___

#### 1) With default parameters
____
```
              precision    recall  f1-score   support

           W       0.79      0.44      0.57      1624
          N1       0.25      0.30      0.28       983
          N2       0.79      0.67      0.72      3603
          N3       0.42      0.99      0.59       611
         REM       0.53      0.63      0.58      1302

    accuracy                           0.60      8123
   macro avg       0.56      0.61      0.55      8123
weighted avg       0.65      0.60      0.60      8123

Agreement score (Cohen Kappa):  0.4626294533458143
```

#### 2) With LDA
___

```
              precision    recall  f1-score   support

           W       0.84      0.87      0.85      1624
          N1       0.44      0.22      0.30       983
          N2       0.81      0.88      0.84      3603
          N3       0.71      0.89      0.79       611
         REM       0.64      0.61      0.63      1302

    accuracy                           0.76      8123
   macro avg       0.69      0.69      0.68      8123
weighted avg       0.73      0.76      0.74      8123

Agreement score (Cohen Kappa):  0.6537897956170273

```
#### 3) With LDA + feature selection
___
```
              precision    recall  f1-score   support

           W       0.84      0.82      0.83      1624
          N1       0.40      0.11      0.17       983
          N2       0.81      0.86      0.83      3603
          N3       0.73      0.84      0.78       611
         REM       0.48      0.64      0.55      1302

    accuracy                           0.72      8123
   macro avg       0.65      0.65      0.63      8123
weighted avg       0.71      0.72      0.70      8123

Agreement score (Cohen Kappa):  0.6085186109496423
```

## Saving trained model
___

In [None]:
SAVED_DIR = "../trained_model"

# Create the output folder if needed; exist_ok avoids the race between
# checking for the folder and creating it.
os.makedirs(SAVED_DIR, exist_ok=True)

# The file name records which age encoding the model was trained with
model_file_name = (
    "classifier_gaussiannb_age_continuous.joblib"
    if USE_CONTINUOUS_AGE
    else "classifier_gaussiannb.joblib"
)

# The trailing semicolon keeps the notebook from echoing the written paths
joblib.dump(testing_pipeline, f"{SAVED_DIR}/{model_file_name}");