# K Nearest Neighbour Classification
___

This model aims to classify sleep stages based on two EEG channel. We will use the features extracted in the `pipeline.ipynb` notebook as the input to a KNN classifier.

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Ensure parent folder is in PYTHONPATH
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import joblib

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import (GridSearchCV,
                                     RandomizedSearchCV,
                                     GroupKFold,
                                     cross_validate)
from sklearn.metrics import (accuracy_score,
                             confusion_matrix,
                             classification_report,
                             f1_score,
                             cohen_kappa_score,
                             make_scorer)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA

from constants import (SLEEP_STAGES_VALUES,
                       N_STAGES,
                       EPOCH_DURATION)
from model_utils import (print_hypnogram,
                         train_test_split_one_subject,
                         train_test_split_according_to_age,
                         evaluate_hyperparams_grid,
                         print_results_cv,
                         print_results_cv_scores,
                         get_pipeline,
                         print_hyperparam_tuning_results)

## Load the features
___

In [3]:
# position of the subject information and night information in the X matrix
SUBJECT_IDX = 0 
NIGHT_IDX = 1
USE_CONTINUOUS_AGE = False
DOWNSIZE_SET = False
TEST_SET_SUBJECTS = [0.0, 24.0, 49.0, 71.0]

if USE_CONTINUOUS_AGE:
    X_file_name = "../data/x_features-age-continuous.npy"
    y_file_name = "../data/y_observations-age-continuous.npy"
else:
    X_file_name = "../data/x_features.npy"
    y_file_name = "../data/y_observations.npy"
    
X_init = np.load(X_file_name, allow_pickle=True)
y_init = np.load(y_file_name, allow_pickle=True)

X_init = np.vstack(X_init)
y_init = np.hstack(y_init)
print(X_init.shape)
print(y_init.shape)
print("Number of subjects: ", np.unique(X_init[:,SUBJECT_IDX]).shape[0]) # Some subject indexes are skipped, thus total number is below 83 (as we can see in https://physionet.org/content/sleep-edfx/1.0.0/)
print("Number of nights: ", len(np.unique([f"{int(x[0])}-{int(x[1])}" for x in X_init[:,SUBJECT_IDX:NIGHT_IDX+1]])))
print("Subjects available: ", np.unique(X_init[:,SUBJECT_IDX]))

(168954, 50)
(168954,)
Number of subjects:  78
Number of nights:  153
Subjects available:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.
 36. 37. 38. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53. 54.
 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 70. 71. 72. 73. 74.
 75. 76. 77. 80. 81. 82.]


In [4]:
X_test, X_train_valid, y_test, y_train_valid = train_test_split_according_to_age(
    X_init,
    y_init,
    use_continuous_age=USE_CONTINUOUS_AGE,
    subjects_test=TEST_SET_SUBJECTS)
    
print(X_test.shape, X_train_valid.shape, y_test.shape, y_train_valid.shape)

Selected subjects for the test set are:  [0.0, 24.0, 49.0, 71.0]
(8123, 50) (160831, 50) (8123,) (160831,)


## KNN validation
___

In [5]:
NB_KFOLDS = 5
NB_CATEGORICAL_FEATURES = 2
NB_FEATURES = 48
CLASSIFIER_PIPELINE_KEY = 'classifier'
RANDOM_STATE = 42 

def get_cv_iterator(n_splits=2):
    return GroupKFold(n_splits=n_splits).split(
        X_train_valid, groups=X_train_valid[:,SUBJECT_IDX]
    )
    
def cross_validate_with_confusion_matrix(pipeline, n_fold):
    accuracies = []
    macro_f1_scores = []
    weighted_f1_scores = []
    kappa_agreements = []

    for train_index, valid_index in get_cv_iterator(n_splits=n_fold):
        # We drop the subject and night indexes
        X_train, X_valid = X_train_valid[train_index, 2:], X_train_valid[valid_index, 2:]
        y_train, y_valid = y_train_valid[train_index], y_train_valid[valid_index]

        # Scaling features and model training
        training_pipeline = pipeline
        training_pipeline.fit(X_train, y_train)

        # Validation
        y_valid_pred = training_pipeline.predict(X_valid)

        print("----------------------------- FOLD RESULTS --------------------------------------\n")
        current_kappa = cohen_kappa_score(y_valid, y_valid_pred)

        print("TRAIN:", train_index, "VALID:", valid_index, "\n\n")
        print(confusion_matrix(y_valid, y_valid_pred), "\n")
        print(classification_report(y_valid, y_valid_pred, target_names=SLEEP_STAGES_VALUES.keys()), "\n")
        print("Agreement score (Cohen Kappa): ", current_kappa, "\n")

        accuracies.append(round(accuracy_score(y_valid, y_valid_pred),2))
        macro_f1_scores.append(f1_score(y_valid, y_valid_pred, average="macro"))
        weighted_f1_scores.append(f1_score(y_valid, y_valid_pred, average="weighted"))
        kappa_agreements.append(current_kappa)

    print_results_cv(accuracies, macro_f1_scores, weighted_f1_scores, kappa_agreements)

In [6]:
%%time

cross_validate_with_confusion_matrix(get_pipeline(
    classifier=KNeighborsClassifier(
        n_jobs=-1
    )
), n_fold=NB_KFOLDS)

----------------------------- FOLD RESULTS --------------------------------------

TRAIN: [  2137   2138   2139 ... 158843 158844 158845] VALID: [     0      1      2 ... 160828 160829 160830] 


[[6746  387  248   30  241]
 [ 950  688 1411    9  659]
 [1284  760 9460  655  779]
 [ 216   19  752 1938    6]
 [ 632 1066 1656    3 2209]] 

              precision    recall  f1-score   support

           W       0.69      0.88      0.77      7652
          N1       0.24      0.19      0.21      3717
          N2       0.70      0.73      0.71     12938
          N3       0.74      0.66      0.70      2931
         REM       0.57      0.40      0.47      5566

    accuracy                           0.64     32804
   macro avg       0.58      0.57      0.57     32804
weighted avg       0.62      0.64      0.63     32804
 

Agreement score (Cohen Kappa):  0.5088390243817995 

----------------------------- FOLD RESULTS --------------------------------------

TRAIN: [     0      1      2 ... 1

We see that the results are lower than the other models (SVC,RF), and takes much longer to train.

```
Mean accuracy          : 0.67 ± 0.031
Mean macro F1-score    : 0.60 ± 0.029
Mean weighted F1-score : 0.66 ± 0.032
Mean Kappa's agreement : 0.55 ± 0.044
CPU times: user 43min 2s, sys: 26.7 s, total: 43min 29s
Wall time: 20min 36s
```

In [7]:
%%time
knn_clf = get_pipeline(
    classifier=KNeighborsClassifier(
        n_jobs=-1)
)

knn_clf.fit(X_train_valid[:150000,2:], y_train_valid[:150000])

CPU times: user 1.33 s, sys: 19.9 ms, total: 1.35 s
Wall time: 1.35 s


Pipeline(memory=None,
         steps=[('scaling',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('pass-through-categorical',
                                                  'passthrough', [0, 1]),
                                                 ('scaling-continuous',
                                                  StandardScaler(copy=False,
                                                                 with_mean=True,
                                                                 with_std=True),
                                                  [2, 3, 4, 5, 6, 7, 8, 9, 10,
                                                   11, 12, 13, 14, 15, 16, 17,
                                                   18, 19, 20, 21, 22, 23, 24,
                                                   25, 26, 27, 28,

In [8]:
%%time
knn_clf.predict(X_train_valid[150000:,2:])

CPU times: user 3min 9s, sys: 140 ms, total: 3min 9s
Wall time: 30 s


array([3, 2, 2, ..., 0, 0, 0])

As we can see above, training time is really fast, while the prediction is quite slow. It can be explained by how the KNN model works: 

> [...] it does not attempt to construct a general internal model, but simply stores instances of the training data. Classification is computed from a simple majority vote of the nearest neighbors of each point: a query point is assigned the data class which has the most representatives within the nearest neighbors of the point.

## Validation results
___

### Dimension reduction
___

As with the other models, we will use LDA and PCA to reduce dimensions. We will try the maintain the same scores, and reduce the time it takes to cross-validate.

In [9]:
def cross_validate_with_dim_reduction(dim_reduction):
    pipeline = get_pipeline(
        classifier=KNeighborsClassifier(n_jobs=-1),
        dimension_reduction=dim_reduction
    )
    
    scores = cross_validate(
        estimator=pipeline,
        X=X_train_valid,
        y=y_train_valid,
        groups=X_train_valid[:,SUBJECT_IDX],
        scoring={
            "agreement": make_scorer(cohen_kappa_score),
            "accuracy": 'accuracy',
            "f1-score-macro": 'f1_macro',
            "f1-score-weighted": 'f1_weighted',
        },
        cv=get_cv_iterator(n_splits=5),
        verbose=1,
        n_jobs=-1
    )
    
    print_results_cv_scores(scores)

#### 1. LDA
___

We have `n_components=4` by default when using LDA to reduce dimensionality. We can see it speeds up a lot (15.8s with vs ~20min without) the prediction time for the same number of folds.

In [10]:
%%time

cross_validate_with_dim_reduction(LinearDiscriminantAnalysis())

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    6.7s remaining:   10.0s


Mean accuracy          : 0.66 ± 0.031
Mean macro F1-score    : 0.59 ± 0.024
Mean weighted F1-score : 0.65 ± 0.028
Mean Kappa's agreement : 0.53 ± 0.040
CPU times: user 57.3 ms, sys: 83.8 ms, total: 141 ms
Wall time: 7.14 s


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    7.1s finished


#### 2. PCA
___


In [11]:
%%time

cross_validate_with_dim_reduction(PCA(n_components=4))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    6.7s remaining:   10.0s


Mean accuracy          : 0.50 ± 0.022
Mean macro F1-score    : 0.45 ± 0.009
Mean weighted F1-score : 0.50 ± 0.020
Mean Kappa's agreement : 0.32 ± 0.025
CPU times: user 39.9 ms, sys: 31.9 ms, total: 71.8 ms
Wall time: 7.02 s


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    7.0s finished


In [12]:
%%time

cross_validate_with_dim_reduction(PCA(n_components=16))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:   26.2s remaining:   39.2s


Mean accuracy          : 0.63 ± 0.037
Mean macro F1-score    : 0.57 ± 0.033
Mean weighted F1-score : 0.63 ± 0.035
Mean Kappa's agreement : 0.49 ± 0.051
CPU times: user 36.2 ms, sys: 16 ms, total: 52.1 ms
Wall time: 27.7 s


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   27.7s finished


In [13]:
%%time

cross_validate_with_dim_reduction(PCA(n_components=35))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:   59.1s remaining:  1.5min


Mean accuracy          : 0.63 ± 0.036
Mean macro F1-score    : 0.57 ± 0.032
Mean weighted F1-score : 0.63 ± 0.034
Mean Kappa's agreement : 0.50 ± 0.048
CPU times: user 43.5 ms, sys: 20 ms, total: 63.4 ms
Wall time: 1min 2s


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  1.0min finished


#### Results
___

|  Score             |  without      | LDA           | PCA (n_comp=4) | PCA (n_comp=16) | PCA (n_comp=35) |
|--------------------|---------------|---------------|----------------|-----------------|-----------------|
|  accuracy          | 0.67 ± 0.031  |  0.66 ± 0.031 | 0.50 ± 0.022   |  0.63 ± 0.037   |  0.63 ± 0.036   |
|  macro F1-score    | 0.60 ± 0.029  |  0.59 ± 0.024 | 0.45 ± 0.009   |  0.57 ± 0.033   |  0.57 ± 0.032   |
|  weighted F1-score | 0.66 ± 0.032  |  0.65 ± 0.028 | 0.50 ± 0.020   |  0.63 ± 0.035   |  0.63 ± 0.034   |
|  Kappa's agreement | 0.55 ± 0.044  |  0.53 ± 0.040 | 0.32 ± 0.025   |  0.49 ± 0.050   |  0.50 ± 0.048   |
|  Time              | 20min 36s     |  15.8 s       | 15.3 s         |  52.4 s         |  1min 42s       |

The results with LDA used as dimension reduction have slightly worst results thant without, but has the best overall score accross PCA and LDA scores. We will keep LDA as a step in our pipeline, because it speeds up a lot prediction time. 

## Hyperparameters tuning
___

The hyperparameters of a KNN classifier are:
- `n_neighbors`: `int` (`default=5`)
    
    Represents the number of neighbors which votes the predicted class
- `weights`: `{‘uniform’, ‘distance’} or callable, default='uniform'`

    Weight functions attributed to neighbors in the voting process. If `uniform`, all neighbors have the same weight, whilst setting `weights` to `distance` involves that closer neighbors of a query point will have a greater influence than neighbors which are further away.

- `leaf_size`: `positive int` (`default=30`)

    Number of points at which to switch to brute-force. Changing leaf_size will not affect the results of a query, but can significantly impact the speed of a query and the memory required to store the constructed tree. **The optimal value depends on the nature of the problem.**
    
- `metric`: `str or callable, default=’minkowski’`

    The distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. 
    With hyperparam `p`, default value is euclidean distance. We could also look at `manhattan` and `chebyshev`.
    
### 1. Hyperparameters `n_neighbors` and `weights`
___

In [None]:
%%time

evaluate_hyperparams_grid(
    params={
        f"{CLASSIFIER_PIPELINE_KEY}__n_neighbors": np.linspace(200, 500, 10, dtype="int"),
        f"{CLASSIFIER_PIPELINE_KEY}__weights": ['uniform'],

    },
    estimator=get_pipeline(
        classifier=KNeighborsClassifier(n_jobs=-1),
        dimension_reduction=LinearDiscriminantAnalysis()
    ),
    X=X_train_valid,
    y=y_train_valid,
    cv=get_cv_iterator(n_splits=2),
    use_randomized=True
)

**1st experiment**: `n_neighbors": [  2,  57, 112, 168, 223, 278, 334, 389, 444, 500]` with `weights=['uniform', 'distance']`

|Rank| n_neighbors      |  weights          | Test score     |
|----|------------------|-------------------|----------------|
|1   | 389              | 'uniform'         | 0.5815 ± 0.001 |
|2   | 223              | 'uniform'         | 0.5814 ± 0.002 |
|3   | 334              | 'uniform'         | 0.5813 ± 0.002 |
|4   | 500              | 'uniform'         | 0.5812 ± 0.002 |
| ... |
|13   | 57               | 'distance'        | 0.5766 ± 0.004 |
|14   | 2                | 'distance'        | 0.4655 ± 0.001 |
|15   | 2                | 'uniform'         | 0.4589 ± 0.014 |

1st run:

1. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 389} has a score of 0.5815 ± 0.001
2. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 223} has a score of 0.5814 ± 0.002
3. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 500} has a score of 0.5812 ± 0.002
4. Parameter {'classifier__weights': 'distance', 'classifier__n_neighbors': 500} has a score of 0.5810 ± 0.002
5. Parameter {'classifier__weights': 'distance', 'classifier__n_neighbors': 223} has a score of 0.5809 ± 0.003
6. Parameter {'classifier__weights': 'distance', 'classifier__n_neighbors': 168} has a score of 0.5806 ± 0.003
7. Parameter {'classifier__weights': 'distance', 'classifier__n_neighbors': 112} has a score of 0.5797 ± 0.004
8. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 57} has a score of 0.5774 ± 0.005
9. Parameter {'classifier__weights': 'distance', 'classifier__n_neighbors': 2} has a score of 0.4655 ± 0.001
10. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 2} has a score of 0.4589 ± 0.014

2nd run (without duplicates):

3. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 334} has a score of 0.5813 ± 0.002
4. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 168} has a score of 0.5810 ± 0.003
5. Parameter {'classifier__weights': 'distance', 'classifier__n_neighbors': 444} has a score of 0.5809 ± 0.001
6. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 112} has a score of 0.5807 ± 0.004
9. Parameter {'classifier__weights': 'distance', 'classifier__n_neighbors': 57} has a score of 0.5766 ± 0.004

We can see that the best results comes with uniform weights and `n_neighbors` in the range [200,500].

___

**2nd experiment**: `n_neighbors": [200, 233, 266, 300, 333, 366, 400, 433, 466, 500]` with `weights=['uniform']`

|Rank| n_neighbors      |  weights          | Test score     |
|----|------------------|-------------------|----------------|
|1   | 200              | 'uniform'         | 0.5817 ± 0.003 |
|2   | 300              | 'uniform'         | 0.5817 ± 0.001 |
|3   | 233              | 'uniform'         | 0.5816 ± 0.002 |
|4   | 400              | 'uniform'         | 0.5815 ± 0.002 |
| ... |
|9    | 266              | 'uniform'         | 0.5812 ± 0.002 |
|10   | 366              | 'uniform'         | 0.5812 ± 0.001 |

1. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 200} has a score of 0.5817 ± 0.003
2. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 300} has a score of 0.5817 ± 0.001
3. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 233} has a score of 0.5816 ± 0.002
4. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 400} has a score of 0.5815 ± 0.002
5. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 466} has a score of 0.5814 ± 0.002
6. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 433} has a score of 0.5813 ± 0.001
7. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 500} has a score of 0.5812 ± 0.002
8. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 333} has a score of 0.5812 ± 0.002
9. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 266} has a score of 0.5812 ± 0.002
10. Parameter {'classifier__weights': 'uniform', 'classifier__n_neighbors': 366} has a score of 0.5812 ± 0.001
CPU times: user 3.19 s, sys: 737 ms, total: 3.92 s
Wall time: 3min 30s

**We will keep `weights: 'uniform'`, `n_neighbors': 200`.**


### 2. Hyperparameter `leaf_size`
____

We will try to find the value that minimizes prediction time. 

In [None]:
search = GridSearchCV(
    estimator=get_pipeline(
        classifier=KNeighborsClassifier(n_jobs=-1),
        dimension_reduction=LinearDiscriminantAnalysis()
    ),
    param_grid={
        f"{CLASSIFIER_PIPELINE_KEY}__leaf_size": np.linspace(10, 100, 10, dtype="int")
    },
    scoring=make_scorer(cohen_kappa_score),
    cv=get_cv_iterator(n_splits=5),
    n_jobs=-1,
    verbose=1
)

search.fit(X_train_valid[:,2:], y_train_valid);

In [None]:
for idx, current_param in enumerate(search.cv_results_['params']):
    mean_fit_time = search.cv_results_['mean_fit_time'][idx]
    std_fit_time = search.cv_results_['std_fit_time'][idx]
    mean_score_time = search.cv_results_['mean_score_time'][idx]
    std_score_time = search.cv_results_['std_score_time'][idx]
    print(f"Parameter {current_param} has a score time of {mean_score_time:0.4f}s ± {std_score_time:0.3f}s and fit time of {mean_fit_time:0.4f} ± {std_fit_time:0.3f}")

**1st experiment**: `leaf_size=[10,20,30,...,90,100]`

|Rank| leaf_size     |  Score time       | Fit time       |
|----|---------------|-------------------|----------------|
|1   | 100           | 3.0002s ± 1.039s  | 4.0782 ± 0.958 |
|2   | 60            | 3.1930s ± 0.377s  | 4.8671 ± 0.521 |
|3   | 40            | 3.9384s ± 0.762s  | 6.5524 ± 0.852 |
|4   | 80            | 3.9545s ± 0.686s  | 5.0560 ± 0.466 |
| ... |
|9   | 30            | 4.4972s ± 0.774s  | 7.0859 ± 0.918 |
|10  | 20            | 5.0833s ± 0.358s  | 6.9917 ± 0.407 |

We will choose `leaf_size=100`.

### 3. `metric` hyperparameter
___

In [None]:
%%time

evaluate_hyperparams_grid(
    params={
        f"{CLASSIFIER_PIPELINE_KEY}__metric": ['minkowski', 'manhattan', 'chebyshev']
    },
    estimator=get_pipeline(
        classifier=KNeighborsClassifier(n_jobs=-1),
        dimension_reduction=LinearDiscriminantAnalysis()
    ),
    X=X_train_valid,
    y=y_train_valid,
    cv=get_cv_iterator(n_splits=5),
    use_randomized=True
)

|Rank| metric        |  Score          |
|----|---------------|-----------------|
|1   | 'manhattan'   | 0.5348 ± 0.038  |
|2   | 'chebyshev'   | 0.5336 ± 0.038  |
|3   | 'minkowski'   | 0.5333 ± 0.039  |

We will choose `metric='manhattan'`.

### Checking final hyperparameters
___

In [None]:
%%time

evaluate_hyperparams_grid(
    params={
        f"{CLASSIFIER_PIPELINE_KEY}__metric": ['manhattan', 'chebyshev'],
        f"{CLASSIFIER_PIPELINE_KEY}__n_neighbors": np.linspace(200, 300, 3, dtype="int"),
        f"{CLASSIFIER_PIPELINE_KEY}__weights": ['uniform'],
        f"{CLASSIFIER_PIPELINE_KEY}__leaf_size": [200]

    },
    estimator=get_pipeline(
        classifier=KNeighborsClassifier(n_jobs=-1),
        dimension_reduction=LinearDiscriminantAnalysis()
    ),
    X=X_train_valid,
    y=y_train_valid,
    cv=get_cv_iterator(n_splits=5),
    use_randomized=True
)

|Rank| n_neighbors   |  metric           | Score          |
|----|---------------|-------------------|----------------|
|1   | 300           | chebyshev         | 0.5820 ± 0.044 |
|2   | 250           | manhattan         | 0.5819 ± 0.044 |
|3   | 250           | chebyshev         | 0.5817 ± 0.044 |
|4   | 200           | manhattan         | 0.5815 ± 0.044 |
|5   | 300           | manhattan         | 0.5814 ± 0.044 |
|6   | 200           | chebyshev         | 0.5813 ± 0.044 |

We had previously set the hyperparamters the following value, by independantly running tests:
```
weights='uniform',
n_neighbors=200,
leaf_size=100,
metric='manhattan',
```

By checking all these together, with runner ups for hyperparameters `n_neighbors` and `metric`, we found that the following hyperparameters should be better:
```
weights='uniform',
n_neighbors=300,
leaf_size=100,
metric='chebyshev',
```

## Testing 
___

In [14]:
%%time

testing_pipeline = get_pipeline(
    classifier=KNeighborsClassifier(
        weights='uniform',
        n_neighbors=300,
        leaf_size=100,
        metric='chebyshev',
        n_jobs=-1
    ),
    dimension_reduction=LinearDiscriminantAnalysis()
)

testing_pipeline.fit(X_train_valid[:, 2:], y_train_valid);

CPU times: user 4.72 s, sys: 1.54 s, total: 6.26 s
Wall time: 1.99 s


Pipeline(memory=None,
         steps=[('scaling',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('pass-through-categorical',
                                                  'passthrough', [0, 1]),
                                                 ('scaling-continuous',
                                                  StandardScaler(copy=False,
                                                                 with_mean=True,
                                                                 with_std=True),
                                                  [2, 3, 4, 5, 6, 7, 8, 9, 10,
                                                   11, 12, 13, 14, 15, 16, 17,
                                                   18, 19, 20, 21, 22, 23, 24,
                                                   25, 26, 27, 28,

In [15]:
y_test_pred = testing_pipeline.predict(X_test[:,2:])

print(confusion_matrix(y_test, y_test_pred))

print(classification_report(y_test, y_test_pred, target_names=SLEEP_STAGES_VALUES.keys()))

print("Agreement score (Cohen Kappa): ", cohen_kappa_score(y_test, y_test_pred))

[[1490    9    7   29   89]
 [ 317  145  319    6  196]
 [  80   84 3163  193   83]
 [   4    0   46  561    0]
 [ 107  106  295    3  791]]
              precision    recall  f1-score   support

           W       0.75      0.92      0.82      1624
          N1       0.42      0.15      0.22       983
          N2       0.83      0.88      0.85      3603
          N3       0.71      0.92      0.80       611
         REM       0.68      0.61      0.64      1302

    accuracy                           0.76      8123
   macro avg       0.68      0.69      0.67      8123
weighted avg       0.73      0.76      0.73      8123

Agreement score (Cohen Kappa):  0.6561374430741804


### Test results
___

#### a) With LDA and tuning (metric=manhattan and n_neighbors=200)
___

```
              precision    recall  f1-score   support

           W       0.75      0.92      0.83      1624
          N1       0.44      0.15      0.23       983
          N2       0.83      0.87      0.85      3603
          N3       0.70      0.92      0.80       611
         REM       0.68      0.61      0.64      1302

    accuracy                           0.76      8123
   macro avg       0.68      0.70      0.67      8123
weighted avg       0.73      0.76      0.73      8123

Agreement score (Cohen Kappa):  0.656774449223104
```

#### b) With LDA and tuning (metric=chebyshev and n_neighbors=300)

```
              precision    recall  f1-score   support

           W       0.75      0.92      0.82      1624
          N1       0.43      0.15      0.22       983
          N2       0.83      0.88      0.85      3603
          N3       0.71      0.92      0.80       611
         REM       0.68      0.61      0.64      1302

    accuracy                           0.76      8123
   macro avg       0.68      0.69      0.67      8123
weighted avg       0.73      0.76      0.73      8123

Agreement score (Cohen Kappa):  0.6566457163531443
```

## Saving trained model
___

In [16]:
SAVED_DIR = "../trained_model"

if not os.path.exists(SAVED_DIR):
    os.mkdir(SAVED_DIR);

if USE_CONTINUOUS_AGE: 
    joblib.dump(testing_pipeline, f"{SAVED_DIR}/classifier_KNN_age_continuous.joblib")
else:
    joblib.dump(testing_pipeline, f"{SAVED_DIR}/classifier_KNN.joblib")