# Save & reload trained model with ONNX
___

This notebook saves a trained model to ONNX with the scikit-learn ONNX converter (sklearn-onnx), reloads it, and checks that the serialized model reproduces the predictions of the original pipeline.

In [1]:
%matplotlib inline

import os
import subprocess

import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import (RandomForestClassifier,
                              VotingClassifier)
from sklearn.metrics import (confusion_matrix,
                             classification_report,
                             cohen_kappa_score)
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.helpers.onnx_helper import save_onnx_model
from onnxruntime import InferenceSession

from models.model_utils import (train_test_split_according_to_age)
from constants import (SLEEP_STAGES_VALUES,)

## Generate trained pipeline
____

In [2]:
# Column positions, inside the loaded feature matrix, of the subject identifier
# and of the night identifier.
SUBJECT_IDX = 0
NIGHT_IDX = 1

# When True, the "-age-continuous" feature/observation files are loaded and the
# flag is forwarded to the train/test split helper.
USE_CONTINUOUS_AGE = False

# NOTE(review): not referenced by any cell of this notebook -- confirm it is still needed.
DOWNSIZE_SET = False

# Subject identifiers forwarded to the split helper as the test-set subjects.
TEST_SET_SUBJECTS = [0.0, 24.0, 49.0, 71.0]

In [3]:
def load_features():
    """Load the saved features and observations and print a short summary.

    The pair of ``.npy`` files is selected by the module-level
    ``USE_CONTINUOUS_AGE`` flag. Both files are loaded with pickling enabled;
    the loaded features are stacked vertically and the loaded observations
    horizontally. The shapes, the number of distinct subjects, the number of
    distinct (subject, night) pairs and the subject identifiers are printed.

    Returns:
        tuple: ``(X_init, y_init)``, the stacked features and observations.
    """
    if USE_CONTINUOUS_AGE:
        feature_path, observation_path = (
            "data/x_features-age-continuous.npy",
            "data/y_observations-age-continuous.npy",
        )
    else:
        feature_path, observation_path = (
            "data/x_features.npy",
            "data/y_observations.npy",
        )

    # NOTE(review): allow_pickle=True can execute arbitrary code while loading;
    # only point these paths at trusted files.
    raw_features = np.load(feature_path, allow_pickle=True)
    raw_observations = np.load(observation_path, allow_pickle=True)

    features = np.vstack(raw_features)
    observations = np.hstack(raw_observations)

    print(features.shape)
    print(observations.shape)

    # Some subject indexes are skipped, thus the total number is below 83
    # (as we can see in https://physionet.org/content/sleep-edfx/1.0.0/).
    subject_ids = np.unique(features[:, SUBJECT_IDX])
    print("Number of subjects: ", subject_ids.shape[0])

    # A night is identified by its "<subject>-<night>" pair; the set keeps one
    # key per distinct pair.
    night_keys = {
        f"{int(row[0])}-{int(row[1])}"
        for row in features[:, SUBJECT_IDX:NIGHT_IDX + 1]
    }
    print("Number of nights: ", len(night_keys))
    print("Subjects available: ", subject_ids)

    return features, observations

def split_data(X_init, y_init):
    """Split the dataset into a test set and a train/validation set.

    Delegates to ``train_test_split_according_to_age``, forwarding the
    module-level ``USE_CONTINUOUS_AGE`` flag and ``TEST_SET_SUBJECTS`` list,
    then prints the shape of each of the four resulting arrays.

    Args:
        X_init: feature matrix, as returned by ``load_features``.
        y_init: observations, as returned by ``load_features``.

    Returns:
        tuple: ``(X_test, X_train_valid, y_test, y_train_valid)``.
    """
    partitions = train_test_split_according_to_age(
        X_init,
        y_init,
        use_continuous_age=USE_CONTINUOUS_AGE,
        subjects_test=TEST_SET_SUBJECTS,
    )
    X_test, X_train_valid, y_test, y_train_valid = partitions

    print(*(part.shape for part in (X_test, X_train_valid, y_test, y_train_valid)))

    return X_test, X_train_valid, y_test, y_train_valid

# Load the whole dataset once, then separate the rows of the configured test
# subjects (TEST_SET_SUBJECTS) from the train/validation rows.
X_init, y_init = load_features()
X_test, X_train_valid, y_test, y_train_valid = split_data(X_init, y_init)

(168954, 50)
(168954,)
Number of subjects:  78
Number of nights:  153
Subjects available:  [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.
 36. 37. 38. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53. 54.
 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 70. 71. 72. 73. 74.
 75. 76. 77. 80. 81. 82.]
Selected subjects for the test set are:  [0.0, 24.0, 49.0, 71.0]
(8123, 50) (160831, 50) (8123,) (160831,)


In [4]:
def get_voting_classifier_pipeline():
    """Build the unfitted scaling + soft-voting classification pipeline.

    The first step passes the first ``NB_CATEGORICAL_FEATURES`` columns through
    unchanged and standardizes the remaining columns up to ``NB_FEATURES``.
    The second step is a soft ``VotingClassifier`` combining a random forest
    with a KNN classifier that runs on an LDA projection.

    Returns:
        sklearn.pipeline.Pipeline: the assembled, unfitted pipeline.
    """
    NB_CATEGORICAL_FEATURES = 2
    NB_FEATURES = 48

    categorical_columns = list(range(NB_CATEGORICAL_FEATURES))
    continuous_columns = list(range(NB_CATEGORICAL_FEATURES, NB_FEATURES))

    column_scaler = ColumnTransformer([
        ('pass-through-categorical', 'passthrough', categorical_columns),
        ('scaling-continuous', StandardScaler(copy=False), continuous_columns),
    ])

    random_forest = RandomForestClassifier(
        random_state=42,  # enables deterministic behaviour
        n_jobs=-1,
    )

    # LDA reduces the dimensionality before the neighbours search.
    knn_on_lda = Pipeline([
        ('knn_dim_red', LinearDiscriminantAnalysis()),
        ('knn_clf', KNeighborsClassifier(
            n_neighbors=300,
            weights='uniform',
            leaf_size=100,
            metric='chebyshev',
            n_jobs=-1,
        )),
    ])

    # NOTE(review): the voting weights look like the result of an earlier
    # tuning step -- confirm their origin.
    voting_classifier = VotingClassifier(
        estimators=[
            ('random_forest', random_forest),
            ('knn', knn_on_lda),
        ],
        voting='soft',
        weights=np.array([0.83756205, 0.16243795]),
        flatten_transform=False,
        n_jobs=-1,
    )

    return Pipeline([
        ('scaling', column_scaler),
        ('voting_clf', voting_classifier),
    ])

# Columns 0 and 1 hold the subject and night identifiers (SUBJECT_IDX, NIGHT_IDX);
# they are left out of the model inputs, hence the `2:` slices.
testing_pipeline = get_voting_classifier_pipeline().fit(X_train_valid[:, 2:], y_train_valid)
y_test_pred = testing_pipeline.predict(X_test[:, 2:])

# Report the test-set metrics: confusion matrix, per-class report, Cohen's kappa.
print(
    confusion_matrix(y_test, y_test_pred),
    classification_report(y_test, y_test_pred, target_names=SLEEP_STAGES_VALUES.keys()),
    sep="\n",
)
print("Agreement score (Cohen Kappa): ", cohen_kappa_score(y_test, y_test_pred))

[[1522   52    2    4   44]
 [ 240  145  325    1  272]
 [  38   57 3210  188  110]
 [   4    0   31  576    0]
 [  57   83  264    0  898]]
              precision    recall  f1-score   support

           W       0.82      0.94      0.87      1624
          N1       0.43      0.15      0.22       983
          N2       0.84      0.89      0.86      3603
          N3       0.75      0.94      0.83       611
         REM       0.68      0.69      0.68      1302

    accuracy                           0.78      8123
   macro avg       0.70      0.72      0.70      8123
weighted avg       0.75      0.78      0.76      8123

Agreement score (Cohen Kappa):  0.6913101923642638


## Saving pipeline to ONNX
___

In [6]:
# This conversion is expected to fail: it demonstrates that the converter cannot
# handle the Pipeline nested inside the VotingClassifier. The error is caught and
# displayed so that "Restart & Run All" can continue past this cell.
# Only KeyError is caught on purpose; any other failure should still surface.
try:
    onnx_pipeline = convert_sklearn(
        testing_pipeline,
        initial_types=[(
            'float_input',
            FloatTensorType([None, X_train_valid[:, 2:].shape[1]])
        )]
    )
except KeyError as conversion_error:
    print(f"{type(conversion_error).__name__}: {conversion_error}")

KeyError: <class 'sklearn.pipeline.Pipeline'>

We can see that the conversion of the voting classifier does not currently support a `Pipeline` as one of its estimators.

Considering that:
- the option of adding a pipeline as an estimator in the voting classifier is not supported.
- the size of a K-Nearest Neighbors classifier would be too big without its LDA, and that the performance of the Random Forest would be significantly decreased with an LDA beforehand.
- the voting classifier had a Cohen's Kappa agreement score of 0.6913 on the testing set, whilst we obtained 0.6916 with the fat Random Forest, and we obtained 0.6879 with the skinny RF.
- the voting classifier had a Cohen's Kappa agreement score of 0.62 ± 0.043 on the validation set, whilst we obtained [TO BE DEFINED] with the fat Random Forest, and we obtained 0.62 ± 0.043 with the skinny RF (where the validation set is a CV of 5 partitions and considering subjects)
- the size of the small RF is 322.8 Mbytes
- the size of the fat RF is 1.91 Gbytes
- the size of the voting classifier is 376.8 Mbytes

We have decided to temporarily use the skinny random forest, as its performance is quite similar to that of the voting classifier and of the fat random forest.

## Generate RF trained pipeline
___

In [5]:
%%time

def get_random_forest_model():
    """Build the unfitted scaling + random forest classification pipeline.

    The first step passes the first ``NB_CATEGORICAL_FEATURES`` columns through
    unchanged and standardizes the remaining columns up to ``NB_FEATURES``.
    The second step is a random forest of 100 trees with a maximum depth of 24.

    Returns:
        sklearn.pipeline.Pipeline: the assembled, unfitted pipeline.
    """
    NB_CATEGORICAL_FEATURES = 2
    NB_FEATURES = 48

    categorical_columns = list(range(NB_CATEGORICAL_FEATURES))
    continuous_columns = list(range(NB_CATEGORICAL_FEATURES, NB_FEATURES))

    column_scaler = ColumnTransformer([
        ('pass-through-categorical', 'passthrough', categorical_columns),
        ('scaling-continuous', StandardScaler(copy=False), continuous_columns),
    ])

    # NOTE(review): presumably the "skinny" forest discussed in the notes above -- confirm.
    random_forest = RandomForestClassifier(
        n_estimators=100,
        max_depth=24,
        random_state=42,  # enables deterministic behaviour
        n_jobs=-1,
    )

    return Pipeline([
        ('scaling', column_scaler),
        ('classifier', random_forest),
    ])

# Columns 0 and 1 hold the subject and night identifiers (SUBJECT_IDX, NIGHT_IDX);
# they are left out of the model inputs, hence the `2:` slices.
testing_pipeline = get_random_forest_model().fit(X_train_valid[:, 2:], y_train_valid)
y_test_pred = testing_pipeline.predict(X_test[:, 2:])

# Report the test-set metrics: confusion matrix, per-class report, Cohen's kappa.
print(
    confusion_matrix(y_test, y_test_pred),
    classification_report(y_test, y_test_pred, target_names=SLEEP_STAGES_VALUES.keys()),
    sep="\n",
)
print("Agreement score (Cohen Kappa): ", cohen_kappa_score(y_test, y_test_pred))

[[1512   65    3    3   41]
 [ 220  147  332    0  284]
 [  39   45 3212  194  113]
 [   4    0   32  575    0]
 [  49   81  284    0  888]]
              precision    recall  f1-score   support

           W       0.83      0.93      0.88      1624
          N1       0.43      0.15      0.22       983
          N2       0.83      0.89      0.86      3603
          N3       0.74      0.94      0.83       611
         REM       0.67      0.68      0.68      1302

    accuracy                           0.78      8123
   macro avg       0.70      0.72      0.69      8123
weighted avg       0.75      0.78      0.75      8123

Agreement score (Cohen Kappa):  0.6879671218212182
CPU times: user 3min 41s, sys: 2.5 s, total: 3min 43s
Wall time: 1min 49s


## Saving with ONNX
____

In [16]:
# The ONNX graph takes a single float tensor named 'float_input'; its first
# dimension is left unspecified (None) and its second one matches the number of
# columns fed to the pipeline during training.
rf_initial_types = [
    ('float_input', FloatTensorType([None, X_train_valid[:, 2:].shape[1]])),
]
onnx_pipeline = convert_sklearn(testing_pipeline, initial_types=rf_initial_types)

In [None]:
# Make sure the target directory exists before writing the model file.
os.makedirs('trained_model', exist_ok=True)

# save_onnx_model returns the serialized model as bytes; the trailing semicolon
# keeps the notebook from echoing that very large value as the cell output.
save_onnx_model(onnx_pipeline, 'trained_model/rf_pipeline.onnx');

## Comparing ONNX pipeline vs normal pipeline results
____

In [7]:
# Reload the serialized pipeline from disk into an ONNX Runtime inference session.
sess = InferenceSession('trained_model/rf_pipeline.onnx')

In [8]:
# The graph input was declared as a float tensor, so the test features are cast
# to float32 before being fed under the 'float_input' name.
onnx_feed = {'float_input': X_test[:, 2:].astype(np.float32)}

# Passing None requests every output; only the first one is kept.
# NOTE(review): the first output is presumably the predicted labels -- confirm.
y_test_pred_onnx = sess.run(None, onnx_feed)[0]

In [9]:
# Fraction of test samples on which the ONNX and scikit-learn predictions differ.
# NOTE(review): the small non-zero rate presumably comes from the float32 cast
# of the inputs -- confirm.
np.mean(y_test_pred_onnx != y_test_pred)

0.000738643358365136

## ONNX Pipeline drawing

In [27]:
# Kept local to this cell so that the rest of the notebook does not depend on
# the drawing tools being importable.
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer

# Describe the converted graph in Graphviz DOT format.
# rankdir="TB" lays the graph out top-to-bottom ("TP" is not a valid Graphviz value).
pydot_graph = GetPydotGraph(
    onnx_pipeline.graph,
    name=onnx_pipeline.graph.name,
    rankdir="TB",
    node_producer=GetOpNodeProducer("docstring"),
)
pydot_graph.write_dot("graph.dot")

# Render the DOT file to PNG; -O derives the output file name from the input.
# The former -V flag only printed the Graphviz version and exited without
# rendering anything. check=True makes a missing or failing `dot` visible.
subprocess.run(["dot", "-O", "-Tpng", "graph.dot"], check=True);

0