## This notebook provides training, saving and evaluation for bi-LSTM, residual CNN and tree-based classifiers

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

from IPython.display import display, HTML
# Inject CSS that stretches the notebook's `.container` element to full width.
display(HTML("<style>.container { width:100% !important; }</style>"))
from sklearn.ensemble import RandomForestClassifier, VotingClassifier


## Choose the dataset

In [None]:
# Select the dataset used by every section below.
# Supported values: "MITBIH" (5 classes) and "PTBDB" (2 classes).
# DATASET = "MITBIH"
DATASET = "PTBDB"

# Number of target classes for each supported dataset.
_N_CLASSES_BY_DATASET = {"MITBIH": 5, "PTBDB": 2}

# Fail fast on a misspelled dataset name instead of silently falling back to
# the PTBDB configuration.
if DATASET not in _N_CLASSES_BY_DATASET:
    raise ValueError(
        "Unknown DATASET {!r}; expected one of {}".format(
            DATASET, sorted(_N_CLASSES_BY_DATASET)
        )
    )

N_CLASSES = _N_CLASSES_BY_DATASET[DATASET]
    

## Imports

In [None]:
import torch
import numpy as np

import skorch
from skorch.callbacks import LRScheduler, EarlyStopping, Checkpoint

from torch.optim.lr_scheduler import ReduceLROnPlateau

import lightgbm as lgb
from xgboost.sklearn import XGBClassifier

from copy import deepcopy


from src.data_loading import load_data_mitbih, load_data_ptbdb
from src.data_preprocessing import preprocess_x_pytorch, preprocess_y_pytorch
from src.metrics_utils import compute_metrics, compute_metrics_from_keras, skorch_f1_score, sklearn_f1_score
from src.cnn_models.cnn import CNN
from src.skorch_utils import get_neural_net_classifier, get_class_weights
from src.json_utils import serialize_tensors, save_file
from src.tree_models_io_utils import *

# Seed PyTorch and NumPy with the same fixed value (0) so runs are repeatable.
for _seed_fn in (torch.manual_seed, np.random.seed):
    _seed_fn(0)


## CNN Models Section

### Data Loading

In [None]:
# Choose the loader that matches the configured number of classes.
load_dataset = load_data_mitbih if N_CLASSES == 5 else load_data_ptbdb
(x, y), (xtest, ytest) = load_dataset()

# Keep independent copies of the freshly loaded arrays; the RNN section at the
# end of the notebook trains and evaluates on these.
x_orig, y_orig = deepcopy(x), deepcopy(y)
xtest_orig, ytest_orig = deepcopy(xtest), deepcopy(ytest)

print(x.shape)
print(np.unique(y))
# Both splits must contain exactly the same set of labels.
assert np.array_equal(np.unique(y), np.unique(ytest))


In [None]:
# Run the project's PyTorch preprocessing helpers on the features and labels
# of both splits. The names are rebound, so this cell should only be run once
# per data load.
x = preprocess_x_pytorch(x)
xtest = preprocess_x_pytorch(xtest)
y = preprocess_y_pytorch(y)
ytest = preprocess_y_pytorch(ytest)


### Train & Save CNN model

In [None]:
# Best hyperparameters found for the residual CNN, chosen per dataset.
# The `module__*`, `iterator_train__*` and `criterion__*` keys are forwarded
# through `get_neural_net_classifier`.
if DATASET == "MITBIH":
    params = dict(
        module__n_filters=[64, 96, 96, 96, 128],
        module__strides=[2, 1, 1, 1, 1],
        module__kernel_sizes=[13, 9, 9, 9, 7],
        module__fully_connected_features=64,
        module__adaptive_average_len=8,
        module__residual=True,
        lr=0.0002,
        iterator_train__batch_size=256,
        criterion__weight=torch.Tensor([1., 1., 1., 1., 1.]),
    )
else:
    params = dict(
        module__n_filters=[32, 48, 48, 48, 64],
        module__strides=[2, 1, 1, 1, 1],
        module__kernel_sizes=[9, 7, 7, 7, 5],
        module__fully_connected_features=128,
        module__adaptive_average_len=8,
        module__residual=True,
        lr=0.0008,
        iterator_train__batch_size=256,
        criterion__weight=torch.Tensor([1., 1.]),
    )

# Directory shared by the saved hyperparameters and the model checkpoint.
cnn_dir = "CnnResidual" + "_" + DATASET

# The hyperparameters are stored next to the checkpoint so the model can be
# rebuilt and loaded later.
save_file(cnn_dir + "/params.json", serialize_tensors(params))

# Reports the F1 score during fitting; the callbacks below monitor its history key.
macro_f1_cb = skorch.callbacks.EpochScoring(scoring=skorch_f1_score, lower_is_better=False)

# Lowers the learning rate on a plateau, down to a floor of 1e-6.
lr_scheduler_cb = LRScheduler(policy=ReduceLROnPlateau, min_lr=0.000001, patience=2, verbose=True)

# Stops training after 25 epochs without an improvement of the F1 score.
early_stopping_cb = EarlyStopping(patience=25, monitor="skorch_f1_score", lower_is_better=False)

# Saves the model whenever the monitored F1 score reaches a new best value.
cp_cb = Checkpoint(dirname=cnn_dir, monitor="skorch_f1_score_best")

training_callbacks = [macro_f1_cb, lr_scheduler_cb, early_stopping_cb, cp_cb]
net = get_neural_net_classifier(module=CNN, n_classes=N_CLASSES, callbacks=training_callbacks, params=params)
net.fit(x, y)


### Evaluate CNN Model

In [None]:
# Restore the parameters written by the checkpoint callback (best monitored F1)
# before scoring. Without this the metrics describe the last-epoch weights,
# which early stopping leaves up to 25 epochs past the best epoch, and they
# would not match the model saved on disk. The RNN section does the same by
# reloading its best weights before evaluation.
# NOTE(review): assumes `net` is a skorch NeuralNet exposing `load_params` - confirm.
net.load_params(checkpoint=cp_cb)

y_proba = net.predict_proba(xtest)
compute_metrics(ytest, y_proba, name="Residual_CNN")


### Tree Algorithms

We load the data again in order to adapt it to the required format for the tree algorithms.

In [None]:
# Helper that reshapes the input into the 2D layout used by the tree models.
def convert3Dto2D(x):
    """Flatten every sample of ``x`` into one row of features.

    The first axis is kept as the sample axis and all remaining axes are
    collapsed, so the number of rows always equals ``x.shape[0]``. For input
    of shape ``(n_samples, n_steps, 1)`` the result is identical to
    ``x.reshape(-1, x.shape[1])``. With a trailing axis larger than 1, that
    older expression would have mixed values from different samples and
    produced more rows than there are labels.

    Parameters
    ----------
    x : array-like exposing ``shape`` and ``reshape``
        Input with the samples along the first axis.

    Returns
    -------
    The reshaped input with shape ``(x.shape[0], product of remaining dims)``.
    """
    # The feature count is computed explicitly rather than passing -1, because
    # -1 cannot be inferred when the array holds no samples.
    n_features = 1
    for dim in x.shape[1:]:
        n_features *= dim
    return x.reshape(x.shape[0], n_features)


In [None]:
# Function that retrieves the default parameter sets of a tree model.
def get_params_for_trees(model_name):
    """Return the default hyperparameters of a tree model for both datasets.

    Parameters
    ----------
    model_name : str
        ``'rf'`` selects the random forest parameters and ``'xgboost'`` the
        XGBoost parameters. Any other value returns the parameters used for
        LightGBM.

    Returns
    -------
    dict
        A dictionary with the keys ``'mitbih'`` and ``'ptbdb'``, each mapping
        to a keyword dictionary for the corresponding classifier.
    """
    if model_name == 'rf':
        return dict(
            mitbih=dict(n_estimators=400, max_depth=15, criterion='gini', random_state=0),
            ptbdb=dict(n_estimators=800, max_depth=12, criterion='gini', random_state=0),
        )

    if model_name == 'xgboost':
        return dict(
            mitbih=dict(learning_rate=0.1, n_estimators=400, max_depth=6),
            ptbdb=dict(learning_rate=0.1, n_estimators=800, max_depth=10),
        )

    # Every other name falls through to the LightGBM defaults.
    return dict(
        mitbih=dict(learning_rate=0.1, random_state=0, max_depth=10),
        ptbdb=dict(learning_rate=0.1, random_state=0, max_depth=10),
    )
    

In [None]:
# Pick the data loader and the parameter-set key that match the dataset.
if N_CLASSES == 5:
    load_tree_data, params_key = load_data_mitbih, 'mitbih'
else:
    load_tree_data, params_key = load_data_ptbdb, 'ptbdb'

# Reload the raw data; this rebinds x, y, xtest and ytest used by the CNN section.
(x, y), (xtest, ytest) = load_tree_data()
parameters_rf = get_params_for_trees('rf')[params_key]
parameters_xg = get_params_for_trees('xgboost')[params_key]
parameters_lgbm = get_params_for_trees('lgbm')[params_key]

print("Shape before adjustment: ", x.shape)  # shape as loaded

# Only the features are reshaped; the labels are used as loaded.
y_train, y_test = y, ytest
x_train = convert3Dto2D(x)
x_test = convert3Dto2D(xtest)

print("Shape of x_train after adjustment: ", x_train.shape)
print("Shape of x_test after adjustment: ", x_test.shape)


##### Random Forest (create and train the model)

In [None]:
# Build the random forest from the dataset-specific parameters selected above,
# forwarding exactly the four settings used by this notebook.
rf_kwargs = {
    key: parameters_rf[key]
    for key in ('n_estimators', 'max_depth', 'criterion', 'random_state')
}
rf_classifier = RandomForestClassifier(**rf_kwargs)
rf_classifier.fit(x_train, y_train)


##### XGBoost (create and train the model)

In [None]:
# Build the XGBoost classifier from the dataset-specific parameters selected
# above. No random_state is passed here, unlike the other two tree models.
xgb_kwargs = {
    key: parameters_xg[key]
    for key in ('learning_rate', 'n_estimators', 'max_depth')
}
xgb_classifier = XGBClassifier(**xgb_kwargs)
xgb_classifier.fit(x_train, y_train)


##### LightGBM (create and train the model)

In [None]:
# Build the LightGBM classifier from the dataset-specific parameters selected above.
lgbm_kwargs = {
    key: parameters_lgbm[key]
    for key in ('learning_rate', 'max_depth', 'random_state')
}
lgb_classifier = lgb.LGBMClassifier(**lgbm_kwargs)

# The log-loss is tracked on the test split first and the training split second.
lgbm_eval_sets = [(x_test, y_test), (x_train, y_train)]
lgb_classifier.fit(x_train, y_train, eval_set=lgbm_eval_sets, eval_metric='logloss')


#### Model Evaluation

In [None]:
# Class probabilities of the random forest on the test split, passed to the
# shared metrics helper.
y_pred_rf = rf_classifier.predict_proba(x_test)
compute_metrics(
    y_test,
    y_pred_rf,
    name="Random Forrest",
)


In [None]:
# Class probabilities of the XGBoost model on the test split, passed to the
# shared metrics helper.
y_pred_xg = xgb_classifier.predict_proba(x_test)
compute_metrics(
    y_test,
    y_pred_xg,
    name="XGBoost",
)


In [None]:
# Class probabilities of the LightGBM model on the test split, passed to the
# shared metrics helper.
y_pred_lgbm = lgb_classifier.predict_proba(x_test)
compute_metrics(
    y_test,
    y_pred_lgbm,
    name="LightGBM",
)


#### Saving tree models

In [None]:
# Hand the fitted random forest and the dataset name to the project's save helper
# (brought in by the star import from src.tree_models_io_utils).
save_rf(rf_classifier, DATASET)


In [None]:
# Hand the fitted XGBoost model and the dataset name to the project's save helper
# (brought in by the star import from src.tree_models_io_utils).
save_xgboost(xgb_classifier, DATASET)


In [None]:
# Hand the fitted LightGBM model and the dataset name to the project's save helper
# (brought in by the star import from src.tree_models_io_utils).
save_lgbm(lgb_classifier, DATASET)


### Train RNN model

In [None]:
from tensorflow import keras
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Dense, Input, Dropout, LSTM, GRU, SimpleRNN, Bidirectional, Dropout
from src.rnn_models.rnn import get_rnn_model


In [None]:
# Build the "advanced" RNN variant for the selected dataset.
model = get_rnn_model(DATASET, "advanced")

# Weights file written by the checkpoint callback and reloaded for evaluation.
file_path = "advanced_rnn_" + DATASET + ".h5"

# All three callbacks watch validation accuracy, where higher is better.
# NOTE(review): the "val_acc" key must match the metric name the model was
# compiled with (it may be "val_accuracy") - confirm in get_rnn_model.
checkpoint = ModelCheckpoint(
    file_path,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)
early = keras.callbacks.EarlyStopping(
    monitor="val_acc",
    mode="max",
    patience=5,
    verbose=1,
)
# ReduceLROnPlateau is the Keras callback here: the import in the RNN imports
# cell rebinds the name imported earlier from torch.optim.lr_scheduler, so
# re-running the CNN training cell afterwards would pick up the Keras class.
redonplat = ReduceLROnPlateau(
    monitor="val_acc",
    mode="max",
    patience=3,
    verbose=2,
)
callbacks_list = [checkpoint, early, redonplat]

# Train on the untouched copies of the data; 10% is held out for validation.
model.fit(
    x_orig,
    y_orig,
    epochs=1000,
    verbose=2,
    callbacks=callbacks_list,
    validation_split=0.1,
)


### Evaluate RNN model

In [None]:
# Reload the weights stored by the checkpoint callback, then score the model
# on the untouched copy of the test split.
model.load_weights(file_path)
y_proba = model.predict(xtest_orig)
compute_metrics_from_keras(
    ytest_orig,
    y_proba,
    name="Advanced_RNN",
)
