# Task 3 - Ensembles

This notebook trains and evaluates ensembles of the tree-based models and the neural networks from the previous tasks.

In [None]:
#this code is needed for COLAB run only ********
# COMMENT THIS OUT IF YOU RUN IT LOCALLY, AND DO NOT FORGET TO CHANGE THE PATH NAMES IN constants.py!

# ! [ ! -z "$COLAB_GPU" ] && pip install torch torchvision pillow==4.1.1 skorch
# from google.colab import drive
# drive.mount('/content/drive')
# path_to_module = '/content/drive/MyDrive/ML4HC/project1'
# import sys
# sys.path.append(path_to_module)


In [None]:
# Notebook setup: enable the IPython autoreload extension in mode 2, which
# re-imports modified modules before each cell runs, so edits to the project
# code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from IPython.display import display, HTML
# Inject CSS that stretches the notebook container to the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))


### Choose the dataset

In [None]:
# Select the dataset used throughout the notebook: "MITBIH" or "PTBDB".
# DATASET = "MITBIH"
DATASET = "PTBDB"

# Number of target classes for every supported dataset.
_N_CLASSES_BY_DATASET = {"MITBIH": 5, "PTBDB": 2}

# Fail fast on a misspelled dataset name instead of silently falling back to
# the 2-class PTBDB configuration.
if DATASET not in _N_CLASSES_BY_DATASET:
    raise ValueError(
        f"Unknown DATASET {DATASET!r}; "
        f"expected one of {sorted(_N_CLASSES_BY_DATASET)}"
    )

N_CLASSES = _N_CLASSES_BY_DATASET[DATASET]
    

### Imports

In [None]:
import torch
import numpy as np

import skorch
from skorch.callbacks import LRScheduler, EarlyStopping, Checkpoint

from torch.optim.lr_scheduler import ReduceLROnPlateau


from src.data_loading import load_data_mitbih, load_data_ptbdb
from src.data_preprocessing import *
from src.metrics_utils import *
from src.cnn_models.cnn import CNN
from src.load_cnn import load_cnn_model
from src.skorch_utils import get_neural_net_classifier, get_class_weights
from src.ensembling import *
from src.tree_models_io_utils import *

# Seed both the NumPy and the PyTorch global random number generators with
# the same fixed value so that repeated runs of the notebook are comparable.
np.random.seed(0)
torch.manual_seed(0)


### Data Loading

In [None]:
# Pick the loader that matches the selected dataset (5 classes -> MITBIH,
# otherwise PTBDB) and fetch the train / test splits.
load_selected_dataset = load_data_mitbih if N_CLASSES == 5 else load_data_ptbdb
(x, y), (xtest, ytest) = load_selected_dataset()

train_labels = np.unique(y)
print(x.shape)
print(train_labels)
# Sanity check: train and test splits must contain the same set of labels.
assert np.array_equal(train_labels, np.unique(ytest))

In [None]:
# Prepare the inputs for the tree models: run both splits through
# convert3Dto2D (presumably flattening to a 2-D feature matrix -- confirm in
# src.data_preprocessing) and alias the labels under the *_train / *_test names.
print("Shape before adjustment: ", x.shape)
x_train, x_test = convert3Dto2D(x), convert3Dto2D(xtest)
y_train, y_test = y, ytest
print("Shape of x_train after adjustment: ", x_train.shape)
print("Shape of x_test after adjustment: ", x_test.shape)


### Create & Evaluate the ensemble models

#### Loading tree models (random forest, XGBoost and LightGBM from Task 2, saved in the folder tree_models inside the src directory)

In [None]:
# Load the three saved tree models for the selected dataset, in the order
# random forest, XGBoost, LightGBM. The list is what the ensembling helpers
# consume; the individual names are used for per-model evaluation.
list_of_models = [
    load_model(DATASET) for load_model in (load_rf, load_xgboost, load_lgbm)
]
rf_classifier, xgb_classifier, lgb_classifier = list_of_models


#### Check individual model's performance

In [None]:
# Evaluate the random forest on its own: predict_proba output on the test
# features, scored against the test labels.
rf_test_proba = rf_classifier.predict_proba(x_test)
compute_metrics(y_test, rf_test_proba, name="Random Forrest")


In [None]:
# Evaluate XGBoost on its own: predict_proba output on the test features,
# scored against the test labels.
xgb_test_proba = xgb_classifier.predict_proba(x_test)
compute_metrics(y_test, xgb_test_proba, name="XGBoost")


In [None]:
# Evaluate LightGBM on its own: predict_proba output on the test features,
# scored against the test labels.
lgb_test_proba = lgb_classifier.predict_proba(x_test)
compute_metrics(y_test, lgb_test_proba, name="LightGBM")


#### Evaluate the performance

In [None]:
# Build three ensembles of the tree models and predict on the test features.

# 1) Ensemble without explicit weights (helper defaults apply).
y_average_pred = get_ensemble_predictions(list_of_models, x_test, N_CLASSES)

# 2) Ensemble with hand-picked weights; order follows list_of_models
#    (random forest, XGBoost, LightGBM).
tree_ensemble_weights = [0.10, 0.60, 0.30]
y_weighted_pred = get_ensemble_predictions(
    list_of_models,
    x_test,
    N_CLASSES,
    weights=tree_ensemble_weights,
)

# 3) Logistic-regression ensemble, given the training data and labels.
# NOTE(review): the base models were presumably fitted on this same training
# data, which can make the combiner over-trust them -- confirm.
y_logreg_pred = get_logreg_ensemble_predictions(
    list_of_models=list_of_models,
    dataset=x_train,
    dataset_labels=y_train,
    test_set=x_test,
)


In [None]:
# Score the tree ensemble built without explicit weights against the test labels.
compute_metrics(y_test, y_average_pred, name="Average Ensemble Metrics")


In [None]:
# Score the tree ensemble built with hand-picked weights against the test labels.
compute_metrics(y_test, y_weighted_pred, name="Weighted Ensemble Metrics")


In [None]:
# Score the logistic-regression tree ensemble against the test labels.
compute_metrics(y_test, y_logreg_pred, name="LogReg Ensemble Metrics")


### CNN Loading

In [None]:
# Names of the two CNN models to load. Uncomment the `+ "_pretrained"` suffix
# to use the pretrained nets; otherwise Task1 and Task2 are expected to have
# been run successfully beforehand.
residual_net_name = "CnnResidual"  # + "_pretrained"
vanilla_net_name = "CnnVanilla"  # + "_pretrained"

residual_net = load_cnn_model(residual_net_name, DATASET, N_CLASSES)
vanilla_net = load_cnn_model(vanilla_net_name, DATASET, N_CLASSES)


### Check individual performance

In [None]:
# The CNNs need the inputs in the layout produced by preprocess_x_pytorch,
# so convert both the training and the test features.
xtrain_torch, xtest_torch = (
    preprocess_x_pytorch(x),
    preprocess_x_pytorch(xtest),
)


In [None]:
# Evaluate the residual CNN on its own: predict_proba output on the
# PyTorch-formatted test features, scored against the test labels.
y_proba_residual = residual_net.predict_proba(xtest_torch)
compute_metrics(ytest, y_proba_residual, name="CnnResidual")


In [None]:
# Evaluate the vanilla CNN on its own: predict_proba output on the
# PyTorch-formatted test features, scored against the test labels.
y_proba_vanilla = vanilla_net.predict_proba(xtest_torch)
compute_metrics(ytest, y_proba_vanilla, name="CnnVanilla")


### Ensemble CNNs

In [None]:
# Ensemble of both CNNs without explicit weights (helper defaults apply),
# scored against the test labels.
average_cnn_members = [residual_net, vanilla_net]
average_ensemble_preds = get_ensemble_predictions(
    average_cnn_members,
    xtest_torch,
    N_CLASSES,
)
compute_metrics(ytest, average_ensemble_preds, name="AverageCnnEnsemble")


In [None]:
# Ensemble of both CNNs with hand-picked weights; the order matches the model
# list (residual net first, vanilla net second).
cnn_ensemble_weights = [0.7, 0.3]
weighted_ensemble_preds = get_ensemble_predictions(
    [residual_net, vanilla_net],
    xtest_torch,
    N_CLASSES,
    weights=cnn_ensemble_weights,
)
compute_metrics(ytest, weighted_ensemble_preds, name="WeightedCnnEnsemble")


In [None]:
# Logistic-regression ensemble of both CNNs: the helper receives the training
# features and labels plus the test features, and its predictions are scored
# against the test labels.
logreg_cnn_members = [residual_net, vanilla_net]
logreg_ensemble_preds = get_logreg_ensemble_predictions(
    logreg_cnn_members,
    xtrain_torch,
    y,
    xtest_torch,
)
compute_metrics(ytest, logreg_ensemble_preds, name="LogRegCnnEnsemble")
