# Task 4: RNN Based Transfer Learning 
--------------------------------------------------
This notebook contains the transfer learning (Task 4) part of the project. The two best-performing RNN-based models, Bidirectional LSTM and ConvLSTM, are selected and both are trained on the larger MIT-BIH dataset. Their final fully connected layer is then replaced to convert them into binary classifiers for the smaller PTB dataset. Two transfer learning approaches are applied to both models.

- For the first approach, the pre-trained weights are leveraged without freezing any layers so the entire model is tuned again by training on the PTB dataset.

- In the second approach, most of the model layers are kept frozen and only the fully connected layers at the end are tuned on the PTB dataset.  

- At the bottom of the notebook, a 5-fold cross-validation grid search is carried out for both approaches and both model types to identify the best-performing hyperparameter configuration. The cells at the beginning of the notebook use the resulting optimal hyperparameters.


# Results

### ConvLSTM model
#### Without Layer Freezing:

Test f1 score : 0.9928571428571429 

Test accuracy score : 0.9896942631398145 

Test AUROC score : 0.9879216215294399 

Test AUPRC score : 0.9916061177955566

#### With Layer Freezing:
Test f1 score : 0.9025641025641027 

Test accuracy score : 0.8564067330814154 

Test AUROC score : 0.8047659595487964 

Test AUPRC score : 0.8719745378050834 

----------------------------------------------------------------

### Bidirectional LSTM model
#### Without Layer Freezing:

Test f1 score : 0.9762357414448669 

Test accuracy score : 0.9656475437993817 

Test AUROC score : 0.9564438600473502 

Test AUPRC score : 0.969526262999743 


#### With Layer Freezing:

Test f1 score : 0.9054986907879077 

Test accuracy score : 0.8636207488835451 

Test AUROC score : 0.8306710073048331 

Test AUPRC score : 0.8886332095514813

### Overall, transfer learning without layer freezing performed better for both models.


# Implementation

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from keras import losses

import pickle
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, average_precision_score

import model_helper

import sys
sys.path.append("../")
from models import *


# To ensure reproducible results:
from numpy.random import seed
seed(1)

In [None]:
# Select the compute device: keep the first GPU when TensorFlow reports it,
# otherwise fall back to the CPU.
device_name = tf.test.gpu_device_name()
device_name = device_name if device_name == "/device:GPU:0" else "/cpu:0"
print('Found device at: {}'.format(device_name))

# MIT-BIH Arrhythmia Database
------------------------------------------

In [None]:
# Load the MIT-BIH splits; the training rows are shuffled once after loading.
df_train = pd.read_csv("../input/mitbih_train.csv", header=None).sample(frac=1)
df_test = pd.read_csv("../input/mitbih_test.csv", header=None)

# Column 187 holds the label; columns 0..186 hold the signal, which gets a
# trailing axis of size 1 appended.
X_mitbih = np.expand_dims(np.array(df_train[list(range(187))].values), axis=-1)
Y_mitbih = np.array(df_train[187].values).astype(np.int8)

X_test_mitbih = np.expand_dims(np.array(df_test[list(range(187))].values), axis=-1)
Y_test_mitbih = np.array(df_test[187].values).astype(np.int8)

# PTB Diagnostic ECG Database
------------------------------------------

In [None]:
# Read the normal and abnormal PTB recordings and stack them into one frame.
df_1 = pd.read_csv("../input/ptbdb_normal.csv", header=None)
df_2 = pd.read_csv("../input/ptbdb_abnormal.csv", header=None)
df = pd.concat([df_1, df_2])

# Hold out 20% as the test set, stratified on the label column (187).
df_train, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=1337,
    stratify=df[187],
)

# Column 187 holds the label; columns 0..186 hold the signal, which gets a
# trailing axis of size 1 appended.
X_ptbdb = np.expand_dims(np.array(df_train[list(range(187))].values), axis=-1)
Y_ptbdb = np.array(df_train[187].values).astype(np.int8)

X_test_ptbdb = np.expand_dims(np.array(df_test[list(range(187))].values), axis=-1)
Y_test_ptbdb = np.array(df_test[187].values).astype(np.int8)

# Transfer Learning for ConvLSTM model
------------------------------------------------------------
## Without Freezing Layers

In [None]:
from_file = True
file_path = "Results/final_cnn_lstm_mitbih.h5"
new_save_path = "Results/transfer_cnn_lstm.h5"

# Step 1: obtain a ConvLSTM trained on the larger MIT-BIH dataset, either by
# loading a saved model or, if no save file is available, by training it here.
if from_file:
    model = keras.models.load_model(file_path)
    print("Trained model weights loaded.")
else:
    print("Training on MIT Dataset:")

    with tf.device(device_name):

        # Callbacks stop training or reduce the learning rate when the
        # held-out validation loss stops improving; patience is set high
        # due to the instability of RNNs.
        early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
        redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
        if file_path:
            checkpoint = ModelCheckpoint(filepath=file_path, monitor='val_loss', verbose=1, save_best_only=True)
            callbacks_list = [checkpoint, early, redonplat]
        else:
            callbacks_list = [early, redonplat]

        # The pre-training model must match the MIT-BIH data it is trained on,
        # so input length and class count are taken from the MIT-BIH arrays
        # (previously the PTB input length and 2 classes were used here).
        # NOTE(review): the frozen-layer ConvLSTM cell passes the class count
        # as `classes=` instead of `num_classes=` -- confirm which keyword
        # ConvLSTM actually accepts.
        model = ConvLSTM(input_length=X_mitbih.shape[1], num_units=150, num_conv=2, num_dense=2,
                         num_classes=len(np.unique(Y_mitbih)), dropout=0.5, optimizer="adam",
                         callbacks=callbacks_list, lr=0.001)

        model = model_helper.train_test_model(model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
                                              binary_task=False)

    print("Training over.")

print("Proceeding to transfer learning on PTB Dataset:")

# Step 2: drop the last layer of the pre-trained model and attach a single
# sigmoid unit for the binary PTB task.
if isinstance(model, tf.keras.Sequential):
    base_layers = model.layers[:-1]
else:
    # presumably a project wrapper exposing the Keras model via .model() -- TODO confirm
    base_layers = model.model().layers[:-1]
new_model = keras.models.Sequential(base_layers)
new_model.add(keras.layers.Dense(1, activation="sigmoid", name="dense_2"))

with tf.device(device_name):

    # No layer is frozen here, so the whole network is fine-tuned.
    # The learning rate is the same value used for pre-training above.
    new_model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=0.001),
                      metrics=['accuracy', keras.metrics.AUC()])

    # Step 3: fine-tune on the PTB dataset.
    transfer_model = model_helper.train_test_model(new_model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb, epochs=50,
                                                   binary_task=True, save_name=new_save_path)


## With Freezing Layers

In [None]:
from_file = True
file_path = "Results/final_cnn_lstm_mitbih.h5"
new_save_path = "Results/transfer_cnn_lstm_frozen.h5"

# Step 1: obtain a ConvLSTM trained on the larger MIT-BIH dataset, either by
# loading a saved model or, if no save file is available, by training it here.
if from_file:
    model = keras.models.load_model(file_path)
    print("Trained model weights loaded.")
else:
    print("Training on MIT Dataset:")

    with tf.device(device_name):

        # Callbacks stop training or reduce the learning rate when the
        # held-out validation loss stops improving; patience is set high
        # due to the instability of RNNs.
        early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
        redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
        # Checkpoint to file_path (this cell never defines `save_name`, which
        # was referenced here before and would raise a NameError).
        if file_path:
            checkpoint = ModelCheckpoint(filepath=file_path, monitor='val_loss', verbose=1, save_best_only=True)
            callbacks_list = [checkpoint, early, redonplat]
        else:
            callbacks_list = [early, redonplat]

        # The pre-training model must match the MIT-BIH data it is trained on,
        # so input length and class count are taken from the MIT-BIH arrays
        # (previously the PTB input length and 2 classes were used here).
        # NOTE(review): the unfrozen ConvLSTM cell passes the class count as
        # `num_classes=` instead of `classes=` -- confirm which keyword
        # ConvLSTM actually accepts.
        model = ConvLSTM(input_length=X_mitbih.shape[1], num_units=150, num_conv=2, num_dense=2,
                         classes=len(np.unique(Y_mitbih)), dropout=0.5, optimizer="adam",
                         callbacks=callbacks_list, lr=0.001)

        model = model_helper.train_test_model(model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
                                              binary_task=False)

    print("Training over.")

print("Proceeding to transfer learning on PTB Dataset:")

# Step 2: drop the last layer of the pre-trained model and attach a single
# sigmoid unit for the binary PTB task.
if isinstance(model, tf.keras.Sequential):
    base_layers = model.layers[:-1]
else:
    # presumably a project wrapper exposing the Keras model via .model() -- TODO confirm
    base_layers = model.model().layers[:-1]
new_model = keras.models.Sequential(base_layers)
new_model.add(keras.layers.Dense(1, activation="sigmoid", name="dense_2"))

# Step 3: freeze every layer except the last two of the new model, so only
# those two are updated during fine-tuning.
for layer in new_model.layers[:-2]:
    layer.trainable = False

with tf.device(device_name):

    # The learning rate is the same value used for pre-training above.
    new_model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=0.001),
                      metrics=['accuracy', keras.metrics.AUC()])

    # Step 4: fine-tune the unfrozen layers on the PTB dataset.
    transfer_model = model_helper.train_test_model(new_model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb, epochs=100,
                                                   binary_task=True, save_name=new_save_path)

# Transfer Learning for Bidirectional LSTM model
------------------------------------------------------------
## Without Freezing Layers

In [None]:
from_file = True
file_path = "Results/final_bdlstm_mitbih.h5"
new_save_path = "Results/transfer_bdlstm.h5"

# Step 1: obtain a Bidirectional LSTM trained on the larger MIT-BIH dataset,
# either by loading a saved model or, if no save file is available, by
# training it here.
if from_file:
    model = keras.models.load_model(file_path)
    print("Trained model weights loaded.")
else:
    print("Training on MIT Dataset:")

    with tf.device(device_name):

        # Callbacks stop training or reduce the learning rate when the
        # held-out validation loss stops improving; patience is set high
        # due to the instability of RNNs.
        early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
        redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
        # Checkpoint to file_path (this cell never defines `save_name`, which
        # was referenced here before and would raise a NameError).
        if file_path:
            checkpoint = ModelCheckpoint(filepath=file_path, monitor='val_loss', verbose=1, save_best_only=True)
            callbacks_list = [checkpoint, early, redonplat]
        else:
            callbacks_list = [early, redonplat]

        # Create and train the model on MIT-BIH.
        model = BiDirLSTM(input_length=X_mitbih.shape[1], num_units=100, classes=5, num_cells=2,
                          num_dense=2, dropout=0, optimizer="adam", callbacks=callbacks_list, lr=0.0001)

        model = model_helper.train_test_model(model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
                                              binary_task=False)

    print("Training over.")

print("Proceeding to transfer learning on PTB Dataset:")

# Step 2: drop the last layer of the pre-trained model and attach a single
# sigmoid unit for the binary PTB task.
if isinstance(model, tf.keras.Sequential):
    base_layers = model.layers[:-1]
else:
    # presumably a project wrapper exposing the Keras model via .model() -- TODO confirm
    base_layers = model.model().layers[:-1]
new_model = keras.models.Sequential(base_layers)
new_model.add(keras.layers.Dense(1, activation="sigmoid", name="dense_2"))

with tf.device(device_name):

    # No layer is frozen here, so the whole network is fine-tuned.
    # The learning rate is the same value used for pre-training above.
    new_model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=0.0001),
                      metrics=['accuracy', keras.metrics.AUC()])

    # Step 3: fine-tune on the PTB dataset.
    transfer_model = model_helper.train_test_model(new_model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb, epochs=50,
                                                   binary_task=True, save_name=new_save_path)


## With Freezing Layers

In [None]:
from_file = True
file_path = "Results/final_bdlstm_mitbih.h5"
new_save_path = "Results/transfer_bdlstm_frozen.h5"

# Step 1: obtain a Bidirectional LSTM trained on the larger MIT-BIH dataset,
# either by loading a saved model or, if no save file is available, by
# training it here.
if from_file:
    model = keras.models.load_model(file_path)
    print("Trained model weights loaded.")
else:
    print("Training on MIT Dataset:")

    with tf.device(device_name):

        # Callbacks stop training or reduce the learning rate when the
        # held-out validation loss stops improving; patience is set high
        # due to the instability of RNNs.
        early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
        redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
        # Checkpoint to file_path (this cell never defines `save_name`, which
        # was referenced here before and would raise a NameError).
        if file_path:
            checkpoint = ModelCheckpoint(filepath=file_path, monitor='val_loss', verbose=1, save_best_only=True)
            callbacks_list = [checkpoint, early, redonplat]
        else:
            callbacks_list = [early, redonplat]

        # Create and train the model on MIT-BIH.
        model = BiDirLSTM(input_length=X_mitbih.shape[1], num_units=100, classes=5, num_cells=2,
                          num_dense=2, dropout=0, optimizer="adam", callbacks=callbacks_list, lr=0.0001)

        model = model_helper.train_test_model(model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
                                              binary_task=False)

    print("Training over.")

print("Proceeding to transfer learning on PTB Dataset:")

# Step 2: drop the last layer of the pre-trained model and attach a single
# sigmoid unit for the binary PTB task.
if isinstance(model, tf.keras.Sequential):
    base_layers = model.layers[:-1]
else:
    # presumably a project wrapper exposing the Keras model via .model() -- TODO confirm
    base_layers = model.model().layers[:-1]
new_model = keras.models.Sequential(base_layers)
new_model.add(keras.layers.Dense(1, activation="sigmoid", name="dense_2"))

# Step 3: freeze every layer except the last two of the new model, so only
# those two are updated during fine-tuning.
for layer in new_model.layers[:-2]:
    layer.trainable = False

with tf.device(device_name):

    # The learning rate is the same value used for pre-training above.
    new_model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=0.0001),
                      metrics=['accuracy', keras.metrics.AUC()])

    # Step 4: fine-tune the unfrozen layers on the PTB dataset.
    transfer_model = model_helper.train_test_model(new_model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb, epochs=100,
                                                   binary_task=True, save_name=new_save_path)

## Hyperparameter Search for Transfer Learning Models

In [None]:
def transfer_grid_search(model="bdlstm", frozen=True):
    """Perform a hyperparameter grid search with 5-fold cross validation.

    For every (optimizer, learning rate) pair, the MIT-BIH pre-trained model
    is reloaded for each fold, its last layer is replaced by a single sigmoid
    unit, and the resulting network is fine-tuned on the training folds of the
    PTB data and scored by accuracy on the validation fold. The function reads
    the notebook globals X_ptbdb, Y_ptbdb and device_name.

    Keyword arguments:
    model -- "bdlstm" selects the Bidirectional LSTM; any other value
             selects the ConvLSTM (default "bdlstm")
    frozen -- whether to freeze all layers except the last two of the
              transfer model (default True)

    Returns:
    dict with keys "optim" and "lr" holding the configuration with the best
    mean validation accuracy (empty if no configuration scored above 0).
    """

    # Only optimizer and learning rate are searched; other options (e.g. unit
    # numbers) were discarded as the grid search takes too long.
    learning_rates = [0.0001, 0.001, 0.000001]
    batch = 200
    epochs = 50
    # Map optimizer names to classes so the searched learning rate can be
    # applied; compiling with the bare name would use the default rate.
    optimizers = {"adam": keras.optimizers.Adam, "rmsprop": keras.optimizers.RMSprop}

    file_path_cnnlstm = "Results/final_cnn_lstm_mitbih.h5"
    file_path_bdlstm = "Results/final_bdlstm_mitbih.h5"

    # Resolve the checkpoint once. The `model` argument must not be rebound to
    # the loaded network, otherwise the comparison fails from the second fold
    # on and the wrong file is loaded.
    base_path = file_path_bdlstm if model == "bdlstm" else file_path_cnnlstm

    opt_params = {}
    best_acc = 0

    X, Y = X_ptbdb, Y_ptbdb

    for optim, optimizer_cls in optimizers.items():
        for lr in learning_rates:
            print("---------------------------------------------------")
            print("Params to evaluate:")
            print("LR: ", lr, " | Optim: ", optim)

            scores = []

            for train, val in KFold(n_splits=5, shuffle=True).split(X, Y):

                print("Fold - ", len(scores) + 1)

                # Callbacks enable early stopping and learning rate reduction
                # depending on validation accuracy.
                early = EarlyStopping(monitor="val_accuracy", mode="max", patience=10, verbose=1)
                redonplat = ReduceLROnPlateau(monitor="val_accuracy", mode="max", patience=7, verbose=1)
                callbacks_list = [early, redonplat]

                with tf.device(device_name):

                    # Reload the pre-trained model so every fold starts from
                    # the same weights.
                    base_model = keras.models.load_model(base_path)
                    print("Trained model weights loaded.")

                    # Replace the final layer with a single sigmoid unit.
                    if isinstance(base_model, tf.keras.Sequential):
                        base_layers = base_model.layers[:-1]
                    else:
                        # NOTE(review): kept from the original; a non-Sequential
                        # model returned by load_model may not expose .model()
                        # -- confirm.
                        base_layers = base_model.model().layers[:-1]
                    new_model = keras.models.Sequential(base_layers)
                    new_model.add(keras.layers.Dense(1, activation="sigmoid", name="dense_2"))

                    # Freeze everything except the last two layers.
                    if frozen:
                        for layer in new_model.layers[:-2]:
                            layer.trainable = False

                    new_model.compile(loss='binary_crossentropy',
                                      optimizer=optimizer_cls(learning_rate=lr),
                                      metrics=['accuracy', keras.metrics.AUC()])

                    # Train the model.
                    new_model.fit(X[train], Y[train], epochs=epochs, batch_size=batch,
                                  verbose=0, callbacks=callbacks_list,
                                  validation_data=(X[val], Y[val]))

                    # evaluate() returns the loss followed by the compiled
                    # metrics in order, so index 1 is the accuracy.
                    fold_metrics = new_model.evaluate(X[val], Y[val], verbose=0)
                    scores.append(fold_metrics[1])

                print("Fold Accuracy: ", scores[-1])

            avg_acc = np.asarray(scores).mean()

            print("-------------------------- RESULTS -------------------------- ")

            print("average 5-fold cross val accuracy: ", avg_acc)

            if avg_acc > best_acc:
                best_acc = avg_acc
                opt_params["optim"] = optim
                opt_params["lr"] = lr

    print("---------------------------------------------------")
    print(" ")
    print("Best Params:")
    # .get avoids a KeyError when no configuration was recorded.
    print("LR: ", opt_params.get("lr"), " | Optim: ", opt_params.get("optim"))
    print(" ")
    print("Best Accuracy: ", best_acc)

    return opt_params

#### Grid Search For Bidirectional LSTM without Layer Freezing

In [None]:
# Grid search: Bidirectional LSTM, all layers trainable.
transfer_grid_search(model="bdlstm", frozen=False)

#### Grid Search For Bidirectional LSTM with Layer Freezing

In [None]:
# Grid search: Bidirectional LSTM, with layer freezing.
transfer_grid_search(model="bdlstm", frozen=True)

#### Grid Search For ConvLSTM without Layer Freezing

In [None]:
# Grid search: ConvLSTM, all layers trainable.
transfer_grid_search(model="convlstm", frozen=False)

#### Grid Search For ConvLSTM with Layer Freezing

In [None]:
# Grid search: ConvLSTM, with layer freezing.
transfer_grid_search(model="convlstm", frozen=True)