# Task 1 and 2 - RNN Based Models

This notebook includes the training and testing of the finalised RNN-based models. All models except the LSTM converge; their hyperparameters were selected in the RNN_Models_Hyperparam_Search.ipynb notebook.

## Model Comparison:
-------------------------------------------------------------------
### Vanilla RNN:
#### PTB:
Test f1 score : 0.9515545914678236  \
Test accuracy score : 0.9309515630367571  \
Test AUROC score : 0.9244353779260203  \
Test AUPRC score : 0.9495864255421855 

#### MIT:
Test f1 score : 0.8582  \
Test accuracy score : 0.9728 


-------------------------------------------------------------------
### LSTM: - almost only predicts majority class - no convergence
#### PTB:
Test f1 score : 0.8386195890684222  \
Test accuracy score : 0.7220886293369976  \
Test AUROC score : 0.5  \
Test AUPRC score : 0.7220886293369976  

#### MIT:
Test f1 score : 0.1863665513376111  \
Test accuracy score : 0.8282477617394483 

-------------------------------------------------------------------
### Bidirectional LSTM:
#### PTB:
Test f1 score : 0.9663291735342595 \
Test accuracy score : 0.9508759876331158\
Test AUROC score : 0.9306281968200277 \
Test AUPRC score : 0.9510640339196088

#### MIT:
Test f1 score : 0.8917068203921733  \
Test accuracy score : 0.978211218710031 

-------------------------------------------------------------------

### ConvLSTM:
#### PTB:
Test f1 score : 0.9827218934911243 \
Test accuracy score : 0.9749227069735487  \
Test AUROC score : 0.9647672062277494  \
Test AUPRC score : 0.9746978178291369 

#### MIT:
Test f1 score : 0.9076935070124723  \
Test accuracy score : 0.9828704549607162  


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras import backend as K

import pickle
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, average_precision_score
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau

import model_helper

import sys
sys.path.append("../")
from models import *


# To ensure reproducible results, seed every random generator training draws from.
# Seeding NumPy alone leaves TensorFlow's own generator unseeded, so repeated
# runs would still differ.
from numpy.random import seed
seed(1)
tf.random.set_seed(1)  # NOTE: some GPU kernels may still be non-deterministic

In [None]:
# Use the first GPU when TensorFlow reports it under exactly this name;
# anything else (including no GPU at all) falls back to the CPU.
device_name = "/device:GPU:0" if tf.test.gpu_device_name() == "/device:GPU:0" else "/cpu:0"
print(f"Found device at: {device_name}")

# MIT-BIH Arrhythmia Database
------------------------------------------

In [None]:
# The training rows are shuffled once on load; the test file is kept in file order.
df_train = pd.read_csv("../input/mitbih_train.csv", header=None).sample(frac=1)
df_test = pd.read_csv("../input/mitbih_test.csv", header=None)

# Columns 0-186 are the inputs, column 187 is the class label.
# A trailing axis is appended so inputs have shape (rows, 187, 1).
X_mitbih = np.expand_dims(np.array(df_train[list(range(187))].values), axis=-1)
Y_mitbih = np.array(df_train[187].values).astype(np.int8)

X_test_mitbih = np.expand_dims(np.array(df_test[list(range(187))].values), axis=-1)
Y_test_mitbih = np.array(df_test[187].values).astype(np.int8)

# PTB Diagnostic ECG Database
------------------------------------------

In [None]:
# The normal and abnormal recordings are stored in separate files; stack them into one frame.
df_1 = pd.read_csv("../input/ptbdb_normal.csv", header=None)
df_2 = pd.read_csv("../input/ptbdb_abnormal.csv", header=None)
df = pd.concat([df_1, df_2])

# 80/20 split with a fixed seed, stratified on the label column (187).
df_train, df_test = train_test_split(df, stratify=df[187], test_size=0.2, random_state=1337)

# Columns 0-186 are the inputs, column 187 is the label.
# A trailing axis is appended so inputs have shape (rows, 187, 1).
X_ptbdb = np.expand_dims(np.array(df_train[list(range(187))].values), axis=-1)
Y_ptbdb = np.array(df_train[187].values).astype(np.int8)

X_test_ptbdb = np.expand_dims(np.array(df_test[list(range(187))].values), axis=-1)
Y_test_ptbdb = np.array(df_test[187].values).astype(np.int8)

-----------------------------------------------------
# Final Results

All final models incorporating a version of RNNs are trained and tested in the following sections to specify final performance metrics. Model hyperparameters were chosen through hyperparameter search as displayed in the notebook: RNN_Models_Hyperparam_Search.ipynb


# Vanilla RNN
***************************************
Since Vanilla RNN performance on the validation sets during hyperparameter search varied greatly across runs, we run the final Vanilla RNN models 5 times and obtain average evaluation metrics and their standard deviation.

## PTB Results

In [None]:
# Model file for the Vanilla RNN on PTB. The cells below pass it to
# model_helper.train_test_model and, when training, to ModelCheckpoint.
save_name = "Results/final_vanilla_rnn_ptbdb.h5"

### To test the saved model file without training:

In [None]:
# Evaluation-only run: no network is built (model stays None) and train=False
# is passed. Per the section heading, train_test_model is expected to test the
# model file at `save_name` instead of training.
model = None
with tf.device(device_name):
    model_helper.train_test_model(
        model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb,
        binary_task=True, train=False, save_name=save_name,
    )

### To train and test the model:

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    callbacks_list = [early, redonplat]
    if save_name:
        # Write a checkpoint only when val_loss improves; it goes first in the list.
        checkpoint = ModelCheckpoint(
            filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True
        )
        callbacks_list.insert(0, checkpoint)

    # Create the Vanilla RNN for the binary PTB task.
    model = VanillaRNN(
        input_length=X_ptbdb.shape[1],
        num_units=150,
        classes=2,
        num_cells=1,
        dropout=0,
        optimizer="adam",
        callbacks=callbacks_list,
        lr=0.0001,
    )

    # Train and test the model (per the section heading).
    model_helper.train_test_model(
        model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb,
        binary_task=True, save_name=save_name,
    )

## MIT Results

In [None]:
# Model file for the Vanilla RNN on MIT-BIH. The cells below pass it to
# model_helper.train_test_model and, when training, to ModelCheckpoint.
save_name = "Results/final_vanilla_rnn_mitbih.h5"

### To test the saved model file without training:

In [None]:
# Evaluation-only run: no network is built (model stays None) and train=False
# is passed. Per the section heading, train_test_model is expected to test the
# model file at `save_name` instead of training.
model = None
with tf.device(device_name):
    model_helper.train_test_model(
        model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
        binary_task=False, train=False, save_name=save_name,
    )

### To train and test the model:

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    callbacks_list = [early, redonplat]
    if save_name:
        # Write a checkpoint only when val_loss improves; it goes first in the list.
        checkpoint = ModelCheckpoint(
            filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True
        )
        callbacks_list.insert(0, checkpoint)

    # Create the Vanilla RNN for the 5-class MIT-BIH task.
    model = VanillaRNN(
        input_length=X_mitbih.shape[1],
        num_units=150,
        classes=5,
        num_cells=1,
        dropout=0.2,
        optimizer="adam",
        callbacks=callbacks_list,
        lr=5e-05,
    )

    # Train and test the model (per the section heading).
    model_helper.train_test_model(
        model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
        binary_task=False, save_name=save_name,
    )


# LSTM - does not converge
***************************************

## PTB Results

In [None]:
# Model file for the LSTM on PTB. The next cell passes it to ModelCheckpoint
# and to model_helper.train_test_model.
save_name = "Results/final_lstm_ptbdb.h5"

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    callbacks_list = [early, redonplat]
    if save_name:
        # Write a checkpoint only when val_loss improves; it goes first in the list.
        checkpoint = ModelCheckpoint(
            filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True
        )
        callbacks_list.insert(0, checkpoint)

    # Create the LSTM for the binary PTB task.
    model = VanillaLSTM(
        input_length=X_ptbdb.shape[1],
        num_units=150,
        classes=2,
        num_cells=1,
        num_dense=2,
        dropout=0,
        optimizer="adam",
        callbacks=callbacks_list,
        lr=0.01,
    )

    # Pass the fresh model to train_test_model together with the PTB train/test arrays.
    model_helper.train_test_model(
        model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb,
        binary_task=True, save_name=save_name,
    )


## MIT Results

In [None]:
# Model file for the LSTM on MIT-BIH. The next cell passes it to ModelCheckpoint
# and to model_helper.train_test_model.
save_name = "Results/final_lstm_mitbih.h5"

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    if save_name:
        # Write a checkpoint only when val_loss improves.
        checkpoint = ModelCheckpoint(filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True)
        callbacks_list = [checkpoint, early, redonplat]
    else:
        callbacks_list = [early, redonplat]

    # Create the LSTM for the 5-class MIT-BIH task.
    # The input length comes from the MIT-BIH array this model is trained on,
    # not from the PTB array, so the cell stays correct if the two datasets
    # ever differ in width.
    model = VanillaLSTM( input_length=X_mitbih.shape[1], num_units=150, classes=5, num_cells = 1,
                        dropout=0, optimizer="adam", callbacks= callbacks_list, lr=0.0001)

    # Pass the fresh model to train_test_model together with the MIT-BIH train/test arrays.
    model_helper.train_test_model( model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
                                 binary_task=False, save_name = save_name)


# Bidirectional LSTM 
***************************************

## PTB Results

In [None]:
# Model file for the Bidirectional LSTM on PTB. The cells below pass it to
# model_helper.train_test_model and, when training, to ModelCheckpoint.
save_name = "Results/final_bdlstm_ptbdb.h5"

### To test the saved model file without training:

In [None]:
# Evaluation-only run: no network is built (model stays None) and train=False
# is passed. Per the section heading, train_test_model is expected to test the
# model file at `save_name` instead of training.
model = None
with tf.device(device_name):
    model_helper.train_test_model(
        model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb,
        binary_task=True, train=False, save_name=save_name,
    )

### To train and test the model:

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    callbacks_list = [early, redonplat]
    if save_name:
        # Write a checkpoint only when val_loss improves; it goes first in the list.
        checkpoint = ModelCheckpoint(
            filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True
        )
        callbacks_list.insert(0, checkpoint)

    # Create the Bidirectional LSTM for the binary PTB task.
    model = BiDirLSTM(
        input_length=X_ptbdb.shape[1],
        num_units=100,
        classes=2,
        num_cells=2,
        num_dense=2,
        dropout=0,
        optimizer="adam",
        callbacks=callbacks_list,
        lr=0.0001,
    )

    # Train and test the model (per the section heading).
    model_helper.train_test_model(
        model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb,
        binary_task=True, save_name=save_name,
    )

## MIT Results

In [None]:
# Model file for the Bidirectional LSTM on MIT-BIH. The cells below pass it to
# model_helper.train_test_model and, when training, to ModelCheckpoint.
save_name = "Results/final_bdlstm_mitbih.h5"

### To test the saved model file without training:

In [None]:
# Evaluation-only run: no network is built (model stays None) and train=False
# is passed. Per the section heading, train_test_model is expected to test the
# model file at `save_name` instead of training.
model = None
with tf.device(device_name):
    model_helper.train_test_model(
        model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
        binary_task=False, train=False, save_name=save_name,
    )

### To train and test the model:

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    callbacks_list = [early, redonplat]
    if save_name:
        # Write a checkpoint only when val_loss improves; it goes first in the list.
        checkpoint = ModelCheckpoint(
            filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True
        )
        callbacks_list.insert(0, checkpoint)

    # Create the Bidirectional LSTM for the 5-class MIT-BIH task.
    model = BiDirLSTM(
        input_length=X_mitbih.shape[1],
        num_units=100,
        classes=5,
        num_cells=2,
        num_dense=2,
        dropout=0,
        optimizer="adam",
        callbacks=callbacks_list,
        lr=0.0001,
    )

    # Train and test the model (per the section heading).
    model_helper.train_test_model(
        model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
        binary_task=False, save_name=save_name,
    )


# ConvLSTM 
***************************************

## PTB Results

In [None]:
# Model file for the ConvLSTM on PTB. The cells below pass it to
# model_helper.train_test_model and, when training, to ModelCheckpoint.
save_name = "Results/final_cnn_lstm_ptbdb.h5"

### To test the saved model file without training:

In [None]:
# Evaluation-only run: no network is built (model stays None) and train=False
# is passed. Per the section heading, train_test_model is expected to test the
# model file at `save_name` instead of training.
model = None
with tf.device(device_name):
    model_helper.train_test_model(
        model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb,
        binary_task=True, train=False, save_name=save_name,
    )

### To train and test the model:

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    callbacks_list = [early, redonplat]
    if save_name:
        # Write a checkpoint only when val_loss improves; it goes first in the list.
        checkpoint = ModelCheckpoint(
            filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True
        )
        callbacks_list.insert(0, checkpoint)

    # Create the ConvLSTM for the binary PTB task.
    model = ConvLSTM(
        input_length=X_ptbdb.shape[1],
        num_units=150,
        num_conv=2,
        num_dense=2,
        classes=2,
        dropout=0.5,
        optimizer="adam",
        callbacks=callbacks_list,
        lr=0.001,
    )

    # Train and test the model (per the section heading).
    model_helper.train_test_model(
        model, X_ptbdb, Y_ptbdb, X_test_ptbdb, Y_test_ptbdb,
        binary_task=True, save_name=save_name,
    )
    

## MIT Results

In [None]:
# Model file for the ConvLSTM on MIT-BIH. The cells below pass it to
# model_helper.train_test_model and, when training, to ModelCheckpoint.
save_name = "Results/final_cnn_lstm_mitbih.h5"

### To test the saved model file without training:

In [None]:
# Evaluation-only run: no network is built (model stays None) and train=False
# is passed. Per the section heading, train_test_model is expected to test the
# model file at `save_name` instead of training.
model = None
with tf.device(device_name):
    model_helper.train_test_model(
        model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
        binary_task=False, train=False, save_name=save_name,
    )

### To train and test the model:

In [None]:
with tf.device(device_name):

    # Callbacks driven by validation loss. Patience is deliberately high because
    # RNN training is unstable: the learning rate is reduced after 7 epochs
    # without improvement and training stops after 15.
    early = EarlyStopping(monitor="val_loss", patience=15, verbose=1)
    redonplat = ReduceLROnPlateau(monitor="val_loss", patience=7, verbose=1)
    callbacks_list = [early, redonplat]
    if save_name:
        # Write a checkpoint only when val_loss improves; it goes first in the list.
        checkpoint = ModelCheckpoint(
            filepath=save_name, monitor='val_loss', verbose=1, save_best_only=True
        )
        callbacks_list.insert(0, checkpoint)

    # Create the ConvLSTM for the 5-class MIT-BIH task.
    model = ConvLSTM(
        input_length=X_mitbih.shape[1],
        num_units=150,
        num_conv=2,
        num_dense=2,
        classes=5,
        dropout=0.5,
        optimizer="adam",
        callbacks=callbacks_list,
        lr=0.001,
    )

    # Train and test the model (per the section heading).
    model_helper.train_test_model(
        model, X_mitbih, Y_mitbih, X_test_mitbih, Y_test_mitbih,
        binary_task=False, save_name=save_name,
    )