# Hyperparam Grid Search for RNN Models
---------------------------------------------------------

In this notebook, a hyperparameter grid search scored by 5-fold cross-validation accuracy is carried out for all RNN-based models.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras import backend as K
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

import pickle
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, average_precision_score

import model_helper

import sys
sys.path.append("../")
from models import *


# To ensure reproducible results, seed NumPy's global random generator.
# NOTE(review): no TensorFlow seed is set in the visible cells — confirm
# whether model initialisation also needs seeding for full reproducibility.
from numpy.random import seed
seed(1)

In [None]:
# Run on the first GPU when TensorFlow reports exactly that device name;
# in every other case fall back to the CPU.
device_name = (
    "/device:GPU:0"
    if tf.test.gpu_device_name() == "/device:GPU:0"
    else "/cpu:0"
)
print('Found device at: {}'.format(device_name))

# MIT-BIH Arrhythmia Database
------------------------------------------

In [None]:
# Load the MIT-BIH splits; the training rows are shuffled once right after reading.
df_train = pd.read_csv("../input/mitbih_train.csv", header=None).sample(frac=1)
df_test = pd.read_csv("../input/mitbih_test.csv", header=None)

# Column 187 is used as the integer label; columns 0..186 are the inputs,
# which get a trailing axis of length 1 appended.
Y_mitbih = df_train[187].to_numpy().astype(np.int8)
X_mitbih = np.expand_dims(df_train[list(range(187))].to_numpy(), axis=-1)

Y_test_mitbih = df_test[187].to_numpy().astype(np.int8)
X_test_mitbih = np.expand_dims(df_test[list(range(187))].to_numpy(), axis=-1)

# PTB Diagnostic ECG Database
------------------------------------------

In [None]:
# Read the normal and abnormal PTB recordings and stack them into one frame.
df_1 = pd.read_csv("../input/ptbdb_normal.csv", header=None)
df_2 = pd.read_csv("../input/ptbdb_abnormal.csv", header=None)
df = pd.concat([df_1, df_2])

# Hold out 20% of the rows as a test set, stratified on column 187.
df_train, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=1337,
    stratify=df[187],
)

# Column 187 is used as the integer label; columns 0..186 are the inputs,
# which get a trailing axis of length 1 appended.
Y_ptbdb = df_train[187].to_numpy().astype(np.int8)
X_ptbdb = np.expand_dims(df_train[list(range(187))].to_numpy(), axis=-1)

Y_test_ptbdb = df_test[187].to_numpy().astype(np.int8)
X_test_ptbdb = np.expand_dims(df_test[list(range(187))].to_numpy(), axis=-1)


# Vanilla RNN
***************************************
Grid search code for both datasets and both Vanilla RNN and LSTM based models:

In [None]:
def rnn_grid_search(name="ptb", model = "vanilla"):
    """Grid-search dropout, unit count and learning rate for an RNN/LSTM model.

    Every hyperparameter combination is scored by the mean of the per-fold
    scores over a shuffled 5-fold split of the selected training data. The
    combination with the highest mean is printed and returned.

    A fresh network is built for every fold. The architecture selector is
    resolved to a class once, up front, and the per-fold network is held in
    its own local variable so that the ``model`` argument is never
    overwritten; otherwise only the very first fold would build a new
    network and every later fold would keep training that same instance.

    Args:
        name: Dataset selector. "ptb" uses the module-level ``X_ptbdb`` /
            ``Y_ptbdb`` arrays with 2 classes; any other value uses
            ``X_mitbih`` / ``Y_mitbih`` with 5 classes.
        model: Architecture selector: "vanilla" builds ``VanillaRNN``,
            "lstm" builds ``VanillaLSTM`` (both presumably provided by the
            ``models`` star-import — confirm).

    Returns:
        dict with the keys "units", "drop" and "lr" of the best-scoring
        combination.

    Raises:
        ValueError: If ``model`` is neither "vanilla" nor "lstm".
    """
    # Resolve the architecture before any training starts so a bad selector
    # fails immediately instead of after the data has been split.
    if model == "vanilla":
        model_cls = VanillaRNN
    elif model == "lstm":
        model_cls = VanillaLSTM
    else:
        raise ValueError(
            "model must be 'vanilla' or 'lstm', got {!r}".format(model)
        )

    #optimizer and other unit number options were discarded as grid search takes too long
    unit_nums = [100, 150]
    dropout_rate = [0, 0.2, 0.4]
    learning_rates = [0.0001, 0.00005, 0.000075]
    batch = 200
    epochs = 100
    optim = "adam"

    opt_params = {}
    best_acc = 0

    # The number of output classes follows the dataset choice.
    if name == "ptb":
        X, Y, n_classes = X_ptbdb, Y_ptbdb, 2
    else:
        X, Y, n_classes = X_mitbih, Y_mitbih, 5

    for drop in dropout_rate:
        for units in unit_nums:
            for lr in learning_rates:
                print("---------------------------------------------------")
                print("Params to evaluate:")
                print("LR: ",lr, " | Units: ", units, " | Dropout: ",drop, " | Optim: ",optim)

                scores = []

                for train, val in KFold(n_splits=5, shuffle=True).split(X,Y):

                    print("Fold - ", len(scores)+1)

                    # Callbacks enable early stopping and learning rate
                    # reduction, both driven by validation accuracy.
                    early = EarlyStopping(monitor="val_accuracy", mode="max", patience=15, verbose=1)
                    redonplat = ReduceLROnPlateau(monitor="val_accuracy", mode="max", patience=10, verbose=1)
                    callbacks_list = [early, redonplat]

                    with tf.device(device_name):

                        # Build a brand-new network for this fold so no
                        # weights carry over from earlier folds or settings.
                        net = model_cls(input_length=X[train].shape[1],
                                        num_units=units, classes=n_classes,
                                        callbacks=callbacks_list,
                                        optimizer=optim, dropout=drop, lr=lr)

                        # training the model
                        net.fit(X[train], Y[train], epochs=epochs, batch_size=batch,
                                verbose=0, validation_data=(X[val], Y[val]))

                        # evaluate validation set
                        scores.append(net.score(X[val], Y[val]))

                    print("Fold Accuracy: ", scores[-1])

                avg_acc = np.asarray(scores).mean()

                print("-------------------------- RESULTS -------------------------- ")

                print("average 5-fold cross val accuracy: ", avg_acc)

                # Keep the combination with the highest mean fold score.
                if avg_acc > best_acc:
                    best_acc = avg_acc
                    opt_params["units"] = units
                    opt_params["drop"] = drop
                    opt_params["lr"] = lr


    print("---------------------------------------------------")
    print(" ")
    print("Best Params:")
    print("LR: ",opt_params["lr"], " | Units: ", opt_params["units"], " | Dropout: ",opt_params["drop"])
    print(" ")
    print("Best Accuracy: ", best_acc)

    return opt_params

## PTB Dataset Grid Search


In [None]:
# Grid search for the vanilla RNN on the PTB data; stores the best-scoring parameters.
vanill_ptb_opt_params = rnn_grid_search(name="ptb", model="vanilla")

## MIT Dataset Grid Search

In [None]:
# Grid search for the vanilla RNN on the MIT-BIH data; stores the best-scoring parameters.
vanill_mit_opt_params = rnn_grid_search(name="mit", model="vanilla")

# LSTM
***************************************
This model replaces the RNN cells in the Vanilla RNN model with LSTM cells. Calling the same function with new model:

## PTB Dataset Grid Search


In [None]:
# Grid search for the LSTM variant on the PTB data; stores the best-scoring parameters.
lstm_ptb_opt_params = rnn_grid_search(name="ptb", model="lstm")

## MIT Dataset Grid Search

In [None]:
# Grid search for the LSTM variant on the MIT-BIH data; stores the best-scoring parameters.
lstm_mit_opt_params = rnn_grid_search(name="mit", model="lstm")

# Bidirectional LSTM 
***************************************
In this model, the bidirectional version of the LSTM cell is used and combined with a variable number of dense layers. We expect this model to capture patterns in the non-causal direction as well, potentially improving on the performance of the unidirectional version.

Grid search function for the bidirectional LSTM based model.
When fine-tuning this model, instead of exploring regularization through dropout, capacity is varied by tuning the number of fully connected layers at the end as well as the number of sequential bidirectional LSTM cells.

In [None]:
def bidir_lstm_grid_search(name="ptb"):
    """Grid-search the BiDirLSTM model by 5-fold cross-validated score.

    The search covers the number of dense layers, the number of LSTM cells,
    the unit count and the learning rate; dropout, optimizer, batch size and
    epoch budget are fixed. Each combination is scored by the mean of the
    per-fold scores over a shuffled 5-fold split.

    Args:
        name: Dataset selector. "ptb" uses the module-level ``X_ptbdb`` /
            ``Y_ptbdb`` arrays with 2 classes; any other value uses
            ``X_mitbih`` / ``Y_mitbih`` with 5 classes.

    Returns:
        dict with the keys "units", "lr", "n_cells" and "n_dense" of the
        best-scoring combination.
    """
    # Fixed training settings.
    batch_size = 200
    max_epochs = 100
    optimizer_name = "adam"
    dropout = 0

    # Dataset and matching number of output classes.
    if name == "ptb":
        X, Y, n_classes = X_ptbdb, Y_ptbdb, 2
    else:
        X, Y, n_classes = X_mitbih, Y_mitbih, 5

    # Search space, flattened in the same order as four nested loops
    # (dense layers outermost, learning rate innermost). Other optimizer and
    # unit options were dropped because the search takes too long.
    grid = [
        (n_dense, n_cells, units, lr)
        for n_dense in [2, 4]
        for n_cells in [2, 4]
        for units in [100, 150]
        for lr in [0.0001, 0.00005]
    ]

    best_acc = 0
    opt_params = {}

    for n_dense, n_cells, units, lr in grid:
        print("---------------------------------------------------")
        print("Params to evaluate:")
        print("n_dense: ",n_dense, " | n_cells: ", n_cells, " | units: ",units, " | lr: ",lr)

        fold_scores = []
        folds = KFold(n_splits=5, shuffle=True).split(X, Y)

        for fold_no, (train, val) in enumerate(folds, start=1):
            print("Fold - ", fold_no)

            # Early stopping and learning-rate reduction both watch
            # validation accuracy.
            callbacks_list = [
                EarlyStopping(monitor="val_accuracy", mode="max", patience=15, verbose=1),
                ReduceLROnPlateau(monitor="val_accuracy", mode="max", patience=10, verbose=1),
            ]

            with tf.device(device_name):
                # A new network is created for every fold.
                net = BiDirLSTM(
                    input_length=X[train].shape[1],
                    num_units=units,
                    classes=n_classes,
                    callbacks=callbacks_list,
                    num_cells=n_cells,
                    num_dense=n_dense,
                    optimizer=optimizer_name,
                    dropout=dropout,
                    lr=lr,
                )

                # Train on the fold's training part, validate on the rest.
                net.fit(
                    X[train],
                    Y[train],
                    epochs=max_epochs,
                    batch_size=batch_size,
                    verbose=0,
                    validation_data=(X[val], Y[val]),
                )

                # Score on the held-out part of the fold.
                fold_scores.append(net.score(X[val], Y[val]))

            print("Fold Accuracy: ", fold_scores[-1])

        avg_acc = np.asarray(fold_scores).mean()

        print("-------------------------- RESULTS -------------------------- ")

        print("average 5-fold cross val accuracy: ", avg_acc)

        # Remember the combination with the highest mean fold score.
        if avg_acc > best_acc:
            best_acc = avg_acc
            opt_params = {
                "units": units,
                "lr": lr,
                "n_cells": n_cells,
                "n_dense": n_dense,
            }

    print("---------------------------------------------------")
    print(" ")
    print("Best Params:")
    print("n_dense: ",opt_params["n_dense"], " | n_cells: ", opt_params["n_cells"], " | units: ",opt_params["units"], " | lr: ",opt_params["lr"])
    print(" ")
    print("Best Accuracy: ", best_acc)

    return opt_params

## PTB Dataset Grid Search

In [None]:
# Grid search for the bidirectional LSTM on the PTB data; stores the best-scoring parameters.
bidir_ptb_opt_params = bidir_lstm_grid_search(name="ptb")

## MIT Dataset Grid Search

In [None]:
# Grid search for the bidirectional LSTM on the MIT-BIH data; stores the best-scoring parameters.
bidir_mit_opt_params = bidir_lstm_grid_search(name="mit")

# ConvLSTM Model
***************************************
This model aims to combine the spatial feature extraction capability of convolution layers with the sequential nature of the LSTM cells. Therefore, the number of convolutional layers becomes a parameter to be tuned. To keep the number of tuned hyperparameters manageable, dropout and optimizer are kept fixed.

In [None]:
def convlstm_grid_search(name="ptb"):
    """Grid-search the ConvLSTM model by 5-fold cross-validated score.

    The search covers the number of dense layers, the number of convolution
    layers, the unit count and the learning rate; dropout, optimizer, batch
    size and epoch budget are fixed. Each combination is scored by the mean
    of the per-fold scores over a shuffled 5-fold split.

    Args:
        name: Dataset selector. "ptb" uses the module-level ``X_ptbdb`` /
            ``Y_ptbdb`` arrays with 2 classes; any other value uses
            ``X_mitbih`` / ``Y_mitbih`` with 5 classes.

    Returns:
        dict with the keys "units", "lr", "n_conv" and "n_dense" of the
        best-scoring combination.
    """
    # Fixed training settings.
    batch_size = 200
    max_epochs = 100
    optimizer_name = "adam"
    dropout = 0.5

    # Dataset and matching number of output classes.
    if name == "ptb":
        X, Y, n_classes = X_ptbdb, Y_ptbdb, 2
    else:
        X, Y, n_classes = X_mitbih, Y_mitbih, 5

    # Search space, flattened in the same order as four nested loops
    # (dense layers outermost, learning rate innermost). Other optimizer and
    # unit options were dropped because the search takes too long.
    grid = [
        (n_dense, n_conv, units, lr)
        for n_dense in [2, 4]
        for n_conv in [2, 4]
        for units in [100, 150]
        for lr in [0.001, 0.0001]
    ]

    best_acc = 0
    opt_params = {}

    for n_dense, n_conv, units, lr in grid:
        print("---------------------------------------------------")
        print("Params to evaluate:")
        print("n_dense: ",n_dense, " | n_conv: ", n_conv, " | units: ",units, " | lr: ",lr)

        fold_scores = []
        folds = KFold(n_splits=5, shuffle=True).split(X, Y)

        for fold_no, (train, val) in enumerate(folds, start=1):
            print("Fold - ", fold_no)

            # Early stopping and learning-rate reduction both watch
            # validation accuracy.
            callbacks_list = [
                EarlyStopping(monitor="val_accuracy", mode="max", patience=15, verbose=1),
                ReduceLROnPlateau(monitor="val_accuracy", mode="max", patience=10, verbose=1),
            ]

            with tf.device(device_name):
                # A new network is created for every fold.
                net = ConvLSTM(
                    input_length=X[train].shape[1],
                    num_units=units,
                    classes=n_classes,
                    callbacks=callbacks_list,
                    num_conv=n_conv,
                    num_dense=n_dense,
                    optimizer=optimizer_name,
                    dropout=dropout,
                    lr=lr,
                )

                # Train on the fold's training part, validate on the rest.
                net.fit(
                    X[train],
                    Y[train],
                    epochs=max_epochs,
                    batch_size=batch_size,
                    verbose=0,
                    validation_data=(X[val], Y[val]),
                )

                # Score on the held-out part of the fold.
                fold_scores.append(net.score(X[val], Y[val]))

            print("Fold Accuracy: ", fold_scores[-1])

        avg_acc = np.asarray(fold_scores).mean()

        print("-------------------------- RESULTS -------------------------- ")

        print("average 5-fold cross val accuracy: ", avg_acc)

        # Remember the combination with the highest mean fold score.
        if avg_acc > best_acc:
            best_acc = avg_acc
            opt_params = {
                "units": units,
                "lr": lr,
                "n_conv": n_conv,
                "n_dense": n_dense,
            }

    print("---------------------------------------------------")
    print(" ")
    print("Best Params:")
    print("n_dense: ",opt_params["n_dense"], " | n_conv: ", opt_params["n_conv"], " | units: ",opt_params["units"], " | lr: ",opt_params["lr"])
    print(" ")
    print("Best Accuracy: ", best_acc)

    return opt_params

## PTB Dataset Grid Search

In [None]:
# Grid search for the ConvLSTM model on the PTB data; stores the best-scoring parameters.
convlstm_ptb_opt_params = convlstm_grid_search(name="ptb")

## MIT Dataset Grid Search

In [None]:
# Grid search for the ConvLSTM model on the MIT-BIH data; stores the best-scoring parameters.
convlstm_mit_opt_params = convlstm_grid_search(name="mit")