In [6]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
import sklearn.decomposition as decomposition
from sklearn.manifold import TSNE
from sklearn.metrics import f1_score, accuracy_score, make_scorer
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV

import tensorflow as tf
from tensorflow.keras import Model, Input, optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, GRU, BatchNormalization
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
#%load_ext tensorboard
#from tensorboard.plugins.hparams import api as hp

import matplotlib.pyplot as plt
import seaborn as sb
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.patches as mpatches

In [7]:
# Fixed seed so the row shuffle below is reproducible across kernel restarts;
# without it every run produces different CV folds and validation splits.
SEED = 42
# Columns 0..N_FEATURES-1 are read as model inputs, column N_FEATURES as the label.
N_FEATURES = 187

df_train = pd.read_csv("data/mitbih_train.csv", header=None)
df_train = df_train.sample(frac=1, random_state=SEED)
df_test = pd.read_csv("data/mitbih_test.csv", header=None)

feature_columns = list(range(N_FEATURES))

# Labels are cast to int8; inputs get a trailing axis of size 1 so each row
# becomes an (N_FEATURES, 1) sequence.
Y = np.array(df_train[N_FEATURES].values).astype(np.int8)
X = np.array(df_train[feature_columns].values)[..., np.newaxis]

Y_test = np.array(df_test[N_FEATURES].values).astype(np.int8)
X_test = np.array(df_test[feature_columns].values)[..., np.newaxis]

# Number of distinct labels present in the training set.
n_class = np.unique(Y).size

X.shape, Y.shape

((87554, 187, 1), (87554,))

### Hyperparameter search for the GRU network

In [8]:
def build_gru(n_class=5, dropout=0.3, rnn_sizes = [128, 128], fc_sizes=[64], batch_norm=True,
              input_shape=(187, 1)):
    """Build an (uncompiled) stacked-GRU classifier.

    Layer order: input -> [BatchNormalization] -> GRU stack -> dense stack ->
    softmax output. When ``batch_norm`` is true a BatchNormalization layer is
    added after the input, after every GRU layer and after every dense block.

    Parameters
    ----------
    n_class : int
        Number of units in the final softmax layer.
    dropout : float
        Value passed as ``dropout`` to every GRU layer and used as the rate of
        the Dropout layer that follows each dense layer.
    rnn_sizes : sequence of int
        Units of each GRU layer, in order. Every GRU except the last returns
        full sequences so the layers can be stacked.
    fc_sizes : sequence of int
        Units of each ReLU dense layer placed after the GRU stack.
    batch_norm : bool
        Whether to insert the BatchNormalization layers described above.
    input_shape : tuple
        Shape of one input sample, without the batch axis.

    Returns
    -------
    Sequential
        The assembled model; the caller is responsible for compiling it.
    """
    # NOTE: the list defaults are shared between calls; they are only read
    # here, never mutated, so sharing is safe.
    model = Sequential()
    model.add(Input(shape=input_shape))

    if batch_norm:
        model.add(BatchNormalization())

    last_rnn_index = len(rnn_sizes) - 1
    for index, dim in enumerate(rnn_sizes):
        # Only the final recurrent layer collapses the time axis.
        model.add(GRU(dim, dropout=dropout, return_sequences=(index != last_rnn_index)))

        if batch_norm:
            model.add(BatchNormalization())

    for dim in fc_sizes:
        model.add(Dense(dim, activation="relu"))
        model.add(Dropout(dropout))

        if batch_norm:
            model.add(BatchNormalization())

    # Use the caller-supplied class count (previously a hard-coded 5 was used
    # and the n_class argument was ignored).
    model.add(Dense(n_class, activation="softmax"))

    return model

In [9]:
class CustomRNN(BaseEstimator):
    """Scikit-learn style wrapper around the Keras GRU model from ``build_gru``.

    The hyperparameters are stored as plain attributes so the estimator can be
    driven by ``GridSearchCV``. A new Keras model is built and compiled on
    every call to ``fit``.

    Parameters
    ----------
    epochs : int
        Number of training epochs passed to Keras ``fit``.
    batch_size : int
        Mini-batch size passed to Keras ``fit``.
    learning_rate : float
        Learning rate given to the Adam optimizer.
    dropout : float
        Dropout value forwarded to ``build_gru``.
    rnn_sizes : list of int
        GRU layer sizes forwarded to ``build_gru``.
    fc_sizes : list of int
        Dense layer sizes forwarded to ``build_gru``.
    batch_norm : bool
        Whether ``build_gru`` should insert batch-normalization layers.
    """

    def __init__(self, epochs=100,
                       batch_size=64,
                       learning_rate=1e-3,
                       dropout=0.3,
                       rnn_sizes=[128, 128],
                       fc_sizes=[64],
                       batch_norm=True):
        # Having the hyperparameters in the constructor signature lets
        # get_params()/clone() see them and allows fit() to work on a freshly
        # constructed instance. Values are stored unmodified (no copies).
        self.set_params(epochs=epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate,
                        dropout=dropout,
                        rnn_sizes=rnn_sizes,
                        fc_sizes=fc_sizes,
                        batch_norm=batch_norm)

    def fit(self, train_X, train_y, **kwargs):
        """Build a fresh model and train it on ``(train_X, train_y)``.

        10% of the training data is held out by Keras as validation data.
        Extra keyword arguments are forwarded to the Keras ``fit`` call (for
        example ``callbacks=[EarlyStopping(...)]``) and take precedence over
        the defaults set here.

        Returns
        -------
        self
        """
        self.build_model()

        fit_args = {
            "validation_split": 0.1,
            "epochs": self.epochs,
            "batch_size": self.batch_size,
        }
        fit_args.update(kwargs)
        self.model.fit(train_X, train_y, **fit_args)

        return self

    def predict(self, eval_X):
        """Return the index of the highest-probability class for each sample."""
        return np.argmax(self.model.predict(eval_X), axis=1)

    def set_params(self, epochs=100, 
                         batch_size=64, 
                         learning_rate=1e-3, 
                         dropout=0.3, 
                         rnn_sizes=[128, 128],
                         fc_sizes=[64],
                         batch_norm=True):
        """Assign all hyperparameters and return ``self``.

        Note: every hyperparameter that is not passed is reset to the default
        listed in this signature, not left at its current value.
        """
        # The list defaults are shared objects; they are stored and read but
        # never mutated by this class.
        self.epochs = epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.dropout = dropout
        self.rnn_sizes = rnn_sizes
        self.fc_sizes = fc_sizes
        self.batch_norm = batch_norm

        return self

    def score(self, eval_X, eval_y):
        """Print and return the macro-averaged F1 score on ``(eval_X, eval_y)``."""
        predicted_y = self.predict(eval_X)
        # f1_score expects (y_true, y_pred) in that order.
        f1_score_ = f1_score(eval_y, predicted_y, average='macro')
        print("f1 score: ", f1_score_)
        return f1_score_

    def build_model(self):
        """Create ``self.model`` from the current hyperparameters and compile it.

        The model is compiled with Adam, sparse categorical cross-entropy loss
        and accuracy as the tracked metric.
        """
        self.model = build_gru(n_class=5, dropout=self.dropout,
                               rnn_sizes=self.rnn_sizes, fc_sizes=self.fc_sizes,
                               batch_norm=self.batch_norm)
        opt = optimizers.Adam(self.learning_rate)

        self.model.compile(optimizer=opt,
                           loss="sparse_categorical_crossentropy",
                           metrics=['accuracy'])

In [10]:
# Wider grid over architecture choices. It is defined here but not the grid
# handed to GridSearchCV below.
params = dict(
    epochs=[2],
    batch_size=[64],
    learning_rate=[1e-3],
    dropout=[0.2],
    rnn_sizes=[[128, 128], [128, 128, 128]],
    fc_sizes=[[64], [64, 64], [64, 32]],
    batch_norm=[True, False],
)

# Single-candidate grid: this is the one actually searched in this cell.
dummy_params = dict(
    epochs=[10],
    batch_size=[128],
    learning_rate=[1e-4],
    dropout=[0.2],
    rnn_sizes=[[128, 128]],
    fc_sizes=[[64, 32]],
    batch_norm=[False],
)

model = CustomRNN()

# 5-fold search run sequentially; no refit on the full data afterwards, and
# any failure inside a fit is raised instead of being scored.
search = GridSearchCV(
    model,
    dummy_params,
    n_jobs=1,
    cv=5,
    return_train_score=True,
    refit=False,
    verbose=10,
    error_score='raise',
)

# fit() hands back the search object itself; show its attributes as the cell output.
best = search.fit(X[:, :, :], Y[:])
vars(best)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] batch_norm=False, batch_size=128, dropout=0.2, epochs=10, fc_sizes=[64, 32], learning_rate=0.0001, rnn_sizes=[128, 128] 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Train on 63038 samples, validate on 7005 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score:  0.19406401492751407
f1 score:  0.19105538931503693
[CV]  batch_norm=False, batch_size=128, dropout=0.2, epochs=10, fc_sizes=[64, 32], learning_rate=0.0001, rnn_sizes=[128, 128], score=(train=0.191, test=0.194), total= 2.2min
[CV] batch_norm=False, batch_size=128, dropout=0.2, epochs=10, fc_sizes=[64, 32], learning_rate=0.0001, rnn_sizes=[128, 128] 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.4min remaining:    0.0s


Train on 63038 samples, validate on 7005 samples
Epoch 1/10


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\User\Anaconda3\envs\tf-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-36069508b718>", line 30, in <module>
    best = search.fit(X[:, :, :], Y[:])
  File "C:\Users\User\Anaconda3\envs\tf-gpu\lib\site-packages\sklearn\model_selection\_search.py", line 710, in fit
    self._run_search(evaluate_candidates)
  File "C:\Users\User\Anaconda3\envs\tf-gpu\lib\site-packages\sklearn\model_selection\_search.py", line 1151, in _run_search
    evaluate_candidates(ParameterGrid(self.param_grid))
  File "C:\Users\User\Anaconda3\envs\tf-gpu\lib\site-packages\sklearn\model_selection\_search.py", line 689, in evaluate_candidates
    cv.split(X, y, groups)))
  File "C:\Users\User\Anaconda3\envs\tf-gpu\lib\site-packages\joblib\parallel.py", line 1007, in __call__
    while self.dispatch_one_batch(iterator):
  File "C:\Users\User\Anaconda3\envs\

KeyboardInterrupt: 