In [0]:
from google.colab import drive
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Dense, Input, Dropout, Convolution1D, MaxPool1D, GlobalMaxPool1D, GlobalAveragePooling1D, \
    concatenate, LSTM, Activation, CuDNNLSTM, CuDNNGRU, BatchNormalization
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import KFold
from keras import callbacks

from sklearn.metrics import confusion_matrix

In [2]:
drive.mount('/content/drive', force_remount=False)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
os.chdir('/content/drive/My Drive/MLHC/project_1')

## Load data

In [0]:
df_train = pd.read_csv("data/mitbih_train.csv", header=None)
df_train = df_train.sample(frac=1)
df_test = pd.read_csv("data/mitbih_test.csv", header=None)

Y = np.array(df_train[187].values).astype(np.int8)
X = np.array(df_train[list(range(187))].values)[..., np.newaxis]

Y_test = np.array(df_test[187].values).astype(np.int8)
X_test = np.array(df_test[list(range(187))].values)[..., np.newaxis]

## RNN

In [0]:
class RNN:
    def __init__(self, input_dim, output_classes, optimizer='RMSprop', dropout=.1, dense_size=200, hidden_size=150):
        self.input_dim = input_dim
        self.output_classes = output_classes
        self.optimizer = optimizer
        self.dense_size = dense_size
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.model = self.rnn()
  
    def rnn(self):
        x = Input(shape=self.input_dim)
        layer = Convolution1D(16, kernel_size=3, activation=activations.relu, padding="valid")(x)
        layer = Convolution1D(16, kernel_size=3, activation=activations.relu, padding="valid")(layer)
        layer = MaxPool1D(pool_size=2)(layer)
        layer = Dropout(rate=self.dropout)(layer)
        layer = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(layer)
        layer = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(layer)
        layer = MaxPool1D(pool_size=2)(layer)
        layer = Dropout(rate=self.dropout)(layer)
        layer = BatchNormalization()(layer)
        layer = CuDNNGRU(self.hidden_size, name='rnn')(layer)
        layer = Dense(self.dense_size, activation=activations.relu, name='dense')(layer)
        y = Dense(self.output_classes, name='out_layer', activation=activations.sigmoid)(layer)

        model = models.Model(inputs=x, outputs=y)
        model.compile(loss='sparse_categorical_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])
        return model

    def set_weights(self, model_name):
        self.model.load_weights('./rnn/'+model_name+'/'+model_name+'_weights.h5')

    def load_model(self, model_name):
        self.model.load_weights('./rnn/'+model_name+'/'+model_name+'_model.h5')

    def train(self, data_in, data_out, model_name="default_name", epochs=10, batch_size=100, save=True, verbose=1):
        directory = './rnn/'+model_name+'/'
        callback = []
        callback.append(EarlyStopping(monitor="val_acc", mode="max", patience=6, verbose=1))
        callback.append(ReduceLROnPlateau(monitor="val_acc", mode="max", patience=3, verbose=2))
        if save:
            callback.append(callbacks.TensorBoard(log_dir=directory+'log/'))
            callback.append(ModelCheckpoint(directory+model_name+'_model.h5', monitor='val_acc', verbose=verbose, save_best_only=True, mode='max'))
        self.model.fit(data_in, data_out,
                shuffle=True,
                epochs=epochs,
                batch_size=batch_size,
                callbacks=callback,
                verbose=verbose,
                validation_split=0.15)
        if save:
            self.save_weights(model_name)
        
    def save_weights(self, model_name):
        self.model.save_weights('./rnn/'+model_name+'/'+model_name+'_weights.h5')

    def evaluate_model(self, X_test, Y_test, model_name="", save=True):
        pred_test = np.argmax(self.model.predict(X_test), axis=1)
        f1 = f1_score(Y_test, pred_test, average="macro")
        acc = accuracy_score(Y_test, pred_test)
        print("\n=================================================================")
        print("\nAccuracy:", acc)
        print("F1:", f1)
        print("Confusion matrix:")
        print(confusion_matrix(Y_test, pred_test))
        print("\n=================================================================\n")
        if save:
            with open('./rnn/'+model_name+'/' + 'report.txt','w') as fh:
                fh.write("Accuracy: "+str(acc)+"\n")
                fh.write("F1: "+str(f1)+"\n\n")
                self.model.summary(print_fn=lambda x: fh.write(x + '\n'))
        return (f1, acc)

    def cross_validation(self, X, Y, folds=5, epochs=10, batch_size=100, verbose=0):
        kf = KFold(n_splits=folds)
        i = 0.
        f1_a = 0
        acc_a = 0
        for train_index, test_index in kf.split(X):
            print("Iteration", int(i+1))
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]
            rnn = RNN(self.input_dim, self.output_classes, self.optimizer, self.dense_size, self.hidden_size)
            rnn.train(X_train, Y_train, epochs=epochs, batch_size=batch_size, save=False, verbose=verbose)
            f1, acc = rnn.evaluate_model(X_test, Y_test, save=False)
            f1_a += f1
            acc_a += acc
            i += 1
        print("\n=================================================================")
        print("\nAverage Accuracy:", acc_a/i)
        print("Average F1:", f1_a/i)
        print("\n=================================================================\n")


In [0]:
rnn = RNN(X.shape[1:], np.bincount(Y).size, optimizer="Adadelta", hidden_size=100, dropout=.15)

In [29]:
rnn.cross_validation(X, Y, epochs=1000, batch_size=500, verbose=2)

Iteration 1
Train on 59536 samples, validate on 10507 samples
Epoch 1/1000
 - 7s - loss: 0.6618 - acc: 0.8195 - val_loss: 0.4894 - val_acc: 0.8344
Epoch 2/1000
 - 4s - loss: 0.3349 - acc: 0.9010 - val_loss: 0.9439 - val_acc: 0.8354
Epoch 3/1000


KeyboardInterrupt: ignored

In [37]:
rnn.train(X, Y, epochs=100, model_name="ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta")

Train on 74420 samples, validate on 13134 samples
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.87841, saving model to ./rnn/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta_model.h5
Epoch 2/100

Epoch 00002: val_acc improved from 0.87841 to 0.88884, saving model to ./rnn/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta_model.h5
Epoch 3/100

Epoch 00003: val_acc improved from 0.88884 to 0.93414, saving model to ./rnn/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta_model.h5
Epoch 4/100

Epoch 00004: val_acc did not improve from 0.93414
Epoch 5/100

Epoch 00005: val_acc improved from 0.93414 to 0.95485, saving model to ./rnn/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta/ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta_model.h5
Epoch 6/100

Epoch 00006: val_acc improved from 0.95485 to 0.97137, saving mod

In [38]:
rnn.set_weights("ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta")
rnn.evaluate_model(X_test, Y_test, model_name="ConvRNN_GRU_100_hidden_layer_2_Conv_BatchN_Adadelta")



Accuracy: 0.9838753882696876
F1: 0.9062168937582304
Confusion matrix:
[[18042    26    29    10    11]
 [  130   404    19     0     3]
 [   37     6  1383    18     4]
 [   22     0    18   122     0]
 [   18     0     2     0  1588]]




(0.9062168937582304, 0.9838753882696876)