**COMPILING THE DATA**

In [1]:
# importing all the necessary libraries
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf

In [2]:
# Load the labelled PTB heartbeat recordings into a two-element list:
# dfs[0] holds the normal beats, dfs[1] the abnormal ones.
# header=None: the ptbdb_*.csv files are assumed to ship WITHOUT a header row
# (NOTE(review): confirm against the dataset). With pandas' default header
# inference the first recording of each file would be consumed as column names
# and silently dropped from the data.
dfs = [pd.read_csv('../input/heartbeat/ptbdb_' + x + '.csv', header=None) for x in ['normal', 'abnormal']]

In [9]:
# Give every frame the same positional integer column labels (0, 1, 2, ...),
# so the two datasets share column names and can be stacked into one dataset.
for df in dfs:
    df.columns = list(range(df.shape[1]))

In [13]:
# Build the single training dataset in one chained expression:
#   1. stack the frames row-wise,
#   2. shuffle the rows (frac=1.0 keeps every row; random_state fixes the order),
#   3. discard the old index and renumber the rows from 0,
#   4. name the last column (187), which holds the class label, 'Label'.
data = (
    pd.concat(dfs, axis=0)
    .sample(frac=1.0, random_state=1)
    .reset_index(drop=True)
    .rename(columns={187: 'Label'})
)

data

**PREPROCESSING THE DATA**

In [14]:
# Separate the features from the target:
#   x - every column except 'Label'
#   y - the 'Label' column on its own
# Both are copies, so later changes to them do not touch `data`.

x = data.drop(columns=['Label']).copy()
y = data.loc[:, 'Label'].copy()

In [23]:
# Hold out 30% of the rows for testing; the other 70% form the training set.
# random_state pins the shuffle so the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    random_state=1,
)

In [24]:
# Display the training features produced by the split.
x_train

In [25]:
# Display the training labels produced by the split.
y_train

In [26]:
# Display the held-out test features produced by the split.
x_test

In [27]:
# Display the held-out test labels produced by the split.
y_test

**TRAINING THE MODEL**

In [47]:
# Build the classifier with the Keras functional API.

# Each instance arrives as a flat vector with one value per feature column (187).
# `shape` has to be a tuple: `(n)` is just the integer n, so the trailing comma
# is required to get the 1-tuple that Keras expects.
inputs = tf.keras.Input(shape=(x_train.shape[1],))



# Alternative architecture (kept for reference, not executed): a standard network
# with two 64-unit ReLU hidden layers, which treats the features as separate values
# with no dependency between them. To try it, feed `dense` to the output layer
# instead of `flatten`. (It deliberately does not reuse the name `x`, which holds
# the feature DataFrame.)
# dense = tf.keras.layers.Dense(64, activation='relu')(inputs)
# dense = tf.keras.layers.Dense(64, activation='relu')(dense)



# Recurrent network: the GRU consumes the features one step at a time, combining
# each value with the state left behind by the previous steps.
# A GRU expects (batch, timesteps, channels) input, so a channel axis of size 1 is
# appended. A Reshape layer is used rather than calling tf.expand_dims on the
# symbolic input, because newer Keras versions reject raw TensorFlow ops applied
# to Keras tensors.
expand = tf.keras.layers.Reshape((x_train.shape[1], 1))(inputs)
gru = tf.keras.layers.GRU(256, return_sequences=True)(expand)
# return_sequences=True yields one 256-wide state per step; flatten them all
# into a single vector for the output layer.
flatten = tf.keras.layers.Flatten()(gru)



# A single sigmoid unit: the output lies in the range (0, 1) and is read as the
# probability of the positive class.
# Presumably label 1 = abnormal, i.e. abnormal ( > 0.5 ), normal ( < 0.5 );
# NOTE(review): confirm the label encoding against the dataset.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(flatten)

# Tie the input and output tensors together into a trainable model.
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would add a stray "None" line after the table.
model.summary()

In [48]:
# Training configuration: Adam optimizer with binary cross-entropy loss (the usual
# choice for a two-class, single-sigmoid output), tracking accuracy and AUC.

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
)

# Train for at most 100 epochs, holding back 20% of the training data for validation.
# Early stopping ends training once val_loss has failed to improve for 3 epochs in a
# row and restores the weights from the best epoch.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ],
)

**GETTING OUR RESULTS**

In [49]:
# Score the trained model on the held-out test set (silently, verbose=0).
# `results` is a 3-element sequence following the compile() configuration:
#   results[0] -> loss
#   results[1] -> accuracy
#   results[2] -> AUC
results = model.evaluate(x=x_test, y=y_test, verbose=0)

In [50]:
# Report the test metrics: accuracy as a percentage, AUC to four decimals.
print(f"Test accuracy: {results[1] * 100:.2f}%")
print(f"     Test AUC: {results[2]:.4f}")

# > Results using 2 hidden layer neural network
    1. Test accuracy: 93.95%
    2. Test AUC: 0.9804

# > Results using a 2 layer Recurrent Neural Network (one GRU layer followed by the sigmoid output layer)
    1. Test accuracy: 95.05%
    2. Test AUC: 0.9882