# PulseCheck - Heartbeat Anomaly Detection

## Import Necessary Modules

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf






## Data Preparation & Preprocessing

In [8]:
dfc = [pd.read_csv('input/heartbeat/ptbdb_' + x + '.csv') for x in ['normal', 'abnormal']]

In [9]:
for df in dfc:
    df.columns = list(range(len(df.columns)))

In [10]:
dt = pd.concat(dfc, axis=0).sample(frac=1.0, random_state=1).reset_index(drop=True)

dt = dt.rename({187: 'Label'}, axis=1)

In [11]:
dt

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,Label
0,1.000000,0.887073,0.774146,0.713224,0.682021,0.699851,0.595840,0.552749,0.469539,0.481426,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1.000000,0.684376,0.395907,0.288863,0.262102,0.231405,0.234160,0.218811,0.207399,0.212121,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,1.000000,0.645543,0.270195,0.089833,0.038997,0.064067,0.045265,0.062674,0.059192,0.057799,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.995881,0.993821,0.959835,0.872297,0.542739,0.054583,0.000000,0.098867,0.156540,0.302781,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.996020,0.323383,0.109453,0.035821,0.264677,0.342289,0.367164,0.402985,0.401990,0.420896,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14545,1.000000,0.979786,0.621879,0.146849,0.000000,0.266944,0.356718,0.421522,0.450059,0.457788,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14546,1.000000,0.648015,0.424677,0.315160,0.223816,0.156384,0.156863,0.122908,0.093735,0.122908,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14547,0.931217,1.000000,0.465201,0.150183,0.035409,0.033374,0.049247,0.070818,0.078958,0.087505,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14548,1.000000,0.588291,0.120570,0.056962,0.136076,0.181646,0.182595,0.185759,0.178481,0.186076,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Train-Test Split

In [12]:
y = dt['Label'].copy()
X = dt.drop('Label', axis=1).copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=1)

In [13]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,177,178,179,180,181,182,183,184,185,186
8540,1.000000,0.834705,0.358711,0.163923,0.096708,0.100137,0.137860,0.131001,0.131001,0.147462,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
10734,1.000000,0.783220,0.479365,0.206803,0.010884,0.078912,0.107937,0.113832,0.116553,0.113832,...,0.063492,0.070748,0.094785,0.142404,0.149206,0.155102,0.0,0.0,0.0,0.0
13652,1.000000,0.789198,0.247764,0.000000,0.039662,0.105148,0.115274,0.105654,0.109030,0.108017,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
5014,1.000000,0.783092,0.592754,0.418357,0.347826,0.314976,0.289372,0.275362,0.267150,0.276812,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
11630,0.990495,0.670835,0.302151,0.000000,0.068534,0.074537,0.141571,0.253127,0.309155,0.310155,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
905,1.000000,0.668866,0.334133,0.069586,0.106179,0.244151,0.262148,0.238152,0.240552,0.246551,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
5192,0.970892,0.406573,0.051643,0.065728,0.184038,0.205634,0.238498,0.227230,0.234742,0.271362,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
12172,1.000000,0.770696,0.600526,0.543364,0.500657,0.496058,0.500657,0.523653,0.519054,0.520368,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
235,1.000000,0.814031,0.646993,0.079065,0.063474,0.122494,0.208241,0.231626,0.263920,0.279510,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0


In [14]:
y_train

8540     0.0
10734    0.0
13652    0.0
5014     1.0
11630    1.0
        ... 
905      1.0
5192     1.0
12172    1.0
235      1.0
13349    1.0
Name: Label, Length: 10185, dtype: float64

In [15]:
X_train.shape

(10185, 187)

## Build the Model

In [16]:
inputs = tf.keras.Input(shape=(X_train.shape[1],))

expand = tf.expand_dims(inputs, axis=2)
gru = tf.keras.layers.GRU(256, return_sequences=True)(expand)
flatten = tf.keras.layers.Flatten()(gru)

outputs = tf.keras.layers.Dense(1, activation='sigmoid')(flatten)


model = tf.keras.Model(inputs=inputs, outputs=outputs)

print(model.summary())


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 187)]             0         
                                                                 
 tf.expand_dims (TFOpLambda  (None, 187, 1)            0         
 )                                                               
                                                                 
 gru (GRU)                   (None, 187, 256)          198912    
                                                                 
 flatten (Flatten)           (None, 47872)             0         
                                                                 
 dense (Dense)               (None, 1)                 47873     
                                                                 
Total params: 246785 (964.00 KB)
Trainable params: 246785 (964.00 KB)
Non-trainable params: 0 (0.00 Byte)
____________________

## Train the Model

In [17]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(name='auc')
    ]
)

history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
    ]
)


Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50


## Results and Evaluation

In [18]:
res = model.evaluate(X_test, y_test, verbose=0)

In [19]:
print("Accuracy: {:.2f}%".format(res[1] * 100))
print("     AUC: {:.4f}".format(res[2]))

Accuracy: 97.30%
     AUC: 0.9939
