## Description of Variables

A dataset of heartrates is used for this project. Each row represents a single measurement. Columns labeled as T1 from T80 are the time steps on the timeline (there are 80 time steps, each time step has only one measurement). 

The last column is the target variable. It shows the label (category) of the measurement as follows:<br>
0 = Normal<br>
1 = Supraventricular premature beat<br>
2 = Premature ventricular contraction<br>
3 = Fusion of ventricular and normal beat<br>
4 = Unclassifiable beat

## Goal

Use the data set **hearbeat_cleaned.csv** to predict the column called **Target**. The input variables are columns labeled as **T1 to T80**. 

# Read and Prepare the Data

In [1]:
# Import libraries

import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

# Other Libararies
from tensorflow.keras.callbacks import EarlyStopping

# Random Seed
np.random.seed(42)
tf.random.set_seed(42)

In [2]:
# Read the dataset

data = pd.read_csv(r'C:\Users\Scott\Downloads\heartbeat_cleaned.csv')

In [3]:
data.shape

(7960, 81)

In [4]:
data.head()

Unnamed: 0,T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,...,T72,T73,T74,T75,T76,T77,T78,T79,T80,Target
0,0.987,0.892,0.461,0.113,0.149,0.19,0.165,0.162,0.147,0.138,...,0.197,0.197,0.196,0.203,0.201,0.199,0.201,0.205,0.208,0
1,1.0,0.918,0.621,0.133,0.105,0.125,0.117,0.0898,0.0703,0.0781,...,0.195,0.191,0.152,0.172,0.207,0.211,0.207,0.207,0.172,0
2,1.0,0.751,0.143,0.104,0.0961,0.0519,0.0442,0.0416,0.0364,0.0857,...,0.226,0.242,0.244,0.286,0.468,0.816,0.977,0.452,0.0519,0
3,1.0,0.74,0.235,0.0464,0.0722,0.0567,0.0103,0.0155,0.0284,0.0155,...,0.0851,0.0747,0.0515,0.0593,0.067,0.0361,0.121,0.451,0.869,0
4,1.0,0.833,0.309,0.0191,0.101,0.12,0.104,0.0874,0.0765,0.0765,...,0.205,0.421,0.803,0.951,0.467,0.0,0.0519,0.082,0.0628,0


In [5]:
# Separate the target variable

y = data['Target']
x = data.drop('Target', axis=1)

## Split the data

In [6]:
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3)

## Data Transformation

In [7]:
# Convert the target variable to integer type
train_y = np.array(train_y)
test_y = np.array(test_y)

train_y = train_y.astype(np.int32)
test_y = test_y.astype(np.int32)

In [8]:
# Take a look at what the first 10 values look like of the target variable
train_y[0:10]

array([1, 0, 2, 0, 2, 2, 0, 0, 4, 4])

In [9]:
# Convert the input variables to float data types
train_x= np.array(train_x)
test_x= np.array(test_x)

train_x = train_x.astype(np.float32)
test_x = test_x.astype(np.float32)

In [10]:
train_x

array([[1.    , 0.818 , 0.133 , ..., 0.354 , 0.354 , 0.387 ],
       [1.    , 0.849 , 0.166 , ..., 0.108 , 0.0811, 0.0695],
       [0.    , 0.0335, 0.163 , ..., 0.62  , 0.624 , 0.606 ],
       ...,
       [1.    , 0.98  , 0.573 , ..., 0.057 , 0.038 , 0.0418],
       [0.564 , 0.512 , 0.468 , ..., 0.357 , 0.348 , 0.354 ],
       [0.799 , 0.683 , 0.564 , ..., 0.265 , 0.265 , 0.247 ]],
      dtype=float32)

In [11]:
# Convert the dataset to 3-D

train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))

In [12]:
train_x.shape, train_y.shape

((5572, 80, 1), (5572,))

In [13]:
train_x

array([[[1.    ],
        [0.818 ],
        [0.133 ],
        ...,
        [0.354 ],
        [0.354 ],
        [0.387 ]],

       [[1.    ],
        [0.849 ],
        [0.166 ],
        ...,
        [0.108 ],
        [0.0811],
        [0.0695]],

       [[0.    ],
        [0.0335],
        [0.163 ],
        ...,
        [0.62  ],
        [0.624 ],
        [0.606 ]],

       ...,

       [[1.    ],
        [0.98  ],
        [0.573 ],
        ...,
        [0.057 ],
        [0.038 ],
        [0.0418]],

       [[0.564 ],
        [0.512 ],
        [0.468 ],
        ...,
        [0.357 ],
        [0.348 ],
        [0.354 ]],

       [[0.799 ],
        [0.683 ],
        [0.564 ],
        ...,
        [0.265 ],
        [0.265 ],
        [0.247 ]]], dtype=float32)

# Find the baseline

In [14]:
data['Target'].value_counts()/len(data)

0    0.582035
4    0.198995
2    0.155402
1    0.055905
3    0.007663
Name: Target, dtype: float64

# Build a cross-sectional shallow model using Keras (with only one hidden layer)

In [15]:
# Using 80 because we have 80 columns and 1 because we have 1 column as target variable

model = keras.models.Sequential([
    
    keras.layers.Flatten(input_shape=[80, 1]),
    keras.layers.Dense(80, activation='relu'),
    keras.layers.Dense(5, activation='softmax')
    
])

In [16]:
# Make sure to add our random seeds for both numpy and tensorflow
np.random.seed(42)
tf.random.set_seed(42)

# Configure the optimizer
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

# Fit the model
history = model.fit(train_x, train_y, epochs=50,
                    validation_data=(test_x, test_y))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [17]:
# Evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)

# Extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.22
accuracy: 93.89%


# Build a cross-sectional deep model using Keras (with two or more hidden layers)

In [18]:
# Create early stopping
earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

callback = [earlystop]

In [19]:
# Create the input shapes (80 for input columns and 1 for target column)
n_steps = 80
n_inputs = 1

# Create the model
model = keras.models.Sequential([
    keras.layers.SimpleRNN(50, return_sequences=True, input_shape=[n_steps, n_inputs] ),
    keras.layers.SimpleRNN(35, return_sequences=True),
    keras.layers.SimpleRNN(20, return_sequences=True),
    keras.layers.SimpleRNN(5), 
    keras.layers.Dense(5, activation='softmax')
])


In [20]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Adam(learning_rate=0.001)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 13: early stopping


In [21]:
# Evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)

# Extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.43
accuracy: 88.65%


# Build a sequential shallow LSTM Model (with only one LSTM layer)

In [22]:
# Create the input shapes (80 for input columns and 1 for target column)
n_steps = 80
n_inputs = 1

model = keras.models.Sequential([
    
    keras.layers.LSTM(5, activation='softmax' , input_shape=[n_steps, n_inputs])
])

In [23]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.001)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [24]:
# Evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)

# Extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 1.03
accuracy: 60.89%


# Build a sequential deep LSTM Model (with only two LSTM layers)

In [25]:
n_steps = 80
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.LSTM(20, return_sequences=True, input_shape=[n_steps, n_inputs]),
    keras.layers.LSTM(15, return_sequences=True),
    keras.layers.LSTM(10),
    keras.layers.Dense(5, activation='softmax')
]) #

In [26]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping


In [27]:
# Evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)

# Extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 1.11
accuracy: 59.30%


# Build a sequential shallow GRU Model (with only one GRU layer)

In [28]:
n_steps = 80
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.GRU(5, input_shape=[n_steps, n_inputs]),
    keras.layers.Dense(5, activation='softmax')
])

In [29]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.001)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [30]:
# Evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)

# Extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.93
accuracy: 61.39%


# Build a sequential deep GRU Model (with only two GRU layers)

In [31]:
n_steps = 80
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.GRU(5, return_sequences=True, input_shape=[n_steps, n_inputs]),
    keras.layers.GRU(5, return_sequences=True),
    keras.layers.GRU(5, return_sequences=True),
    keras.layers.GRU(5),
    keras.layers.Dense(5, activation='softmax')
])

In [32]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: early stopping


In [33]:
# Evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)

# Extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.44
accuracy: 87.56%


# Discussion

## List the test values of each model you built

## Which model performs the best and why?

## How does it compare to baseline? (0.5 points)