## Sannjana Thinderu U16484810

# HEARTBEAT PREDICTION MODEL USING MACHINE LEARNING 

In this project, we will focus on healthcare. This data set is made available by MIT. It contains data about 9,026 heartbeat measurements. Each row represents a single measurement (captured on a timeline). There are a total of 80 data points (columns). This is a multiclass classification task: predict whether the measurement represents a normal heartbeat or other anomalies. 

## Goal

Use the data set **hearbeat_cleaned.csv** to predict the column called **Target**. The input variables are columns labeled as **T1 to T80**. 

# Read and Prepare the Data (1 points)

# Setup 

In [24]:
# Common imports
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

# Get Data

In [25]:
data = pd.read_csv("heartbeat_cleaned.csv")

In [26]:
data.shape

(7960, 81)

In [27]:
data.head

<bound method NDFrame.head of          T1     T2     T3      T4      T5      T6      T7      T8      T9  \
0     0.987  0.892  0.461  0.1130  0.1490  0.1900  0.1650  0.1620  0.1470   
1     1.000  0.918  0.621  0.1330  0.1050  0.1250  0.1170  0.0898  0.0703   
2     1.000  0.751  0.143  0.1040  0.0961  0.0519  0.0442  0.0416  0.0364   
3     1.000  0.740  0.235  0.0464  0.0722  0.0567  0.0103  0.0155  0.0284   
4     1.000  0.833  0.309  0.0191  0.1010  0.1200  0.1040  0.0874  0.0765   
...     ...    ...    ...     ...     ...     ...     ...     ...     ...   
7955  0.929  0.871  0.805  0.7430  0.6510  0.5360  0.3940  0.2510  0.1400   
7956  0.803  0.692  0.587  0.4470  0.3180  0.1900  0.1180  0.0777  0.1120   
7957  1.000  0.967  0.620  0.3470  0.1390  0.0890  0.1040  0.1010  0.1070   
7958  0.984  0.567  0.607  0.5830  0.6070  0.5750  0.5750  0.4880  0.3930   
7959  0.974  0.913  0.866  0.8230  0.7460  0.6420  0.5480  0.4260  0.3250   

         T10  ...     T72     T73     T74    

In [28]:
#checking for missing values
data.isna().sum()

T1        0
T2        0
T3        0
T4        0
T5        0
         ..
T77       0
T78       0
T79       0
T80       0
Target    0
Length: 81, dtype: int64

# Split Data in 70/30

In [29]:
#using sklearn library seperate the train and test data

from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(data, test_size=0.3)

# Target Variable 

In [30]:
# seperating  the target variable 
train_target = train_set[['Target']]
test_target = test_set[['Target']]

In [31]:
# train and test inputs
train_inputs = train_set.drop(['Target'], axis=1)
test_inputs = test_set.drop(['Target'], axis=1)

In [32]:
train_inputs

Unnamed: 0,T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,...,T71,T72,T73,T74,T75,T76,T77,T78,T79,T80
4390,1.00000,0.8540,0.497,0.2200,0.1720,0.1180,0.1050,0.06370,0.0669,0.0382,...,0.0318,0.00637,0.0287,0.0127,0.0223,0.0127,0.0255,0.000,0.0223,0.00955
7929,0.77200,0.6710,0.545,0.3920,0.2190,0.1130,0.0299,0.00000,0.0183,0.0748,...,0.5680,0.57100,0.5730,0.5850,0.5800,0.5830,0.5710,0.581,0.5730,0.57500
7771,0.80500,0.7060,0.603,0.4710,0.3300,0.1790,0.0835,0.01220,0.0000,0.0209,...,0.5410,0.53600,0.5440,0.5360,0.5430,0.5320,0.5410,0.527,0.5290,0.52500
4811,0.92500,0.7400,0.188,0.0000,0.0548,0.1230,0.1200,0.16400,0.2020,0.1680,...,0.2880,0.28400,0.2770,0.2640,0.2740,0.2950,0.2980,0.274,0.2670,0.27700
3700,1.00000,0.9130,0.370,0.0731,0.0411,0.0731,0.0274,0.02740,0.0137,0.0183,...,0.0594,0.13200,0.2880,0.5020,0.7530,0.9500,0.8490,0.461,0.1370,0.11000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1782,0.00893,0.0714,0.235,0.3690,0.4820,0.5540,0.6900,0.75900,0.7860,0.7710,...,0.7470,0.75000,0.7440,0.7380,0.7470,0.7320,0.7500,0.735,0.7380,0.74100
6043,0.70600,0.6750,0.393,0.1210,0.0000,0.0806,0.1560,0.17100,0.1850,0.1660,...,0.1350,0.13300,0.1400,0.1180,0.1370,0.1280,0.1370,0.107,0.1230,0.09480
7807,0.66600,0.4970,0.482,0.4670,0.4730,0.4440,0.4320,0.38200,0.3400,0.2720,...,0.3580,0.33700,0.3490,0.3340,0.3430,0.3280,0.3280,0.328,0.3250,0.32500
7544,0.71200,0.5960,0.475,0.3420,0.2040,0.0953,0.0339,0.00485,0.0355,0.0824,...,0.5150,0.50700,0.5120,0.5060,0.5090,0.5010,0.5040,0.501,0.5070,0.50400


## Pipeline 

In [33]:
numeric_columns = train_inputs.select_dtypes(include=[np.number]).columns.to_list()

In [34]:
numeric_columns

['T1',
 'T2',
 'T3',
 'T4',
 'T5',
 'T6',
 'T7',
 'T8',
 'T9',
 'T10',
 'T11',
 'T12',
 'T13',
 'T14',
 'T15',
 'T16',
 'T17',
 'T18',
 'T19',
 'T20',
 'T21',
 'T22',
 'T23',
 'T24',
 'T25',
 'T26',
 'T27',
 'T28',
 'T29',
 'T30',
 'T31',
 'T32',
 'T33',
 'T34',
 'T35',
 'T36',
 'T37',
 'T38',
 'T39',
 'T40',
 'T41',
 'T42',
 'T43',
 'T44',
 'T45',
 'T46',
 'T47',
 'T48',
 'T49',
 'T50',
 'T51',
 'T52',
 'T53',
 'T54',
 'T55',
 'T56',
 'T57',
 'T58',
 'T59',
 'T60',
 'T61',
 'T62',
 'T63',
 'T64',
 'T65',
 'T66',
 'T67',
 'T68',
 'T69',
 'T70',
 'T71',
 'T72',
 'T73',
 'T74',
 'T75',
 'T76',
 'T77',
 'T78',
 'T79',
 'T80']

In [35]:
train_inputs.shape

(5572, 80)

In [36]:
test_inputs.shape

(2388, 80)

# Data Transformation

In [37]:
#Target variables need to be an array with integer type
train_x = np.array(train_inputs)
test_x = np.array(test_inputs)

train_y = np.array(train_target)
test_y = np.array(test_target)

In [38]:
#Check the first 10 values of the train_y data set
train_x[0:10]

array([[1.     , 0.854  , 0.497  , 0.22   , 0.172  , 0.118  , 0.105  ,
        0.0637 , 0.0669 , 0.0382 , 0.0637 , 0.035  , 0.051  , 0.0414 ,
        0.0637 , 0.0446 , 0.0669 , 0.0573 , 0.0732 , 0.0669 , 0.0796 ,
        0.0764 , 0.118  , 0.124  , 0.153  , 0.153  , 0.194  , 0.201  ,
        0.242  , 0.252  , 0.287  , 0.283  , 0.315  , 0.287  , 0.274  ,
        0.223  , 0.185  , 0.134  , 0.111  , 0.0732 , 0.0669 , 0.0382 ,
        0.0446 , 0.0318 , 0.0318 , 0.0159 , 0.0287 , 0.0159 , 0.0318 ,
        0.0127 , 0.0287 , 0.00637, 0.0255 , 0.0159 , 0.035  , 0.0191 ,
        0.0446 , 0.0159 , 0.035  , 0.0255 , 0.0414 , 0.0287 , 0.0382 ,
        0.0191 , 0.0318 , 0.0191 , 0.0255 , 0.0191 , 0.035  , 0.0159 ,
        0.0318 , 0.00637, 0.0287 , 0.0127 , 0.0223 , 0.0127 , 0.0255 ,
        0.     , 0.0223 , 0.00955],
       [0.772  , 0.671  , 0.545  , 0.392  , 0.219  , 0.113  , 0.0299 ,
        0.     , 0.0183 , 0.0748 , 0.183  , 0.319  , 0.367  , 0.447  ,
        0.515  , 0.578  , 0.63   , 0.658 

In [39]:
train_x

array([[1.     , 0.854  , 0.497  , ..., 0.     , 0.0223 , 0.00955],
       [0.772  , 0.671  , 0.545  , ..., 0.581  , 0.573  , 0.575  ],
       [0.805  , 0.706  , 0.603  , ..., 0.527  , 0.529  , 0.525  ],
       ...,
       [0.666  , 0.497  , 0.482  , ..., 0.328  , 0.325  , 0.325  ],
       [0.712  , 0.596  , 0.475  , ..., 0.501  , 0.507  , 0.504  ],
       [1.     , 0.861  , 0.327  , ..., 0.305  , 0.297  , 0.29   ]])

In [40]:
#Keras expects a different input format:
#Data needs to have 3 dimensions
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))

In [41]:
train_x.shape, test_x.shape

((5572, 80, 1), (2388, 80, 1))

In [42]:
train_x

array([[[1.     ],
        [0.854  ],
        [0.497  ],
        ...,
        [0.     ],
        [0.0223 ],
        [0.00955]],

       [[0.772  ],
        [0.671  ],
        [0.545  ],
        ...,
        [0.581  ],
        [0.573  ],
        [0.575  ]],

       [[0.805  ],
        [0.706  ],
        [0.603  ],
        ...,
        [0.527  ],
        [0.529  ],
        [0.525  ]],

       ...,

       [[0.666  ],
        [0.497  ],
        [0.482  ],
        ...,
        [0.328  ],
        [0.325  ],
        [0.325  ]],

       [[0.712  ],
        [0.596  ],
        [0.475  ],
        ...,
        [0.501  ],
        [0.507  ],
        [0.504  ]],

       [[1.     ],
        [0.861  ],
        [0.327  ],
        ...,
        [0.305  ],
        [0.297  ],
        [0.29   ]]])

In [43]:
test_y

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [4]], dtype=int64)

# Find the baseline (0.5 point)

In [44]:
from sklearn.dummy import DummyClassifier

dummy_clf = DummyClassifier(strategy="most_frequent")

dummy_clf.fit(train_x, train_y)

In [45]:
from sklearn.metrics import accuracy_score

# Baseline Train Accuracy

In [46]:
dummy_train_pred = dummy_clf.predict(train_x)

baseline_train_acc = accuracy_score(train_y, dummy_train_pred)

print('Baseline Train Accuracy: {}' .format(baseline_train_acc))

Baseline Train Accuracy: 0.5791457286432161


# Baseline Test Accuracy

In [47]:
dummy_test_pred = dummy_clf.predict(test_x)

baseline_test_acc = accuracy_score(test_y, dummy_test_pred)

print('Baseline Test Accuracy: {}' .format(baseline_test_acc))

Baseline Test Accuracy: 0.5887772194304858


# Input shape of our Data

In [48]:
import tensorflow as tf
from tensorflow import keras
# fix random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [49]:
#What is your input shape?
#(meaning: how many neurons should be in the input layer?)

train_x.shape

(5572, 80, 1)

# Regular Cross-sectional Neural Network model using Keras with only one hidden layer

In [50]:
#Define the model: for multi-class

model = keras.models.Sequential()

model.add(keras.layers.Input(shape=80))
model.add(keras.layers.Dense(50, activation='relu'))
model.add(keras.layers.Dense(5, activation='softmax'))

#final layer: there has to be 5 nodes with softmax (because we have 5 categories)


In [51]:
# Compile model

#Optimizer:
adam = keras.optimizers.Adam(learning_rate=0.01)

model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [52]:
# Fit the model

history = model.fit(train_x, train_y, 
                    validation_data=(test_x, test_y), 
                    epochs=20, batch_size=100)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [53]:
# evaluate the model

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.26243922114372253, 0.9246231317520142]

In [54]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.26
accuracy: 92.46%


In [55]:
# Predictions are probabilities.

predictions = model.predict(test_x)



In [56]:
# Train values

train_scores = model.evaluate(train_x, train_y, verbose=0)
print(f"Train {model.metrics_names[0]}: {train_scores[0]:.2f}")
print(f"Train {model.metrics_names[1]}: {train_scores[1]*100:.2f}%")
train_scores

# In results, first is loss, second is accuracy

Train loss: 0.25
Train accuracy: 92.23%


[0.2462518811225891, 0.922290027141571]

In [57]:
# Test values

test_scores = model.evaluate(test_x, test_y, verbose=0)
print(f"Test {model.metrics_names[0]}: {test_scores[0]:.2f}")
print(f"Test {model.metrics_names[1]}: {test_scores[1]*100:.2f}%")
test_scores

# In results, first is loss, second is accuracy

Test loss: 0.26
Test accuracy: 92.46%


[0.26243922114372253, 0.9246231317520142]

In [58]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 50)                4050      
                                                                 
 dense_1 (Dense)             (None, 5)                 255       
                                                                 
Total params: 4,305
Trainable params: 4,305
Non-trainable params: 0
_________________________________________________________________


# Regular Deep Cross-Sectional Neural Network model using Keras with two or more hidden layers 

In [59]:
train_x.shape

(5572, 80, 1)

In [60]:
#Define the model: for multi-class
# we have used Pipe Architecture to build a deep neural network model. 
model = keras.models.Sequential()
model.add(keras.layers.Input(shape=80))
model.add(keras.layers.Dense(80, activation='relu'))
model.add(keras.layers.Dense(80, activation='relu'))
model.add(keras.layers.Dense(80, activation='relu'))
model.add(keras.layers.Dense(5, activation='softmax'))

#final layer: there has to be 5 nodes with softmax (because we have 5 categories)

In [61]:
# Compile model
#Optimizer:
adam = keras.optimizers.Adam(learning_rate=0.01)

model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [62]:
# Fit the model
history = model.fit(train_x, train_target, 
                    validation_data=(test_x, test_target), 
                    epochs=20, batch_size=100)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [63]:
# evaluate the model

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.26339027285575867, 0.9313232898712158]

In [64]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.26
accuracy: 93.13%


In [65]:
# Test values

test_scores = model.evaluate(test_x, test_y, verbose=0)
print(f"Test {model.metrics_names[0]}: {test_scores[0]:.2f}")
print(f"Test {model.metrics_names[1]}: {test_scores[1]*100:.2f}%")
test_scores
# In results, first is loss, second is accuracy

Test loss: 0.26
Test accuracy: 93.13%


[0.26339027285575867, 0.9313232898712158]

In [66]:
# Train values
train_scores = model.evaluate(train_x, train_y, verbose=0)
print(f"Train {model.metrics_names[0]}: {train_scores[0]:.2f}")
print(f"Train {model.metrics_names[1]}: {train_scores[1]*100:.2f}%")
train_scores
# In results, first is loss, second is accuracy

Train loss: 0.20
Train accuracy: 93.92%


[0.1965837925672531, 0.9391601085662842]

In [67]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 80)                6480      
                                                                 
 dense_3 (Dense)             (None, 80)                6480      
                                                                 
 dense_4 (Dense)             (None, 80)                6480      
                                                                 
 dense_5 (Dense)             (None, 5)                 405       
                                                                 
Total params: 19,845
Trainable params: 19,845
Non-trainable params: 0
_________________________________________________________________


# Build LSTM Model with only one layer

In [68]:
n_steps = 80
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.LSTM(80, input_shape=[n_steps, n_inputs]),
    keras.layers.Dense(5, activation='softmax')
])

In [69]:
from tensorflow.keras.callbacks import EarlyStopping
earlystop = EarlyStopping(monitor='val_loss', patience=15, verbose=1, mode='max')
callback = [earlystop]

In [70]:
from tensorflow.keras.callbacks import EarlyStopping

np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: early stopping


In [71]:
# evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)
scores
# In results, first is loss, second is accuracy

[0.30266693234443665, 0.9137353301048279]

In [72]:
# extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.30
accuracy: 91.37%


In [73]:
# Train values
train_scores = model.evaluate(train_x, train_y, verbose=0)
print(f"Train {model.metrics_names[0]}: {train_scores[0]:.2f}")
print(f"Train {model.metrics_names[1]}: {train_scores[1]*100:.2f}%")
train_scores
# In results, first is loss, second is accuracy

Train loss: 0.30
Train accuracy: 91.51%


[0.29682058095932007, 0.915111243724823]

In [74]:
# Test values
test_scores = model.evaluate(test_x, test_y, verbose=0)
print(f"Test {model.metrics_names[0]}: {test_scores[0]:.2f}")
print(f"Test {model.metrics_names[1]}: {test_scores[1]*100:.2f}%")
test_scores
# In results, first is loss, second is accuracy

Test loss: 0.30
Test accuracy: 91.37%


[0.30266693234443665, 0.9137353301048279]

In [75]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 80)                26240     
                                                                 
 dense_6 (Dense)             (None, 5)                 405       
                                                                 
Total params: 26,645
Trainable params: 26,645
Non-trainable params: 0
_________________________________________________________________


# LSTM Model with only two layers

In [76]:
n_steps = 80
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.LSTM(80, return_sequences=True, input_shape=[n_steps, n_inputs]),
    keras.layers.LSTM(80),
    keras.layers.Dense(5, activation='softmax')
])

In [77]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [78]:
# evaluate the model

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.6354867815971375, 0.7881072163581848]

In [79]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.64
accuracy: 78.81%


In [80]:
# Train values

train_scores = model.evaluate(train_x, train_y, verbose=0)
print(f"Train {model.metrics_names[0]}: {train_scores[0]:.2f}")
print(f"Train {model.metrics_names[1]}: {train_scores[1]*100:.2f}%")

train_scores

# In results, first is loss, second is accuracy

Train loss: 0.62
Train accuracy: 79.15%


[0.6183532476425171, 0.7914572954177856]

In [81]:
# Test values

test_scores = model.evaluate(test_x, test_y, verbose=0)
print(f"Test {model.metrics_names[0]}: {test_scores[0]:.2f}")
print(f"Test {model.metrics_names[1]}: {test_scores[1]*100:.2f}%")

test_scores

# In results, first is loss, second is accuracy

Test loss: 0.64
Test accuracy: 78.81%


[0.6354867815971375, 0.7881072163581848]

In [82]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 80, 80)            26240     
                                                                 
 lstm_2 (LSTM)               (None, 80)                51520     
                                                                 
 dense_7 (Dense)             (None, 5)                 405       
                                                                 
Total params: 78,165
Trainable params: 78,165
Non-trainable params: 0
_________________________________________________________________


# GRU Model with only one layer

In [83]:
n_steps = 80
n_inputs = 1
model = keras.models.Sequential([
    keras.layers.GRU(80, input_shape=[n_steps, n_inputs]),
    keras.layers.Dense(5, activation='softmax')
])

In [84]:
np.random.seed(42)
tf.random.set_seed(42)
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])
history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: early stopping


In [85]:
# evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)
scores
# In results, first is loss, second is accuracy

[0.21802346408367157, 0.9413735270500183]

In [86]:
# extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.22
accuracy: 94.14%


In [87]:
# Train values
train_scores = model.evaluate(train_x, train_y, verbose=0)
print(f"Train {model.metrics_names[0]}: {train_scores[0]:.2f}")
print(f"Train {model.metrics_names[1]}: {train_scores[1]*100:.2f}%")
train_scores
# In results, first is loss, second is accuracy

Train loss: 0.16
Train accuracy: 94.72%


[0.16185347735881805, 0.947236180305481]

In [88]:
# Test values

test_scores = model.evaluate(test_x, test_y, verbose=0)
print(f"Test {model.metrics_names[0]}: {test_scores[0]:.2f}")
print(f"Test {model.metrics_names[1]}: {test_scores[1]*100:.2f}%")
test_scores

# In results, first is loss, second is accuracy

Test loss: 0.22
Test accuracy: 94.14%


[0.21802346408367157, 0.9413735270500183]

In [89]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 80)                19920     
                                                                 
 dense_8 (Dense)             (None, 5)                 405       
                                                                 
Total params: 20,325
Trainable params: 20,325
Non-trainable params: 0
_________________________________________________________________


# Deep GRU Model with only two layers

In [90]:
n_steps = 80
n_inputs = 1
model = keras.models.Sequential([
    keras.layers.GRU(80, return_sequences=True, input_shape=[n_steps, n_inputs]),
    keras.layers.GRU(80),
    keras.layers.Dense(5, activation='softmax')
])

In [91]:
np.random.seed(42)
tf.random.set_seed(42)
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])
history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: early stopping


In [92]:
# evaluate the model

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.435077965259552, 0.8580402135848999]

In [93]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.44
accuracy: 85.80%


In [94]:
# Train values

train_scores = model.evaluate(train_x, train_y, verbose=0)
print(f"Train {model.metrics_names[0]}: {train_scores[0]:.2f}")
print(f"Train {model.metrics_names[1]}: {train_scores[1]*100:.2f}%")

train_scores

# In results, first is loss, second is accuracy

Train loss: 0.41
Train accuracy: 86.18%


[0.41383096575737, 0.8618090748786926]

In [95]:
# Test values

test_scores = model.evaluate(test_x, test_y, verbose=0)
print(f"Test {model.metrics_names[0]}: {test_scores[0]:.2f}")
print(f"Test {model.metrics_names[1]}: {test_scores[1]*100:.2f}%")

test_scores

# In results, first is loss, second is accuracy

Test loss: 0.44
Test accuracy: 85.80%


[0.435077965259552, 0.8580402135848999]

In [96]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_1 (GRU)                 (None, 80, 80)            19920     
                                                                 
 gru_2 (GRU)                 (None, 80)                38880     
                                                                 
 dense_9 (Dense)             (None, 5)                 405       
                                                                 
Total params: 59,205
Trainable params: 59,205
Non-trainable params: 0
_________________________________________________________________


# Discussion

## List the test values of each model you buid

## Which model performs the best and why?

## How does it compare to baseline?