In [5]:
from os import listdir
from os.path import isfile, join
from sklearn.metrics import accuracy_score
from keras.callbacks import ModelCheckpoint
import pandas as pd
import scipy.io as sio
import json
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Conv1D, GlobalAveragePooling1D, MaxPooling1D
import matplotlib.pyplot as plt

In [6]:
# path of data
path = 'training2017/'

In [7]:
# load data 
mats = [f for f in listdir(path) if ( f[0] == 'A' and f[7] == 'm')]
size = len(mats)
print('Full Data Size:', len(mats))

Full Data Size: 8528


In [8]:
# preprocessing data
max_ecg_len = 10000
full_data = np.zeros((size, max_ecg_len))
for i in range(size):
    temp = sio.loadmat(path + mats[i])['val'][0, :]
    if (max_ecg_len <= len(temp)):   # data length >= 10000, cut down
        full_data[i, :] = temp[0:max_ecg_len]
    else:                            # data length < 10000, add in
        add_in = temp[0:(max_ecg_len - len(temp))]
        result = np.hstack((temp, add_in))
        while len(result) < max_ecg_len:
            add_in = temp[0:(max_ecg_len - len(result))]
            result = np.hstack((result, add_in))
        full_data[i, :] = result

# normalization
full_data = (full_data - full_data.mean())/(full_data.std())
full_data = np.expand_dims(full_data, axis=2)

# preprocessing labels
full_label = np.zeros((size, 1))
labels = pd.read_csv(path + 'REFERENCE.csv', sep=',', header=None, names=None)
for i in range(size):
    if labels.loc[labels[0] == mats[i][:6], 1].values == 'N':
        full_label[i] = 0
    elif labels.loc[labels[0] == mats[i][:6], 1].values == 'A':
        full_label[i] = 1
    elif labels.loc[labels[0] == mats[i][:6], 1].values == 'O':
        full_label[i] = 2
    else:
        full_label[i] = 3

# one-hot coding for labels
full_target = np.zeros((size, 4))
for i in range(size):
    temp = np.zeros(4)
    temp[int(full_label[i])] = 1
    full_target[i, :] = temp

In [9]:
# divide train and validation data
train_size = 0.9
num_epochs = 30
batch_size = 256
verbose = 2

X_train = full_data[:int(train_size * size), :]
y_train = full_target[:int(train_size * size), :]
X_val = full_data[int(train_size * size):, :]
y_val = full_target[int(train_size * size):, :]

In [10]:
# build model with keras sequential
model = Sequential([
    Conv1D(64, 55, activation='relu', input_shape=(max_ecg_len, 1)),
    MaxPooling1D(10),
    Dropout(0.5),

    Conv1D(64, 25, activation='relu'),
    MaxPooling1D(5),
    Dropout(0.5),

    Conv1D(64, 10, activation='relu'),
    MaxPooling1D(5),
    Dropout(0.5),

    Conv1D(64, 5, activation='relu'),
    GlobalAveragePooling1D(),

    Dense(256, kernel_initializer='normal', activation='relu'),
    Dropout(0.5),

    Dense(128, kernel_initializer='normal', activation='relu'),
    Dropout(0.5),

    Dense(64, kernel_initializer='normal', activation='relu'),
    Dropout(0.5),

    Dense(4, kernel_initializer='normal', activation='softmax'),
])

In [11]:
#check model sanity
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 9946, 64)          3584      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 994, 64)          0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 994, 64)           0         
                                                                 
 conv1d_1 (Conv1D)           (None, 970, 64)           102464    
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 194, 64)          0         
 1D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 194, 64)           0

In [12]:
# training
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
checkpointer = ModelCheckpoint(filepath='models/Best_model.h5', monitor='val_acc', verbose=1, save_best_only=True)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=num_epochs, verbose=verbose, shuffle=True, callbacks=[checkpointer]) 

Epoch 1/30
30/30 - 57s - loss: 1.1635 - accuracy: 0.5691 - val_loss: 1.0896 - val_accuracy: 0.5416 - 57s/epoch - 2s/step
Epoch 2/30
30/30 - 58s - loss: 1.0089 - accuracy: 0.5965 - val_loss: 1.0000 - val_accuracy: 0.5416 - 58s/epoch - 2s/step
Epoch 3/30
30/30 - 59s - loss: 0.9450 - accuracy: 0.5977 - val_loss: 0.9672 - val_accuracy: 0.5416 - 59s/epoch - 2s/step
Epoch 4/30
30/30 - 57s - loss: 0.8946 - accuracy: 0.5978 - val_loss: 0.9130 - val_accuracy: 0.5416 - 57s/epoch - 2s/step
Epoch 5/30
30/30 - 59s - loss: 0.8383 - accuracy: 0.6100 - val_loss: 0.9450 - val_accuracy: 0.5733 - 59s/epoch - 2s/step
Epoch 6/30
30/30 - 57s - loss: 0.8106 - accuracy: 0.6571 - val_loss: 0.8261 - val_accuracy: 0.6284 - 57s/epoch - 2s/step
Epoch 7/30
30/30 - 60s - loss: 0.7669 - accuracy: 0.6900 - val_loss: 0.7874 - val_accuracy: 0.6342 - 60s/epoch - 2s/step
Epoch 8/30
30/30 - 61s - loss: 0.7393 - accuracy: 0.6995 - val_loss: 0.7506 - val_accuracy: 0.6471 - 61s/epoch - 2s/step
Epoch 9/30
30/30 - 60s - loss: 0

In [13]:
# write the training result
temp = history.history
json_str = json.dumps(temp)
with open('model3.json', 'w') as json_file:
    json_file.write(json_str)