#Reproducing results from paper : Deep Learning for ECG Classification
#Link to Original Paper For Dataset Information : https://iopscience.iop.org/article/10.1088/1742-6596/913/1/012004


#Imports

In [1]:
from sklearn.metrics import confusion_matrix, accuracy_score
from keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler, RobustScaler
import pandas as pd
import scipy.io as sio
from os import listdir
from os.path import isfile, join
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras import regularizers
# from keras.utils import np_utils
from keras import utils
import sys
from matplotlib import pyplot as plt
from scipy import signal
from keras.models import model_from_json

#Get Data

#Functions

In [2]:
def to_one_hot(y): # 0. >> [1. 0. 0. 0.]
    return utils.to_categorical(y)


def change(x):  #  [1. 0. 0. 0.]  >> 0 
    answer = np.zeros((np.shape(x)[0]))
    for i in range(np.shape(x)[0]):
        max_value = max(x[i, :])
        max_index = list(x[i, :]).index(max_value)
        answer[i] = max_index
    return answer.astype(int)

#Data Preprocessing

In [3]:
number_of_classes = 4  # Total number of classes

mypath = 'data/training2017/'
onlyfiles = [f for f in listdir(mypath) if (isfile(join(mypath, f)) and f[0] == 'A')]

# print(onlyfiles)

bats = [f for f in onlyfiles if f[7] == 'm']

# print(bats)

check = 100

mats = [f for f in bats if (np.shape(sio.loadmat(mypath + f)['val'])[1] >= check)]
# print(mats)

size = len(mats)
print('Total training size is ', size)

big = 10100
X = np.zeros((size, big))

for i in range(size):
    dummy = sio.loadmat(mypath + mats[i])['val'][0, :]
    if (big - len(dummy)) <= 0:
        X[i, :] = dummy[0:big]
    else:
        b = dummy[0:(big - len(dummy))]
        goal = np.hstack((dummy, b))
        while len(goal) != big:
            b = dummy[0:(big - len(goal))]
            goal = np.hstack((goal, b))
        X[i, :] = goal

target_train = np.zeros((size, 1))


Train_data = pd.read_csv(mypath + 'REFERENCE.csv', sep=',', header=None, names=None)
print(Train_data)


Total training size is  8528
           0  1
0     A00001  N
1     A00002  N
2     A00003  N
3     A00004  A
4     A00005  A
...      ... ..
8523  A08524  N
8524  A08525  O
8525  A08526  N
8526  A08527  N
8527  A08528  N

[8528 rows x 2 columns]


In [4]:
for i in range(size):
    if Train_data.loc[Train_data[0] == mats[i][:6], 1].values == 'N':
        target_train[i] = 0
    elif Train_data.loc[Train_data[0] == mats[i][:6], 1].values == 'A':
        target_train[i] = 1
    elif Train_data.loc[Train_data[0] == mats[i][:6], 1].values == 'O':
        target_train[i] = 2
    else:
        target_train[i] = 3

Label_set = to_one_hot(target_train)

X = (X - X.mean()) / (X.std())  # Some normalization here
X = np.expand_dims(X, axis=2)  # For Keras's data input size

values = [i for i in range(size)]
permutations = np.random.permutation(values)
X = X[permutations, :]
Label_set = Label_set[permutations, :]

train = 0.9  # Size of training set in percentage
X_train = X[:int(train * size), :]
Y_train = Label_set[:int(train * size), :]
X_val = X[int(train * size):, :]
Y_val = Label_set[int(train * size):, :]

In [5]:
##saving data
# Save the array to a binary file
np.save('savedData\Xtrain.npy', X_train)
np.save('savedData\Ytrain.npy', Y_train)
np.save('savedData\Xval.npy', X_val)
np.save('savedData\Yval.npy', Y_val)

In [4]:
#Loading data
X_train=np.load('savedData\Xtrain.npy')
Y_train=np.load('savedData\Ytrain.npy')
X_val =np.load('savedData\Xval.npy')
Y_val =np.load('savedData\Yval.npy')

#Model

In [5]:



# def create_model():
model = Sequential()
model.add(Conv1D(128, 55, activation='relu', input_shape=(big, 1)))
model.add(MaxPooling1D(10))
model.add(Dropout(0.5))
model.add(Conv1D(128, 25, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Dropout(0.5))

model.add(Conv1D(128, 10, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Dropout(0.5))
model.add(Conv1D(128, 5, activation='relu'))
model.add(GlobalAveragePooling1D())
# model.add(Flatten())
model.add(Dense(256, kernel_initializer='normal', activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, kernel_initializer='normal', activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, kernel_initializer='normal', activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(number_of_classes, kernel_initializer='normal', activation='softmax'))

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 10046, 128)        7168      
                                                                 
 max_pooling1d (MaxPooling1  (None, 1004, 128)         0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 1004, 128)         0         
                                                                 
 conv1d_1 (Conv1D)           (None, 980, 128)          409728    
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 196, 128)          0         
 g1D)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 196, 128)          0

#Training

In [7]:
#train model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
hist = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), batch_size=256, epochs=60, verbose=2, shuffle=True)


######saving weight
model_json = model.to_json()
with open("DL_ECG_classification_model\model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("DL_ECG_classification_model\model.h5")
print("Saved model to disk")
######

Epoch 1/60
30/30 - 168s - loss: 1.1655 - accuracy: 0.5491 - val_loss: 1.0333 - val_accuracy: 0.5862 - 168s/epoch - 6s/step
Epoch 2/60
30/30 - 169s - loss: 1.0014 - accuracy: 0.5880 - val_loss: 0.9438 - val_accuracy: 0.5862 - 169s/epoch - 6s/step
Epoch 3/60
30/30 - 182s - loss: 0.9513 - accuracy: 0.5928 - val_loss: 0.9043 - val_accuracy: 0.5862 - 182s/epoch - 6s/step
Epoch 4/60
30/30 - 181s - loss: 0.8726 - accuracy: 0.5945 - val_loss: 0.8105 - val_accuracy: 0.6155 - 181s/epoch - 6s/step
Epoch 5/60
30/30 - 183s - loss: 0.8095 - accuracy: 0.6452 - val_loss: 0.8439 - val_accuracy: 0.6471 - 183s/epoch - 6s/step
Epoch 6/60
30/30 - 168s - loss: 0.7657 - accuracy: 0.6823 - val_loss: 0.7339 - val_accuracy: 0.6800 - 168s/epoch - 6s/step
Epoch 7/60
30/30 - 181s - loss: 0.7045 - accuracy: 0.7144 - val_loss: 0.6606 - val_accuracy: 0.7140 - 181s/epoch - 6s/step
Epoch 8/60
30/30 - 183s - loss: 0.6669 - accuracy: 0.7285 - val_loss: 0.6456 - val_accuracy: 0.7140 - 183s/epoch - 6s/step
Epoch 9/60
30/30

In [6]:
###Load model from json
json_file = open('DL_ECG_classification_model\model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
# load weights into new model
model.load_weights("DL_ECG_classification_model\model.h5")
print("Loaded model from disk")
 
# evaluate loaded model on test data
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Loaded model from disk


In [7]:
import warnings


warnings.filterwarnings('ignore')


predictions = model.predict(X_val)
score = accuracy_score(change(Y_val), change(predictions))
print('Last epoch\'s validation score is ', score)

Last epoch's validation score is  0.8628370457209847


In [9]:
#reading mitbih files:
path = 'MyMIT-BIH'
# print(allCsv)
size=75
print('MIT-BIH test size ', size)
#fq=360Hz
big = 12120
MIT_BIH1 = np.zeros((size, big))

for i in range(size):
    dummy = pd.read_csv(path+"/"+str(i+1)+".csv", sep=',', header=None, names=None)[:].values.tolist()
    dummy = np.reshape(dummy,(len(dummy)))
    if(len(dummy)>=big):
        MIT_BIH1[i,:] = dummy[:big]
    elif(big-len(dummy)<=big/2):
        b = dummy[0:(big - len(dummy))]
        MIT_BIH1[i,:] = np.hstack((dummy, b))
    else:
        b=dummy
        while len(dummy)!=big:
            if(big-len(dummy)>=len(b)):
                dummy = np.hstack((dummy,b))
            else:
                dummy= np.hstack((dummy,b[0:big - len(dummy)]))
        MIT_BIH1[i, :] = dummy
#normalize
MIT_BIH1=(MIT_BIH1 - MIT_BIH1.mean()) / (MIT_BIH1.std())
#downsampling
MIT_BIH2=np.zeros((size, 10100))
for i in range(size):
    MIT_BIH2[i, :]=signal.resample(MIT_BIH1[i, :],10100)
MIT_BIH2 = np.expand_dims(MIT_BIH2, axis=2)
#Labels
MyLabels=pd.read_csv(path+"/Labels.csv", sep=',', header=None, names=None)[:].values.tolist()
MyLabels=np.reshape(MyLabels,(size))

MIT-BIH test size  75


In [10]:
import warnings


warnings.filterwarnings('ignore')


prediction = model.predict(MIT_BIH2)
print(change(prediction))
print(MyLabels)
score = accuracy_score(MyLabels, change(prediction))
print('The score is ', score)
TP=0
FP=0
FN=0
TN=0
for i in range(len(MyLabels)):
    if change(prediction)[i]==1:
        if MyLabels[i]==1:
            TP=TP+1
        else:
            FP=FP+1
    else:
        if change(prediction)[i]==MyLabels[i]:
            TN=TN+1
        else:
            FN=FN+1
print(TP)
print(FP)
print(FN)
print(TN)

precision=TP/(TP+FP)
recall=TP/(TP+FN)
accuracy=(TP+TN)/len(MyLabels)
print("precision is ",precision)
print("recall is ",recall)
print("accuracy is ",accuracy)

[1 1 1 1 2 1 1 1 2 1 1 1 3 1 1 2 2 1 2 1 2 2 1 1 2 0 0 2 0 0 2 2 2 2 0 2 0
 2 0 0 0 2 0 0 2 0 0 0 2 2 2 2 2 3 2 0 2 2 1 2 0 1 2 0 1 1 2 2 0 2 2 2 2 2
 2]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2]
The score is  0.6133333333333333
16
4
25
30
precision is  0.8
recall is  0.3902439024390244
accuracy is  0.6133333333333333


In [None]:
#draw graph

# print(MIT_BIH1)
# plt.figure(figsize=(20,15))
# plt.plot(MIT_BIH1[74, :])
# plt.show()

In [98]:
# path = 'MIT-BH'
# onlyfiles1 = [f for f in listdir(path) if (isfile(join(path, f)) )]

# # print(onlyfiles1)

# allCsv= [f for f in onlyfiles1 if f[4] == 'c']


# size=np.shape(allCsv)[0]
# print('MIT-BH test size ', size)



# big = 21600
# MIT_BH = np.zeros((size, big))



# for i in range(size):
#     dummy = pd.read_csv(path+"/"+allCsv[i], sep=',', header=None, names=None)[1][2:big+2].values.tolist()
#     MIT_BH[i, :] = dummy

# MIT_BH=(MIT_BH - MIT_BH.mean()) / (MIT_BH.std())
# MIT_BH2=np.zeros((size, 18000))
# MIT_BH3=np.zeros((size, 10100))
# ##downsample

# for i in range(size):
#     MIT_BH2[i, :]=signal.resample(MIT_BH[i, :],18000)

# ####
# MIT_BH3=MIT_BH2[:,:10100]

# MIT_BH3 = np.expand_dims(MIT_BH3, axis=2)


# import warnings


# warnings.filterwarnings('ignore')


# prediction1 = model.predict(MIT_BH3)
# print(change(prediction1))
#label mit-bh
# path2 = 'mitbih_database'
# allFiles = [f for f in listdir(path2) if (isfile(join(path2, f)) )]
# allTxt= [f for f in allFiles if f[3] == 'a']


    
# prediction_Num=change(prediction1)
# ii=0
# for filename in allTxt:
#     f = open(path2+"/"+filename, "r")
#     STR=f.read()
#     if prediction_Num[ii]==0:
#         if STR[93]==" ":
#             print('N   '+STR[90:92])
#         else:
#             print('N   '+STR[90:95])
#     elif prediction_Num[ii]==1: 
#         if STR[93]==" ":
#             print('A   '+STR[90:92])
#         else:
#             print('A   '+STR[90:95])
#     elif prediction_Num[ii]==2: 
#         if STR[93]==" ":
#             print('O   '+STR[90:92])
#         else:
#             print('O   '+STR[90:95])
#     else:
#         if STR[93]==" ":
#             print('~   '+STR[90:92])
#         else:
#             print('~   '+STR[90:95])
#     ii=ii+1
