# CNN learning for arrhythmia classification

## Import libraries

In [102]:
from __future__ import division, print_function

import json

import sys
import os
import glob
import re
import wfdb
import cv2
import pandas as pd
import numpy as np
import biosppy
import matplotlib.pyplot as plt
from sklearn.utils import class_weight

# Keras
import keras
from keras.applications.imagenet_utils import preprocess_input, decode_predictions
from keras.models import save_model, load_model
from keras.preprocessing import image
from keras.callbacks import History
from keras.models import Sequential
from keras.layers import Conv2D, BatchNormalization, MaxPool2D, ELU, Dropout, Dense, Flatten
from keras import backend as K
import tensorflow as tf

## Preprocess data

Get all record names from the [MIT-BIH Arrhythmia Database](https://physionet.org/content/mitdb/1.0.0/)

In [81]:
# Ask wfdb for the list of record names of the 'mitdb' database.
records = wfdb.get_record_list('mitdb')
# Preview the first ten record names.
records[:10]

['100', '101', '102', '103', '104', '105', '106', '107', '108', '109']

We need images of individual beats to train the CNN, so we create and save an image and a label for each beat of every record.

In [60]:
def signal_to_image(signal, folder_name, record_ind, signal_ind):
    """Plot `signal`, store it as a 128x128 grayscale PNG and return the pixels.

    The plot is drawn without ticks or spines, saved to
    ``<folder_name>/<record_ind>_<signal_ind>.png``, read back as grayscale,
    resized to 128x128 and written over the same file.

    Args:
        signal: sequence of samples passed directly to the plot call.
        folder_name: directory that receives the PNG; created if missing.
        record_ind: first component of the file name.
        signal_ind: second component of the file name.

    Returns:
        The resized grayscale image as returned by ``cv2.resize``.

    Raises:
        IOError: if the saved plot cannot be read back by OpenCV.
    """
    # savefig does not create directories, so make sure the target exists.
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)

    filename = folder_name + '/' + str(record_ind) + '_' + str(signal_ind) + '.png'

    fig = plt.figure(frameon=False)
    try:
        ax = fig.add_subplot(111)
        ax.plot(signal, linewidth=3.5)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
        fig.savefig(filename)
    finally:
        # Always release the figure; this function is called once per beat,
        # so a leaked figure per failure would accumulate quickly.
        plt.close(fig)

    im_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if im_gray is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read back rendered image: {}'.format(filename))
    im_gray = cv2.resize(im_gray, (128, 128))
    cv2.imwrite(filename, im_gray)

    return im_gray

In [95]:
# Integer class index assigned to each short beat-label name.
class_to_idx = dict(
    nor=1,
    lbb=2,
    rbb=5,
    apc=0,
    pvc=4,
    pab=3,
    veb=6,
    vfw=7,
)

In [96]:
# Reverse lookup: class index -> short label name.
idx_to_class = {index: name for name, index in class_to_idx.items()}
idx_to_class

{0: 'apc',
 1: 'nor',
 2: 'lbb',
 3: 'pab',
 4: 'pvc',
 5: 'rbb',
 6: 'veb',
 7: 'vfw'}

In [97]:
# Annotation symbol -> short label name (the names are the keys of class_to_idx).
# The extraction loop skips annotations whose symbol is not listed here.
symbol_to_label = {
    'N': 'nor',
    'L': 'lbb',
    'R': 'rbb',
    'A': 'apc',
    'V': 'pvc',
    '/': 'pab',
    'E': 'veb',
    '!': 'vfw',
}

In [87]:
# Render every labelled annotation of every record to an image and log its
# class index in labels.txt as "<record_ind>_<signal_ind> <class index>".
# NOTE: labels.txt is opened in append mode and signal_ind restarts at 0, so
# re-running this cell appends lines for ids that are already in the file.
os.makedirs('signal_images', exist_ok=True)

signal_ind = 0
for record_ind, record in enumerate(records):
    signals = wfdb.rdsamp(record, channels=[0], pb_dir='mitdb')[0]
    ann = wfdb.rdann(record, 'atr', pb_dir='mitdb')
    symbols = ann.symbol
    beats = list(ann.sample)
    last_beat = len(beats) - 1

    # One file handle per record instead of an open/close per beat.
    with open('labels.txt', 'a') as f:
        for i in range(len(beats)):
            label = symbol_to_label.get(symbols[i])
            if label is None:
                # Annotation symbol without a class of interest.
                continue

            # Window: 20 samples after the previous annotation up to 20
            # samples before the next one (neighbours are taken from all
            # annotations, not only the labelled ones); the first and last
            # annotation extend to the record boundaries.
            left_ind = 0 if i == 0 else beats[i - 1] + 20
            right_ind = len(signals) if i == last_beat else beats[i + 1] - 20
            signal = signals[left_ind:right_ind]

            signal_to_image(signal, 'signal_images', record_ind, signal_ind)
            f.write('{}_{} {}\n'.format(record_ind, signal_ind, class_to_idx[label]))

            signal_ind += 1

    print(record_ind, end=' ')

## Prepare train dataset

Collect the image paths and extract the image IDs from them

In [83]:
# glob returns files in arbitrary order; sorting keeps the path list (and thus
# the meaning of the shuffled indices built later) stable between runs.
all_paths = sorted(glob.glob('./signal_images/*.png'))

# Number of images found per record index.
cropped_paths = {}
for path in all_paths:
    # File names look like "<record>_<signal>.png"; os.path handles both
    # '/' and '\\' separators, unlike searching for a literal backslash.
    image_id = os.path.splitext(os.path.basename(path))[0]
    record_n = int(image_id[:image_id.find('_')])
    cropped_paths[record_n] = cropped_paths.get(record_n, 0) + 1

Read the image labels from the file

In [84]:
# Image id ("<record>_<signal>") -> integer class index, as written to labels.txt.
id_labels = {}
with open('labels.txt', 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of failing on the unpacking below.
            continue
        image_id, class_idx = fields
        id_labels[image_id] = int(class_idx)

len(id_labels)

107668

The dataset is imbalanced, so we'll need to artificially augment the smaller classes

In [85]:
# Distinct class indices in the labels and the number of images of each.
np.unique(list(id_labels.values()), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7]),
 array([ 2546, 75052,  8075,  7028,  7130,  7259,   106,   472],
       dtype=int64))

Create a list of labels from the dictionary

In [86]:
# Class indices as an array, in the iteration order of id_labels.
labels = np.array(list(id_labels.values()))
len(labels)

107668

In [87]:
# Number of distinct class indices that actually occur in the labels.
NUM_CLASSES = len(np.unique(labels))
NUM_CLASSES

8

Shuffle data for learning

In [88]:
# One index per label, shuffled in place after seeding NumPy's global RNG.
# NOTE(review): raw_batch_generator uses these values to index generators
# built from all_paths — assumes len(all_paths) == len(labels); confirm.
indices = np.arange(len(labels))
np.random.seed(123)
np.random.shuffle(indices)

print(len(indices), indices)

107668 [23423 48798  7516 ... 17730 28030 15725]


## Training generators

This function produces nine cropped-and-shifted versions of an image; together with the original, each image of a smaller class is augmented tenfold

In [89]:
def get_cropping_images(image):
    """Return nine shifted crops of `image`, each resized to 128x128.

    Three windows (``:96``, ``16:112``, ``32:``) are applied to the rows and
    to the columns. The crops are ordered row window first: top row
    (left, center, right), then center row, then bottom row.

    Args:
        image: 2-D array indexable as ``image[rows, cols]``.

    Returns:
        ``np.array`` stacking the nine resized crops.
    """
    # Start / middle / end window, shared by both axes.
    windows = (slice(None, 96), slice(16, 112), slice(32, None))

    crops = [
        cv2.resize(image[row_window, col_window], (128, 128))
        for row_window in windows
        for col_window in windows
    ]
    return np.array(crops)

We'll use generators to train the CNN

In [90]:
def get_generator(ind, augment=False):
    """Yield one ``(images, labels)`` pair for the image at ``all_paths[ind]``.

    Args:
        ind: position of the image in ``all_paths``.
        augment: when true and the label is not ``class_to_idx['nor']``, the
            original image is stacked with the crops returned by
            ``get_cropping_images``.

    Yields:
        A single tuple ``(images, labels)``: ``images`` has one leading row
        per image, ``labels`` is a list repeating the label once per row.

    Raises:
        IOError: if OpenCV cannot read the image file.
        KeyError: if the image id has no entry in ``id_labels``.
    """
    path = all_paths[ind]
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None on failure instead of raising.
        raise IOError('Could not read image: {}'.format(path))

    # Image id is the file name without directory and extension; os.path
    # copes with both '/' and '\\' separators.
    image_id = os.path.splitext(os.path.basename(path))[0]
    label = id_labels[image_id]

    if augment and label != class_to_idx['nor']:
        cropped_images = get_cropping_images(image)
        images = np.vstack((np.expand_dims(image, axis=0), cropped_images))
        yield images, [label] * len(images)
    else:
        yield np.expand_dims(image, 0), [label]

In [91]:
def raw_batch_generator(batch_size, augment=False, debug=False):
    """Endlessly yield batches of ``(images, labels)`` pairs.

    The position in the data is the module-level ``batch_i`` (1-based batch
    counter), which is read and advanced here so that training can be resumed
    by setting ``batch_i`` before creating the generator. After the last
    batch the stream wraps around to the first one.

    Args:
        batch_size: number of entries of ``indices`` consumed per batch.
        augment: forwarded to ``get_generator``.
        debug: unused; kept for interface compatibility.

    Yields:
        A list with one ``(images, labels)`` tuple per selected image.

    Raises:
        ValueError: if ``indices`` is empty.
    """
    global batch_i

    n_batches = (len(indices) + batch_size - 1) // batch_size
    if n_batches == 0:
        raise ValueError('No samples available: `indices` is empty')

    while True:
        # Wrap around instead of slicing past the end, which would produce
        # empty batches forever.
        pos = (batch_i - 1) % n_batches
        batch_indices = indices[pos * batch_size:(pos + 1) * batch_size]
        batch_i += 1
        # Create the per-image generator on demand: each one yields exactly
        # once, so pre-built generators could not be reused on a second pass.
        yield [next(get_generator(ind, augment)) for ind in batch_indices]

In [92]:
def images_and_labels_generator(batch_size, augment=False):
    """Flatten each raw batch into one image array and one label list.

    Args:
        batch_size: forwarded to ``raw_batch_generator``.
        augment: forwarded to ``raw_batch_generator``.

    Yields:
        ``(batch_images, batch_labels)`` where ``batch_images`` concatenates
        the per-image arrays along the first axis and ``batch_labels`` is the
        matching flat list of labels.
    """
    for batch in raw_batch_generator(batch_size, augment):
        batch_images = []
        batch_labels = []
        for images, labels_of_image in batch:
            batch_images.append(images)
            batch_labels.extend(labels_of_image)
        # Every entry already carries a leading axis (1 row, or more when
        # augmented), so vstack is correct in both modes; np.stack would add
        # an extra axis for the non-augmented case.
        yield np.vstack(batch_images), batch_labels

In [93]:
def train_iterator(batch_size, augment=False):
    """Yield training batches: images with a trailing axis, one-hot labels.

    Args:
        batch_size: forwarded to ``images_and_labels_generator``.
        augment: forwarded to ``images_and_labels_generator``.

    Yields:
        ``(images, one_hot)`` where ``images`` is the batch array with an
        extra last axis and ``one_hot`` encodes the labels over
        ``NUM_CLASSES`` classes.
    """
    for images, class_ids in images_and_labels_generator(batch_size, augment):
        images_with_channel = np.expand_dims(images, -1)
        one_hot = keras.utils.to_categorical(class_ids, NUM_CLASSES)
        yield images_with_channel, one_hot

Test the training generator

In [98]:
# raw_batch_generator reads the global batch_i; 1 selects the first slice of `indices`.
batch_i = 1
# Shape of the image array of the first augmented batch.
train_iterator(32, augment=True).__next__()[0].shape

(104, 128, 128, 1)

## Prepare CNN model

In [107]:
# reset graph when you change architecture!
def reset_tf_session():
    """Close the default TF session (if any), clear Keras and open a new one.

    Returns:
        The new ``tf.InteractiveSession``, which is also registered with
        Keras through ``K.set_session``.
    """
    previous_session = tf.get_default_session()
    if previous_session is not None:
        previous_session.close()

    # Drop the Keras graph/state before a new session is created.
    K.clear_session()

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    fresh_session = tf.InteractiveSession(config=session_config)
    K.set_session(fresh_session)
    return fresh_session

Class for saving model after each epoch

In [88]:
class ModelSaveCallback(keras.callbacks.Callback):
    """Save the model after every epoch into a small set of rotating files.

    The file written after epoch index ``epoch`` is
    ``file_name.format(epoch % num_slots)``, so only the ``num_slots`` most
    recent checkpoints are kept.

    Args:
        file_name: format string with one ``{}`` placeholder for the slot.
        num_slots: number of rotating checkpoint files (default 3).

    Raises:
        ValueError: if ``num_slots`` is smaller than 1.
    """

    def __init__(self, file_name, num_slots=3):
        super(ModelSaveCallback, self).__init__()
        if num_slots < 1:
            raise ValueError('num_slots must be at least 1')
        self.file_name = file_name
        self.num_slots = num_slots
        # Not used by this class; kept so existing attribute access still works.
        self.f1s = []

    def on_epoch_end(self, epoch, logs=None):
        """Write the current model to the slot that belongs to `epoch`."""
        model_filename = self.file_name.format(epoch % self.num_slots)
        save_model(self.model, model_filename)
        print("Model saved in {}".format(model_filename))

Custom F1 score metric for measuring training quality

In [101]:
def f1(y_true, y_pred):
    """F1 score computed from rounded, clipped targets and predictions.

    Counts are summed over all elements of the tensors passed in, and
    ``K.epsilon()`` is added to each denominator.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predicted values.

    Returns:
        Scalar tensor ``2 * p * r / (p + r + eps)``, where ``p`` is precision
        and ``r`` is recall.
    """
    eps = K.epsilon()

    # Elements that are positive in both the target and the rounded prediction.
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision_val = hits / (predicted_positives + eps)
    recall_val = hits / (actual_positives + eps)

    return 2 * ((precision_val * recall_val) / (precision_val + recall_val + eps))

Define model architecture

In [38]:
# Start from a fresh TF session before the model is built.
s = reset_tf_session()

In [39]:
def _add_conv_unit(net, filters, **conv_kwargs):
    """Append Conv2D (3x3, stride 1) -> ELU -> BatchNormalization to `net`."""
    net.add(Conv2D(filters, (3, 3), strides=(1, 1),
                   kernel_initializer='glorot_uniform', **conv_kwargs))
    net.add(ELU())
    net.add(BatchNormalization())


model = Sequential()

# Three stages of two conv units each (64, 128, 256 filters); every stage
# ends with 2x2 max pooling. Only the very first layer declares the input.
for stage, filters in enumerate((64, 128, 256)):
    first_layer_kwargs = {'input_shape': (128, 128, 1)} if stage == 0 else {}
    _add_conv_unit(model, filters, **first_layer_kwargs)
    _add_conv_unit(model, filters)
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(2048))
model.add(ELU())
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Output width follows NUM_CLASSES, the same value train_iterator uses for
# the one-hot labels, so the two cannot drift apart.
model.add(Dense(NUM_CLASSES, activation='softmax'))

model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',  # default Adam settings; no gradient clipping is configured
    metrics=[keras.metrics.categorical_accuracy, f1]
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 126, 126, 64)      640       
_________________________________________________________________
elu_1 (ELU)                  (None, 126, 126, 64)      0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 126, 126, 64)      256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 124, 124, 64)      36928     
_________________________________________________________________
elu_2 (ELU)                  (None, 124, 124, 64)      0         
_________________________________________________________________
batch_normalization_2

Define parameters for CNN training

In [109]:
IMAGE_SIZE = 128       # image side length; not referenced by the visible cells
BATCH_SIZE = 16        # entries of `indices` per generator batch (augmentation adds rows)
STEPS_PER_EPOCH = 50   # generator batches consumed per epoch
EPOCHS = 100           # value passed as `epochs` to fit_generator

Compute class weights to cope with the imbalanced dataset during training

In [104]:
# Label distribution as the network sees it with augmentation: every image of
# a non-'nor' class is accompanied by the crops from get_cropping_images.
AUGMENT_COPIES = 9  # extra images produced per augmented image

# Derive the augmented classes from class_to_idx instead of a hand-kept list.
augmented_classes = [c for c in np.unique(labels) if c != class_to_idx['nor']]
extra_labels = [
    np.full(np.count_nonzero(labels == c) * AUGMENT_COPIES, c)
    for c in augmented_classes
]
labels_for_weights = np.concatenate([labels] + extra_labels)
len(labels_for_weights)

401212

In [105]:
# 'balanced' weights, one per class in the order of np.unique(labels_for_weights).
# Keyword arguments are used because recent scikit-learn releases reject the
# positional form.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(labels_for_weights),
    y=labels_for_weights,
)
class_weights

array([ 1.9698154 ,  0.66822337,  0.62107121,  0.71359562,  0.7033871 ,
        0.69088717, 47.31273585, 10.6253178 ])

## CNN training

In [48]:
# Start the data stream at the first batch.
batch_i = 1
hist = History()

model.fit_generator(
    train_iterator(BATCH_SIZE, True),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[ModelSaveCallback('model_v3' + '_{}'), hist],
    verbose=1,
    # Keras documents class_weight as a dict {class index: weight}; pair each
    # weight with its class explicitly instead of passing the bare array.
    class_weight={int(c): float(w)
                  for c, w in zip(np.unique(labels_for_weights), class_weights)},
    initial_epoch=0
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Model saved in model_v3_0
Epoch 2/100
Model saved in model_v3_1
Epoch 3/100
Model saved in model_v3_2
Epoch 4/100
Model saved in model_v3_0
Epoch 5/100
Model saved in model_v3_1
Epoch 6/100
Model saved in model_v3_2
Epoch 7/100
Model saved in model_v3_0
Epoch 8/100
Model saved in model_v3_1
Epoch 9/100
Model saved in model_v3_2
Epoch 10/100
Model saved in model_v3_0
Epoch 11/100
Model saved in model_v3_1
Epoch 12/100
10/50 [=====>........................] - ETA: 32:52 - loss: 0.5282 - categorical_accuracy: 0.8316 - f1: 0.8376

KeyboardInterrupt: 

In [61]:
# Resume after the interruption: 11 epochs had completed.
resume_epoch = 11  # completed epochs; also the initial_epoch for Keras
# Continue the data stream right after the batches of the completed epochs
# (11 * 50 + 1 = 551).
batch_i = resume_epoch * STEPS_PER_EPOCH + 1
hist = History()
file_name = 'model_v3' + '_{}'
# ModelSaveCallback stores epoch index e in slot e % 3; the last completed
# epoch has index resume_epoch - 1 (here: slot 1).
model = load_model(file_name.format((resume_epoch - 1) % 3), custom_objects={'f1':f1})

model.fit_generator(
    train_iterator(BATCH_SIZE, augment=True),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[ModelSaveCallback(file_name), hist],
    verbose=1,
    # Keras documents class_weight as a dict {class index: weight}.
    class_weight={int(c): float(w)
                  for c, w in zip(np.unique(labels_for_weights), class_weights)},
    initial_epoch=resume_epoch
)

Epoch 12/100
Model saved in model_v3_2
Epoch 13/100
Model saved in model_v3_0
Epoch 14/100
Model saved in model_v3_1
Epoch 15/100
Model saved in model_v3_2
Epoch 16/100
Model saved in model_v3_0
Epoch 17/100
Model saved in model_v3_1
Epoch 18/100
Model saved in model_v3_2
Epoch 19/100
Model saved in model_v3_0
Epoch 20/100
Model saved in model_v3_1
Epoch 21/100
Model saved in model_v3_2
Epoch 22/100
Model saved in model_v3_0
Epoch 23/100
Model saved in model_v3_1
Epoch 24/100
Model saved in model_v3_2
Epoch 25/100
Model saved in model_v3_0
Epoch 26/100
Model saved in model_v3_1
Epoch 27/100
Model saved in model_v3_2
Epoch 28/100
Model saved in model_v3_0
Epoch 29/100
Model saved in model_v3_1
Epoch 30/100
 2/50 [>.............................] - ETA: 32:21 - loss: 0.2512 - categorical_accuracy: 0.9135 - f1: 0.9059

KeyboardInterrupt: 

In [None]:
# Resume after the interruption: 29 epochs had completed.
resume_epoch = 29  # completed epochs; also the initial_epoch for Keras
# Continue the data stream right after the batches of the completed epochs
# (29 * 50 + 1 = 1451).
batch_i = resume_epoch * STEPS_PER_EPOCH + 1
hist = History()
file_name = 'model_v3' + '_{}'
# ModelSaveCallback stores epoch index e in slot e % 3; the last completed
# epoch has index resume_epoch - 1 (here: slot 1).
model = load_model(file_name.format((resume_epoch - 1) % 3), custom_objects={'f1':f1})

model.fit_generator(
    train_iterator(BATCH_SIZE, augment=True),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[ModelSaveCallback(file_name), hist],
    verbose=1,
    # Keras documents class_weight as a dict {class index: weight}.
    class_weight={int(c): float(w)
                  for c, w in zip(np.unique(labels_for_weights), class_weights)},
    initial_epoch=resume_epoch
)

Epoch 30/100
Model saved in model_v3_2
Epoch 31/100
Model saved in model_v3_0
Epoch 32/100
Model saved in model_v3_1
Epoch 33/100
Model saved in model_v3_2
Epoch 34/100
Model saved in model_v3_0
Epoch 35/100
Model saved in model_v3_1
Epoch 36/100
Model saved in model_v3_2
Epoch 37/100
Model saved in model_v3_0
Epoch 38/100
Model saved in model_v3_1
Epoch 39/100
Model saved in model_v3_2
Epoch 40/100
Model saved in model_v3_0
Epoch 41/100
Model saved in model_v3_1
Epoch 42/100
Model saved in model_v3_2
Epoch 43/100
Model saved in model_v3_0
Epoch 44/100
Model saved in model_v3_1
Epoch 45/100
Model saved in model_v3_2
Epoch 46/100
Model saved in model_v3_0
Epoch 47/100
Model saved in model_v3_1
Epoch 48/100
Model saved in model_v3_2
Epoch 49/100
Model saved in model_v3_0
Epoch 50/100
Model saved in model_v3_1
Epoch 51/100
Model saved in model_v3_2
Epoch 52/100
Model saved in model_v3_0
Epoch 53/100
Model saved in model_v3_1
Epoch 54/100
Model saved in model_v3_2
Epoch 55/100

In [None]:
# Resume training: 54 epochs had completed.
resume_epoch = 54  # completed epochs; also the initial_epoch for Keras
# Continue the data stream right after the batches of the completed epochs
# (54 * 50 + 1 = 2701).
batch_i = resume_epoch * STEPS_PER_EPOCH + 1
hist = History()
file_name = 'model_v3' + '_{}'
# ModelSaveCallback stores epoch index e in slot e % 3; the last completed
# epoch has index resume_epoch - 1 (here: slot 2).
model = load_model(file_name.format((resume_epoch - 1) % 3), custom_objects={'f1':f1})

model.fit_generator(
    train_iterator(BATCH_SIZE, augment=True),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[ModelSaveCallback(file_name), hist],
    verbose=1,
    # Keras documents class_weight as a dict {class index: weight}.
    class_weight={int(c): float(w)
                  for c, w in zip(np.unique(labels_for_weights), class_weights)},
    initial_epoch=resume_epoch
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 55/100
Model saved in model_v3_0
Epoch 56/100
Model saved in model_v3_1
Epoch 57/100
Model saved in model_v3_2
Epoch 58/100

In [44]:
# Resume training: 57 epochs had completed.
resume_epoch = 57  # completed epochs; also the initial_epoch for Keras
# Continue the data stream right after the batches of the completed epochs
# (57 * 50 + 1 = 2851).
batch_i = resume_epoch * STEPS_PER_EPOCH + 1
hist = History()
file_name = 'model_v3' + '_{}'
# ModelSaveCallback stores epoch index e in slot e % 3; the last completed
# epoch has index resume_epoch - 1 (here: slot 2).
model = load_model(file_name.format((resume_epoch - 1) % 3), custom_objects={'f1':f1})

model.fit_generator(
    train_iterator(BATCH_SIZE, augment=True),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[ModelSaveCallback(file_name), hist],
    verbose=1,
    # Keras documents class_weight as a dict {class index: weight}.
    class_weight={int(c): float(w)
                  for c, w in zip(np.unique(labels_for_weights), class_weights)},
    initial_epoch=resume_epoch
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 58/100
Model saved in model_v3_0
Epoch 59/100
Model saved in model_v3_1
Epoch 60/100
Model saved in model_v3_2
Epoch 61/100
Model saved in model_v3_0
Epoch 62/100
Model saved in model_v3_1
Epoch 63/100
Model saved in model_v3_2
Epoch 64/100
Model saved in model_v3_0
Epoch 65/100
Model saved in model_v3_1
Epoch 66/100
Model saved in model_v3_2
Epoch 67/100
Model saved in model_v3_0
Epoch 68/100
Model saved in model_v3_1
Epoch 69/100
Model saved in model_v3_2
Epoch 70/100
Model saved in model_v3_0
Epoch 71/100
Model saved in model_v3_1
Epoch 72/100
Model saved in model_v3_2
Epoch 73/100
Model saved in model_v3_0
Epoch 74/100
Model saved in model_v3_1
Epoch 75/100
Model saved in model_v3_2
Epoch 76/100
Model saved in model_v3_0
Epoch 77/100
Model saved

KeyboardInterrupt: 

In [None]:
# Resume after the interruption: 77 epochs had completed.
resume_epoch = 77  # completed epochs; also the initial_epoch for Keras
# Continue the data stream right after the batches of the completed epochs
# (77 * 50 + 1 = 3851).
batch_i = resume_epoch * STEPS_PER_EPOCH + 1
hist = History()
file_name = 'model_v3' + '_{}'
# ModelSaveCallback stores epoch index e in slot e % 3; the last completed
# epoch has index resume_epoch - 1 (here: slot 1).
model = load_model(file_name.format((resume_epoch - 1) % 3), custom_objects={'f1':f1})

model.fit_generator(
    train_iterator(BATCH_SIZE, augment=True),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[ModelSaveCallback(file_name), hist],
    verbose=1,
    # Keras documents class_weight as a dict {class index: weight}.
    class_weight={int(c): float(w)
                  for c, w in zip(np.unique(labels_for_weights), class_weights)},
    initial_epoch=resume_epoch
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 78/100
Model saved in model_v3_2
Epoch 79/100
Model saved in model_v3_0
Epoch 80/100
Model saved in model_v3_1
Epoch 81/100
Model saved in model_v3_2
Epoch 82/100
Model saved in model_v3_0
Epoch 83/100
Model saved in model_v3_1
Epoch 84/100
Model saved in model_v3_2
Epoch 85/100
Model saved in model_v3_0
Epoch 86/100
Model saved in model_v3_1
Epoch 87/100
Model saved in model_v3_2
Epoch 88/100
Model saved in model_v3_0
Epoch 89/100
Model saved in model_v3_1
Epoch 90/100
Model saved in model_v3_2
Epoch 91/100
Model saved in model_v3_0
Epoch 92/100
 6/50 [==>...........................] - ETA: 28:01 - loss: 0.5106 - categorical_accuracy: 0.8838 - f1: 0.8861

After each training stage, the quality metrics are appended to files

In [45]:
# Append the per-epoch values recorded by `hist` to one text file per metric,
# one value per line.
for out_name, metric_key in (('f1_scores.txt', 'f1'),
                             ('cat_accuracies.txt', 'categorical_accuracy')):
    with open(out_name, 'a') as fout:
        for val in hist.history[metric_key]:
            fout.write(str(val) + '\n')