# ECG Classification

## Project Goal
The goal of this project is to create a deep neural network able to classify ECG signals and cardiac arrhythmia with a high degree of accuracy. Alongside this goal is the aim to explore the implications of introducing simulated data into the training sets in hopes of improving the overall accuracy of the model.

The model created and used for this project is based on the model presented in the Yildrim20 paper:
https://www.sciencedirect.com/science/article/abs/pii/S016926072031573X


### First, import all of the necessary libraries and mount Google Drive to load the data


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
import scipy.io

from keras.models import Sequential, load_model
from keras.layers import Conv1D, MaxPooling1D, BatchNormalization, LeakyReLU, Dropout, LSTM, Flatten, Dense 
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelBinarizer
from matplotlib import pyplot

from google.colab import files
from google.colab import drive
# Mount Google Drive inside the Colab runtime at /content/drive; the data,
# checkpoint and history paths used in this notebook all live under that mount.
drive.mount('/content/drive')

Mounted at /content/drive


### Function to load the relevant data
Amend the file paths based upon where the data is stored

In [None]:
def load_data(data_path="/content/drive/My Drive/ECG/ammendedData/mergedData.csv",
              labels_path="/content/drive/My Drive/ECG/ammendedData/mergedLabels.csv"):
  """Load the ECG records and their labels from CSV and binarise the labels.

  Args:
    data_path: CSV file holding the signal data. Defaults to the Google Drive
      location used by this project; pass another path if the data is stored
      elsewhere.
    labels_path: CSV file holding the labels (presumably one per row of the
      data file, in the same order -- TODO confirm against the files).

  Returns:
    A tuple ``(data, binaryLabels)``: the raw values of the data CSV as a
    NumPy array, and the labels encoded by ``LabelBinarizer``.
  """
  # NOTE(review): read_csv treats the first row as a header by default, so it
  # is not part of the returned values -- confirm both files really have one.
  data = pd.read_csv(data_path).values
  labels = pd.read_csv(labels_path).values

  # Convert the labels to a binary (0/1) format
  encoder = LabelBinarizer()
  binaryLabels = encoder.fit_transform(labels)

  return data, binaryLabels

### Function to create the model


In [None]:
def create_model(input_length=5000, learning_rate=0.0002):
  """Build and compile the 1D CNN + LSTM binary classifier.

  Only the first layer declares an input shape; a Sequential model infers the
  input of every later layer from the layer before it. The shape comments
  below are (timesteps, channels) for the default ``input_length`` of 5000.

  Args:
    input_length: Number of samples in each single-channel record.
    learning_rate: Learning rate given to the Adam optimizer.

  Returns:
    The compiled Keras ``Sequential`` model. It is compiled with binary
    cross-entropy loss and reports binary accuracy plus the four
    confusion-matrix counts (TP, FP, TN, FN) as metrics.
  """
  # Initialise model
  model = Sequential()

  # 'Input' layer chunk: (5000, 1) -> conv (453, 64) -> pool (226, 64)
  model.add(Conv1D(filters=64, kernel_size=21, strides=11, input_shape=[input_length, 1]))
  model.add(MaxPooling1D(pool_size=2))
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha=0.1))
  model.add(Dropout(rate=0.3))

  # Hidden layers
  # (226, 64) -> conv (220, 64) -> pool (110, 64)
  model.add(Conv1D(filters=64, kernel_size=7, strides=1))
  model.add(MaxPooling1D(pool_size=2))
  model.add(BatchNormalization())

  # (110, 64) -> conv (106, 128) -> pool (53, 128)
  model.add(Conv1D(filters=128, kernel_size=5, strides=1))
  model.add(MaxPooling1D(pool_size=2))

  # (53, 128) -> conv (41, 256) -> conv (35, 512)
  model.add(Conv1D(filters=256, kernel_size=13, strides=1))
  model.add(Conv1D(filters=512, kernel_size=7, strides=1))
  model.add(Dropout(rate=0.3))

  # (35, 512) -> conv (27, 256) -> pool (13, 256)
  model.add(Conv1D(filters=256, kernel_size=9, strides=1))
  model.add(MaxPooling1D(pool_size=2))

  # 'Output' layer chunk: the LSTM keeps the full sequence so Flatten feeds
  # every timestep to the dense layers; a single sigmoid unit gives the
  # binary prediction.
  model.add(LSTM(units=128, return_sequences=True))
  model.add(Flatten())
  model.add(Dense(units=64, activation='relu'))
  model.add(Dense(units=1, activation='sigmoid'))

  # Create optimizer with custom learning rate
  opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)

  # Compile the model
  model.compile(
      loss="binary_crossentropy",
      optimizer=opt,
      metrics=[
          'binary_accuracy',
          tf.keras.metrics.TruePositives(),
          tf.keras.metrics.FalsePositives(),
          tf.keras.metrics.TrueNegatives(),
          tf.keras.metrics.FalseNegatives(),
      ],
  )

  return model


### Train the model
Amend the file path in the model checkpoint in order to save the model in the desired location

In [None]:
def train_model(model, X_train, Y_train, X_test, Y_test,
                checkpoint_path='/content/drive/My Drive/ECG/real75sim25Model/model-{epoch:03d}-{val_loss:03f}.hdf5',
                epochs=60):
  """Fit ``model`` on the training split, validating on the test split.

  Args:
    model: Compiled Keras model to train (trained in place).
    X_train, X_test: NumPy arrays of records; a trailing channel axis is
      added here (assumes they arrive as (records, samples) -- TODO confirm).
    Y_train, Y_test: Labels matching ``X_train`` / ``X_test``.
    checkpoint_path: Filename template for ``ModelCheckpoint``; ``{epoch}``
      and ``{val_loss}`` are filled in by Keras. Change it to save the
      checkpoints somewhere else.
    epochs: Maximum number of epochs (early stopping may end training sooner).

  Returns:
    The Keras ``History`` object returned by ``model.fit``.
  """
  # Add the channel dimension the Conv1D input layer expects
  X_train = X_train[..., None]
  X_test = X_test[..., None]

  # NOTE(review): every callback monitors val_loss, and the validation data
  # is the test split, so the test data influences when training stops and
  # which weights are restored.
  earlyStopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
  modelCheckpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss')
  reduceLR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)

  history = model.fit(
      X_train, Y_train,
      epochs=epochs,
      validation_data=(X_test, Y_test),
      callbacks=[earlyStopping, modelCheckpoint, reduceLR],
  )

  return history

In [None]:
def getLabelIndex( train_index, data, labels ):
  """Split ``train_index`` into the indices labelled 1 and all the others.

  Each entry of ``train_index`` is looked up in ``labels`` by its own value,
  so the split is correct for any subset or ordering of indices (the labels
  must not be read by loop position, which would use the wrong records).

  Args:
    train_index: 1-D integer array of record indices into ``labels``.
    data: Unused; kept so existing callers keep working.
    labels: Array of labels for the whole dataset, shape ``(n,)`` or
      ``(n, 1)``, where 1 marks a normal record (N=1, A=0).

  Returns:
    A tuple ``(normal_index, atrial_index)`` of NumPy arrays: the entries of
    ``train_index`` whose label equals 1, and the remaining entries, each in
    their original order.

  Raises:
    ValueError: If ``labels`` holds more than one value per record.
  """
  train_index = np.asarray(train_index, dtype=int)

  # Labels of exactly the selected records, flattened to one value per index
  fold_labels = np.asarray(labels)[train_index].reshape(-1)
  if fold_labels.size != train_index.size:
    raise ValueError("labels must hold exactly one value per record")

  is_normal = fold_labels == 1

  return train_index[is_normal], train_index[~is_normal]

### Create Kfolds and train the CNN



In [None]:
total_folds = 5

# How many real atrial / simulated records are added to each training fold.
# Set either one to 0 to leave that group out of training.
atrial_train_count = 373
simulated_train_count = 125

# Load the data
data, labels = load_data()

# Note all simulated data are appended to the end of the file.
# They are stored identically to the real data, so there is no way to detect
# them and the count has to be hard-coded.
total_simulated = 622

# Total size of the entire dataset (one record per row)
total_elements = data.shape[0]

# Total real data in dataset
total_real = total_elements - total_simulated

# Index of the simulated records; an empty array when there are none, so the
# concatenation below is always valid.
simulated_index = np.arange(total_real, total_elements)

# Create K folds ensuring they're balanced (shuffle must be passed by keyword)
skf = StratifiedKFold(n_splits=total_folds, shuffle=True)

# Train CNN
# Select only the real data to make splits to ensure the test split is only real data
for i, (train_index, test_index) in enumerate(
    skf.split(data[:total_real, :], labels[:total_real, :]), start=1):

  # Split the training indices by class, then rebuild the training set from
  # all normal records plus the chosen number of atrial and simulated records.
  normal_index, atrial_index = getLabelIndex(train_index, data, labels)
  train_index = np.concatenate((
      normal_index,
      atrial_index[:atrial_train_count],
      simulated_index[:simulated_train_count],
  )).astype(int)  # keep an integer dtype even if one of the groups is empty

  print("Running Fold", i, "/", total_folds)
  print('-----------------------------------------------------')

  model = create_model()

  # Create the test and training sets with the relevant labels
  X_train, X_test = data[train_index], data[test_index]
  Y_train, Y_test = labels[train_index], labels[test_index]

  # Train the model
  history = train_model(model, X_train, Y_train, X_test, Y_test)

  # Save history
  filename = f"/content/drive/My Drive/ECG/history/real75sim25_history_{i}.csv"
  pd.DataFrame.from_dict(history.history).to_csv(filename, index=False)

Running Fold 1 / 5
-----------------------------------------------------
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Running Fold 2 / 5
-----------------------------------------------------
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40

In [None]:
# NOTE(review): `history` is whatever the training loop assigned last, i.e. the
# final fold only (assuming the cells were run in order). The value printed is
# the mean of that fold's per-epoch validation losses, not an average across folds.
print("Average test loss: ", np.average(history.history['val_loss']))
