# Task: Classification into 4 classes


## Idea is to use CWT images of the data and perform classification using CNNs

# Notes:
- Can't drop nans collectively because of different signal lengths
- For the imbalanced datasets we can introduce some bias towards the under-represented values

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from biosppy.signals import ecg
from scipy import signal
import tensorflow as tf
from tensorflow import keras

2022-11-28 15:45:32.227099: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-28 15:45:32.457352: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-28 15:45:32.457414: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-28 15:45:39.544359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-

## Data Import – takes a while

In [3]:


# import X_train.csv dataset into a pandas dataframe
X_train = pd.read_csv("X_train.csv", index_col="id")
y_train = pd.read_csv("y_train.csv", index_col="id")
X_test = pd.read_csv("X_test.csv", index_col="id")

In [10]:
print(np.mean(X_train.loc[0].dropna().to_numpy(dtype="float32")))
print(X_train.loc[0].dropna().to_numpy(dtype="float32").shape[0])

9.164318
16322


## Get class representatives

In [4]:
y_train_np = y_train.to_numpy()
ones = np.where(y_train_np == 0)[0]
twos = np.where(y_train_np == 1)[0]
threes = np.where(y_train_np == 2)[0]
fours = np.where(y_train_np == 3)[0]
print(len(ones))
print(len(twos))
print(len(threes))
print(len(fours))

3030
443
1474
170


## Continuous Wavelet transform:
### Create CWT images

In [None]:
# widths = np.arange(1,31)
# print("cwtmatrix shape (30 from width x # signal length")
# Class = 3
# for i in range(len(threes)):
#         print(i)
#         # print(len(X_train.loc[threes[i]].dropna().to_numpy(dtype="float32")))
#         cwtmatr = signal.cwt(X_train.loc[threes[i]].dropna().to_numpy(dtype="float32"), signal.ricker, widths)
#         # print(cwtmatr.shape)
#         plt.figure()
#         plt.imshow(cwtmatr, extent=[-1, 1, 31, 1], cmap='PRGn', aspect='auto',
#                 vmax=abs(cwtmatr).max(), vmin=-abs(cwtmatr).max())
#         plt.axis('off')
#         # plt.show()
#         plt.savefig("3/cwtmatr_class{}_index{}.png".format(Class, i), bbox_inches='tight', pad_inches=0)
#         plt.close()

## Import images:

In [5]:
IMG_WIDTH = 334
IMG_HEIGHT = 217
def load_image(img, training):
    '''Helper Function to load the image'''
    img = tf.image.decode_png(tf.io.read_file(img), channels=3)
    img = tf.cast(img, tf.float32)
    img = img / 127.5 - 1
    img = tf.image.resize(img, (IMG_HEIGHT, IMG_WIDTH))
    if training:
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
    return img

In [10]:
image_1 = load_image("3/cwtmatr_class3_index0.png", True)

2022-11-26 16:42:48.104928: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## ToDo: 
- Extract features from heartbeat templates and concatenate with scalar output from model
- consider imbalanced dataset => add bias towards under-represented classes
- train model
- predict

## Feature Extraction: 
<!-- per signal it plots both a few possible templates as well as the average over 180 templates -->

Potential features:

In [91]:
# ecg_output = ecg.ecg(X_train.loc[ones[0]].dropna().to_numpy(dtype="float32"))
# print("classes: ", ecg_output[0].keys())
# print("shape of time series: ", ecg_output[0][0].shape)
# print("shape of filtered series: ", ecg_output[0][1].shape)
# print("number of peaks", ecg_output[0][2].shape[0])
# print("shape of templates: ", ecg_output[0][3].shape)
# print("peaks: ", ecg_output[0][2]/300)
# print("templates: ", ecg_output[0][4])

Plot Heartbeats

In [12]:
def plot_features(signal):
    r_peaks = ecg.engzee_segmenter(signal, 300)['rpeaks']
    if len(r_peaks) >= 2:
        beats = ecg.extract_heartbeats(signal, r_peaks, 300)['templates']
        # print("Beats (peaks - 1, 180 proposals of heartbeats): ", beats)
        if len(beats) != 0:
            mu = np.mean(beats, axis=0) 
            var = np.std(beats, axis=0)
            md = np.median(beats, axis=0)
            
            fig = plt.figure()
            plt.subplot(211)

            ax1 = plt.subplot(211)
            ax1.plot(beats[0,:])
            ax1.plot(beats[1,:])
            ax1.plot(beats[2,:])
            ax1.plot(beats[3,:])
            ax1.set_title("template samples")

            ax2 = plt.subplot(212)
            ax2.set_title("average template")
            ax2.plot(range(mu.shape[0]), mu, label='Average HeartBeat')
            # Fill the variance range
            ax2.fill_between(range(mu.shape[0]), mu - var, mu + var, linewidth=0, alpha=0.1)
            # Plot a median
            ax2.plot(range(md.shape[0]), md,  label='Median HeartBeat', color='#CC4F1B')
    # ecg_results = ecg.ecg(signal, sampling_rate=300, show=True)
    # return ecg_results
            

## Extract peaks and save as concatenates matrix

In [76]:
max_num_peaks = 159
peaks = np.zeros((max_num_peaks, threes.shape[0]))
for i in range(threes.shape[0]):
    data = X_train.loc[threes[i]].dropna().to_numpy(dtype="float32")
    ECG = ecg.ecg(data, sampling_rate=300, show=False)
    ecg_dimension = ECG[2].shape[0]
    buffer = np.zeros((max_num_peaks - ecg_dimension))
    peaks[:,i] = np.concatenate((ECG[2], buffer), axis=0)
np.savetxt("peaks/3/ecg_peaks.csv", peaks, delimiter=",", fmt='%0.0f')


Scale the data

In [6]:
peaks = pd.read_csv("peaks/4/ecg_peaks.csv", header=None).to_numpy(dtype="float32")
zeros = np.where(peaks == 0)
mean = np.mean(peaks, axis=0)
std = np.std(peaks, axis=0)
peaks_normalized = (peaks - mean) / std
np.savetxt("peaks/4/ecg_peaks_normalized.csv", peaks_normalized, delimiter=",", fmt='%0.7f')
peaks_normalized[zeros] = 0
np.savetxt("peaks/4/ecg_peaks_normalized_zeros.csv", peaks_normalized, delimiter=",", fmt='%0.7f')


# Classification

## Create Model

### Paper NN Structure:
Automatic ECG Classification Using Continuous Wavelet
Transform and Convolutional Neural Network

![Paper NN structure](Paper_pipeline.png "Paper NN Structure")

<!-- insert image -->
![Paper NN structure](Paper_NN_structure.png "Paper NN Structure")

In [9]:
# initialize weights according to class distribution:
data_size = 5117
class_1_size = 3030
class_2_size = 443
class_3_size = 1474
class_4_size = 170
class_weights = {0: class_1_size/data_size, 1: class_2_size/data_size, 2: class_3_size/data_size, 3: class_4_size/data_size}
class_weights

{0: 0.5921438342778972,
 1: 0.08657416454954074,
 2: 0.2880594098104358,
 3: 0.03322259136212625}

In [12]:
METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

def create_feature_extractor_model(metrics=METRICS, output_bias=None):
    '''Create feature extractor'''
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
    #determine input shape
    image_input = tf.keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    encoder = tf.keras.Sequential()
    encoder.add(keras.layers.Conv2D(filters=16,kernel_size=7,strides=1,input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
    encoder.add(keras.layers.BatchNormalization())
    encoder.add(keras.layers.ReLU())
    encoder.add(keras.layers.MaxPool2D(5,5))
    
    encoder.add(keras.layers.Conv2D(32,3,1))
    encoder.add(keras.layers.BatchNormalization())
    encoder.add(keras.layers.ReLU())
    encoder.add(keras.layers.MaxPool2D(3,3))

    encoder.add(keras.layers.Conv2D(64,3,1))
    encoder.add(keras.layers.BatchNormalization())
    encoder.add(keras.layers.ReLU())
    encoder.add(keras.layers.MaxPool2D(3))

    # fully connected layer for scalar output
    encoder.add(keras.layers.Flatten())
    encoder.add(keras.layers.Dense(64, activation='relu'))
    image_encoder = keras.Model(image_input, encoder(image_input))
    # peaks encoder
    peak_encoder = keras.Input(shape=(159))

    complete_encoder = keras.layers.concatenate([image_encoder.output, peak_encoder])
    classifier = keras.layers.Dense(32, activation='relu')(complete_encoder)
    # Set initial bias towards class 1 and 3
    classifier = keras.layers.Dense(4, activation='softmax', bias_initializer=output_bias)(classifier)
    model = keras.Model([image_encoder.input, peak_encoder], classifier)

    model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=1e-3),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=metrics)

    return model


## Account for imbalanced dataset:
 Idea for imbalanced data: sample all classes to be of size 170 and proceed using ensemble method of the models

In [13]:
model = create_feature_extractor_model()
print(model.output_shape)
model.summary()
# plot model
keras.utils.plot_model(model, show_shapes=True, dpi=64)

(None, 4)
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 217, 334, 3  0           []                               
                                )]                                                                
                                                                                                  
 sequential (Sequential)        (None, 64)           99744       ['input_1[0][0]']                
                                                                                                  
 input_2 (InputLayer)           [(None, 159)]        0           []                               
                                                                                                  
 concatenate (Concatenate)      (None, 223)          0           ['sequential[0][0

In [None]:
# image = load_image("1/1.jpg")
# peaks = pd.read_csv("peaks/1/ecg_peaks_normalized_zeros.csv", header=None).to_numpy(dtype="float32")
model.predict([image, peaks])

## Train Model

Fix seed:

In [2]:
from numpy.random import seed
seed(1)
print(np.random.rand(1))

NameError: name 'np' is not defined

In [10]:
# Larger Batch size to ensure that some examples of that class are in the batch
EPOCHS = 100
BATCH_SIZE = 2048