# Task: Classification into 4 classes


## Idea is to use CWT images of the data and perform classification using CNNs

# Notes:
- Can't drop nans collectively because of different signal lengths
- For the imbalanced datasets we can introduce some bias towards the under-represented values

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from biosppy.signals import ecg
from scipy import signal
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Data Import – takes a while

In [9]:


# import X_train.csv dataset into a pandas dataframe
X_train = pd.read_csv("X_train.csv", index_col="id")
y_train = pd.read_csv("y_train.csv", index_col="id")
X_test = pd.read_csv("X_test.csv", index_col="id")

In [10]:
print(np.mean(X_train.loc[0].dropna().to_numpy(dtype="float32")))
print(X_train.loc[0].dropna().to_numpy(dtype="float32").shape[0])

9.164318
16322


## Get class representatives

In [11]:
y_train_np = y_train.to_numpy()
ones = np.where(y_train_np == 0)[0]
twos = np.where(y_train_np == 1)[0]
threes = np.where(y_train_np == 2)[0]
fours = np.where(y_train_np == 3)[0]
print(len(ones))
print(len(twos))
print(len(threes))
print(len(fours))

3030
443
1474
170


Basic Signal Plot attempt

In [None]:
# X_train_trimmed_0 = X_train.loc[0,].dropna().to_numpy(dtype="float32")
# # print("Training data shape: ", X_train.loc[0].dropna().to_numpy(dtype="float32").shape)
# x = np.linspace(0,X_train_trimmed_0.shape[0],X_train_trimmed_0.shape[0])
# plt.plot(x, X_train_trimmed_0)
# plt.show()

## Continuous Wavelet transform:
### Create CWT images

In [None]:
# widths = np.arange(1,31)
# print("cwtmatrix shape (30 from width x # signal length")
# Class = 3
# for i in range(len(threes)):
#         print(i)
#         # print(len(X_train.loc[threes[i]].dropna().to_numpy(dtype="float32")))
#         cwtmatr = signal.cwt(X_train.loc[threes[i]].dropna().to_numpy(dtype="float32"), signal.ricker, widths)
#         # print(cwtmatr.shape)
#         plt.figure()
#         plt.imshow(cwtmatr, extent=[-1, 1, 31, 1], cmap='PRGn', aspect='auto',
#                 vmax=abs(cwtmatr).max(), vmin=-abs(cwtmatr).max())
#         plt.axis('off')
#         # plt.show()
#         plt.savefig("3/cwtmatr_class{}_index{}.png".format(Class, i), bbox_inches='tight', pad_inches=0)
#         plt.close()

# Classification


## Import images:

In [53]:
IMG_WIDTH = 334
IMG_HEIGHT = 217
def load_image(img, training):
    '''Helper Function to load the image'''
    img = tf.image.decode_png(tf.io.read_file(img), channels=3)
    img = tf.cast(img, tf.float32)
    img = img / 127.5 - 1
    img = tf.image.resize(img, (IMG_HEIGHT, IMG_WIDTH))
    if training:
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
    return img

In [10]:
image_1 = load_image("3/cwtmatr_class3_index0.png", True)

2022-11-26 16:42:48.104928: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Account for imbalanced dataset:
 Idea for imbalanced data: sample all classes to be of size 170 and proceed using ensemble method of the models

## Create Model

### Paper NN Structure:
Automatic ECG Classification Using Continuous Wavelet
Transform and Convolutional Neural Network

![Paper NN structure](Paper_pipeline.png "Paper NN Structure")

<!-- insert image -->
![Paper NN structure](Paper_NN_structure.png "Paper NN Structure")

In [51]:
def create_feature_extractor_model():
    '''Create feature extractor'''
    #determine input shape
    model = keras.Sequential([
        tf.keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
        tf.keras.layers.Conv2D(filters=16,kernel_size=7,strides=1,input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPool2D(5,5),
        
        tf.keras.layers.Conv2D(32,3,1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPool2D(3,3),

        tf.keras.layers.Conv2D(64,3,1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPool2D(3),

        # fully connected layer for scalar output
        tf.keras.layers.Flatten(),
        # Concatenation layer
    ])

    return model


In [54]:
model = create_feature_extractor_model()
print(model.output_shape)
model.summary()

(None, 1152)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 211, 328, 16)      2368      
                                                                 
 batch_normalization (BatchN  (None, 211, 328, 16)     64        
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                (None, 211, 328, 16)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 42, 65, 16)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 40, 63, 32)        4640      
                                                                 
 batch_normalization_1 (Batc  (None, 40, 63

2022-11-27 16:53:02.821041: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## ToDo: 
- Extract features from heartbeat templates and concatenate with scalar output from model
- consider imbalanced dataset => add bias towards under-represented classes
- train model
- predict

## Feature Extraction: 
<!-- per signal it plots both a few possible templates as well as the average over 180 templates -->

In [3]:
# def plot_features(signal):
#     r_peaks = ecg.engzee_segmenter(signal, 300)['rpeaks']
#     if len(r_peaks) >= 2:
#         beats = ecg.extract_heartbeats(signal, r_peaks, 300)['templates']
#         # print("Beats (peaks - 1, 180 proposals of heartbeats): ", beats)
#         if len(beats) != 0:
#             # for i in range(5):
#             #     plt.plot(beats[i])
#             #     plt.show()
#             mu = np.mean(beats, axis=0) 
#             var = np.std(beats, axis=0)
#             md = np.median(beats, axis=0)
            
#             fig = plt.figure()
#             plt.subplot(211)

#             ax1 = plt.subplot(211)
#             ax1.plot(beats[0,:])
#             ax1.plot(beats[1,:])
#             ax1.plot(beats[2,:])
#             ax1.plot(beats[3,:])
#             ax1.set_title("template samples")

#             ax2 = plt.subplot(212)
#             ax2.set_title("average template")
#             ax2.plot(range(mu.shape[0]), mu, label='Average HeartBeat')
#             # Fill the variance range
#             ax2.fill_between(range(mu.shape[0]), mu - var, mu + var, linewidth=0, alpha=0.1)
#             # Plot a median
#             ax2.plot(range(md.shape[0]), md,  label='Median HeartBeat', color='#CC4F1B')

In [12]:
def plot_features(signal):
    r_peaks = ecg.engzee_segmenter(signal, 300)['rpeaks']
    if len(r_peaks) >= 2:
        beats = ecg.extract_heartbeats(signal, r_peaks, 300)['templates']
        # print("Beats (peaks - 1, 180 proposals of heartbeats): ", beats)
        if len(beats) != 0:
            mu = np.mean(beats, axis=0) 
            var = np.std(beats, axis=0)
            md = np.median(beats, axis=0)
            
            fig = plt.figure()
            plt.subplot(211)

            ax1 = plt.subplot(211)
            ax1.plot(beats[0,:])
            ax1.plot(beats[1,:])
            ax1.plot(beats[2,:])
            ax1.plot(beats[3,:])
            ax1.set_title("template samples")

            ax2 = plt.subplot(212)
            ax2.set_title("average template")
            ax2.plot(range(mu.shape[0]), mu, label='Average HeartBeat')
            # Fill the variance range
            ax2.fill_between(range(mu.shape[0]), mu - var, mu + var, linewidth=0, alpha=0.1)
            # Plot a median
            ax2.plot(range(md.shape[0]), md,  label='Median HeartBeat', color='#CC4F1B')
    # ecg_results = ecg.ecg(signal, sampling_rate=300, show=True)
    # return ecg_results
            

In [63]:
# for i in range(5):
    # plot_features(X_train.loc[ones[i]].dropna().to_numpy(dtype="float32"))
    # plot_features(X_train.loc[twos[i]].dropna().to_numpy(dtype="float32"))
    # plot_features(X_train.loc[threes[i]].dropna().to_numpy(dtype="float32"))
    # plot_features(X_train.loc[fours[i]].dropna().to_numpy(dtype="float32"))
max = 0
for i in range(fours.shape[0]):
    data = X_train.loc[fours[i]].dropna().to_numpy(dtype="float32")
    ECG = ecg.ecg(data, sampling_rate=300, show=False)
    if ECG[2].shape[0] > max:
        max = ECG[2].shape[0]
    # np.savetxt("peaks/4/ecg_peaks_class1_index" + str(i) + ".csv", ECG[2], delimiter=",", fmt='%0.0f')
# data = X_train.loc[0].dropna().to_numpy(dtype="float32")


Potential features:

In [49]:
print("classes: ", ecg_output[0].keys())
print("shape of time series: ", ecg_output[0][0].shape)
print("shape of filtered series: ", ecg_output[0][1].shape)
print("number of peaks", ecg_output[0][2].shape[0])
print("shape of templates: ", ecg_output[0][3].shape)
print("peaks: ", ecg_output[0][2]/300)
print("templates: ", ecg_output[0][4])

classes:  ['ts', 'filtered', 'rpeaks', 'templates_ts', 'templates', 'heart_rate_ts', 'heart_rate']
shape of time series:  (16322,)
shape of filtered series:  (16322,)
number of peaks 66
shape of templates:  (180,)
peaks:  [ 0.33666667  0.66333333  1.49666667  2.31666667  3.16333333  4.01333333
  4.86333333  5.7         6.55333333  7.4         8.23666667  9.07666667
  9.92       10.76333333 11.58666667 12.42       13.27       14.10333333
 14.92666667 15.75666667 16.57       17.37666667 18.17666667 18.97333333
 19.76333333 20.54333333 21.33       22.11666667 22.90666667 23.68333333
 24.44       25.2        25.98       26.78333333 27.59       28.39666667
 29.22333333 30.04666667 30.88333333 31.71666667 32.54       33.37666667
 34.20333333 35.03       35.85       36.68666667 37.52333333 38.35666667
 39.18666667 40.01666667 40.85       41.68333333 42.52333333 43.36666667
 44.19       45.01333333 45.84666667 46.68666667 47.53       48.36333333
 49.2        50.04       50.87666667 51.70666667

## Classification