# Preprocessing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from biosppy.signals import ecg
from scipy import signal
import tensorflow as tf
from tensorflow import keras

# Load The Data - may take a while

In [None]:
# Read the training features, the training labels and the test features.
# Each CSV is indexed by its "id" column.
X_train, y_train, X_test = (
    pd.read_csv(csv_name, index_col="id")
    for csv_name in ("X_train.csv", "y_train.csv", "X_test.csv")
)

## Get the class representatives

In [None]:
# Collect, for each label 0..3, the row positions in y_train that carry that
# label. The names are 1-based: `ones` holds label 0, `fours` holds label 3.
y_train_np = y_train.to_numpy()
ones, twos, threes, fours = (
    np.where(y_train_np == label)[0] for label in range(4)
)

# Write every position list to its own CSV file, one integer per line.
for rep_path, rep_rows in (
    ("class_reps/ones.csv", ones),
    ("class_reps/twos.csv", twos),
    ("class_reps/threes.csv", threes),
    ("class_reps/fours.csv", fours),
):
    np.savetxt(rep_path, rep_rows, delimiter=",", fmt="%d")

# Report the number of samples found for each class.
for rep_rows in (ones, twos, threes, fours):
    print(len(rep_rows))

## Continuous Wavelet Transform – create images and save them
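Note: to process another class, the class has to be changed manually in three places in the cell below: the `Class` value, the index array (e.g. `threes`) and the output folder (e.g. `cwt_images/3/`).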

In [None]:
# Compute a Ricker-wavelet continuous wavelet transform for every recording of
# the selected class and save each one as a borderless PNG image.
# NOTE(review): scipy.signal.cwt and scipy.signal.ricker are deprecated in
# recent SciPy releases - confirm the installed version still provides them.
widths = np.arange(1, 31)
print("cwtmatrix shape (30 from width x # signal length")
Class = 3
class_rows = threes  # must match `Class` and the folder in the savefig path
for i, row_id in enumerate(class_rows):
    print(i)
    # Drop the NaN entries of the row before transforming it.
    trace = X_train.loc[row_id].dropna().to_numpy(dtype="float32")
    cwtmatr = signal.cwt(trace, signal.ricker, widths)
    # Symmetric colour limits so that zero maps to the middle of the colormap.
    colour_limit = abs(cwtmatr).max()
    plt.figure()
    plt.imshow(
        cwtmatr,
        extent=[-1, 1, 31, 1],
        cmap='PRGn',
        aspect='auto',
        vmax=colour_limit,
        vmin=-colour_limit,
    )
    plt.axis('off')
    plt.savefig("cwt_images/3/cwtmatr_class{}_index{}.png".format(Class, i), bbox_inches='tight', pad_inches=0)
    # Close the figure so open figures do not accumulate across iterations.
    plt.close()

## Choice of additional features to extract from the signals

In [None]:
# Run the BioSPPy ECG pipeline on the first recording of class 1 and inspect
# what it returns. ecg.ecg returns a single flat result tuple, documented as
# (ts, filtered, rpeaks, templates_ts, templates, heart_rate_ts, heart_rate),
# so the fields are indexed directly on `ecg_output`. Indexing
# `ecg_output[0][...]` would address individual samples of the time axis.
first_signal = X_train.loc[ones[0]].dropna().to_numpy(dtype="float32")
# Use the same 300 sampling rate as the rest of this file; the library default
# is 1000, which would not match the "/300" conversion below.
ecg_output = ecg.ecg(first_signal, sampling_rate=300)
print("classes: ", ecg_output.keys())
print("shape of time series: ", ecg_output[0].shape)
print("shape of filtered series: ", ecg_output[1].shape)
print("number of peaks", ecg_output[2].shape[0])
# Index 4 holds the templates themselves; index 3 is their time axis.
print("shape of templates: ", ecg_output[4].shape)
# R-peak sample indices divided by the sampling rate.
print("peaks: ", ecg_output[2]/300)
print("templates: ", ecg_output[4])

Plot Heartbeats for understanding

In [None]:
def plot_features(signal):
    """Plot example heartbeats and their average for one ECG signal.

    R-peaks are located with BioSPPy's Engzee segmenter and the heartbeats
    around them are extracted, both with a sampling rate of 300. The top
    panel shows up to the first four extracted heartbeats. The bottom panel
    shows the mean heartbeat with a band of one standard deviation around
    it, plus the median heartbeat.

    Nothing is drawn when fewer than two R-peaks are found or when no
    heartbeat could be extracted.

    Args:
        signal: ECG samples of one recording (presumably a 1-D array with
            NaN padding already removed - confirm against the caller).
            Inside this function the name shadows the ``scipy.signal``
            module imported at the top of the file.

    Returns:
        None. The figure is created through pyplot.
    """
    r_peaks = ecg.engzee_segmenter(signal, 300)['rpeaks']
    if len(r_peaks) < 2:
        return

    beats = ecg.extract_heartbeats(signal, r_peaks, 300)['templates']
    if len(beats) == 0:
        return

    mu = np.mean(beats, axis=0)
    sd = np.std(beats, axis=0)
    md = np.median(beats, axis=0)
    samples = range(mu.shape[0])

    _, (ax1, ax2) = plt.subplots(2, 1)

    # Short recordings can yield fewer than four heartbeats, so plot only
    # the ones that exist instead of indexing rows 0..3 unconditionally.
    for beat in beats[:4]:
        ax1.plot(beat)
    ax1.set_title("template samples")

    ax2.set_title("average template")
    ax2.plot(samples, mu, label='Average HeartBeat')
    # Shade one standard deviation around the mean.
    ax2.fill_between(samples, mu - sd, mu + sd, linewidth=0, alpha=0.1)
    ax2.plot(samples, md, label='Median HeartBeat', color='#CC4F1B')
    # Without a legend the labels above are never displayed.
    ax2.legend()

## Extract Peaks and store as concatenated matrix

In [None]:
# Build a (max_num_peaks x number of recordings) matrix holding the R-peak
# positions of every recording of the class, one recording per column.
# Columns with fewer peaks keep their trailing zeros as padding.
max_num_peaks = 159
n_recordings = threes.shape[0]
peaks = np.zeros((max_num_peaks, n_recordings))
for i, row_id in enumerate(threes):
    data = X_train.loc[row_id].dropna().to_numpy(dtype="float32")
    ECG = ecg.ecg(data, sampling_rate=300, show=False)
    rpeaks = ECG[2]
    ecg_dimension = rpeaks.shape[0]
    # A recording with more than max_num_peaks peaks does not fit and raises
    # a ValueError here.
    peaks[:ecg_dimension, i] = rpeaks
np.savetxt("peaks/3/ecg_peaks.csv", peaks, delimiter=",", fmt='%0.0f')

### Scale the data

In [None]:
# Standardise the stored peak matrix column by column and write two variants:
# the plain standardised matrix, and one where every entry that was exactly 0
# in the input is reset to 0 afterwards.
# NOTE(review): presumably the zeros are padding for recordings with fewer
# peaks; they are included in the mean/std below - confirm this is intended.
# NOTE(review): a column with zero standard deviation yields NaN/inf values in
# the first output file.
peaks = pd.read_csv("peaks/4/ecg_peaks.csv", header=None).to_numpy(dtype="float32")
zeros = np.where(peaks == 0)

mean = peaks.mean(axis=0)
std = peaks.std(axis=0)
peaks_normalized = (peaks - mean) / std

csv_options = dict(delimiter=",", fmt='%0.7f')
np.savetxt("peaks/4/ecg_peaks_normalized.csv", peaks_normalized, **csv_options)

# Reset the originally-zero entries only after the first file is written, so
# the two files differ exactly in those positions.
peaks_normalized[zeros] = 0
np.savetxt("peaks/4/ecg_peaks_normalized_zeros.csv", peaks_normalized, **csv_options)