* Imports and definitions

In [217]:
import plotly.graph_objects as go
from scipy import *
import numpy as np
import pywt
import pandas as pd
from sklearn.utils import shuffle

# Sampling frequency of the heartbeat recordings; passed as `fs` when the
# band-pass filter is designed further down.
samplingRate = 360  # Hz (given in the dataset documentation)

In [99]:
def draw(amplitudes, indices=None):
    """Show a signal as an interactive Plotly line chart.

    Parameters
    ----------
    amplitudes : sequence of numbers
        Sample values, plotted on the y-axis.
    indices : sequence, optional
        x-axis position for each amplitude. When omitted, no x values are
        handed to Plotly and it falls back to its default x positions.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()`` as a side effect.
    """
    trace_kwargs = {"y": amplitudes, "mode": "lines", "name": "Continuous Signal"}
    # Test against None rather than a concrete type so that any sequence
    # (list, range, NumPy array, ...) can be used as the x-axis.
    if indices is not None:
        trace_kwargs["x"] = indices

    fig_cont = go.Figure()
    fig_cont.add_trace(go.Scatter(**trace_kwargs))
    fig_cont.update_layout(
        title="Continuous Signal",
        xaxis_title="Index",
        yaxis_title="Amplitude",
        width=1000,
        height=500,
    )

    fig_cont.show()  # For notebooks

In [17]:
# HB means HeartBeat
def readHB(fileName):
    """Read heartbeat (HB) signals from a text file, one signal per line.

    Every line is split on "|" and each field except the last is converted
    to ``float``. The last field of a line is always discarded -- presumably
    it only holds the line ending that follows a trailing "|"; verify against
    the data files.

    Parameters
    ----------
    fileName : str or path-like
        Path of the file to read.

    Returns
    -------
    list of list of float
        One inner list per line, in file order.
    """
    with open(fileName) as source:
        return [
            [float(field) for field in row.split("|")[:-1]]
            for row in source
        ]

In [39]:
# Load the normal (healthy) training beats: one list of floats per line of the file.
healthySignals = readHB("Data/Data/Normal&PVC/Normal_Train.txt")

In [43]:
# Load the PVC training beats: one list of floats per line of the file.
PVCSignals = readHB("Data/Data/Normal&PVC/PVC_Train.txt")

## After some investigation, I noticed the following:
* healthySignals and PVCSignals each contain 200 signals
* Each signal has 300 points
* Max amplitude is 1.6692
* Min amplitude is -2.1939

In [77]:
# Plot one arbitrarily chosen healthy beat (index 104) to see what we are working with.
draw(healthySignals[104])

In [78]:
# Plot one arbitrarily chosen PVC beat (index 180) for comparison with the healthy beat.
draw(PVCSignals[180])

### There's a huge difference between the healthy and the PVC signals, so hopefully the classification won't be hard

### Applying the band-pass filter with cutoffs = [0.5, 40] Hz

In [120]:
# Design a Butterworth band-pass filter (N=4, pass band 0.5-40 Hz) and keep its
# numerator (b) and denominator (a) coefficients.
# `signal` is scipy.signal, made available by the wildcard scipy import at the top.
b, a = signal.butter(4, [0.5, 40], btype="bandpass", fs=samplingRate)

In [126]:
# Run every beat of both classes through the band-pass filter designed above.
filteredHealthy = [signal.lfilter(b, a, beat) for beat in healthySignals]
filteredPVC = [signal.lfilter(b, a, beat) for beat in PVCSignals]

### The effect of filtering is very clear: the unneeded noise has been removed

In [129]:
# Same healthy beat as plotted raw earlier (index 104), now after band-pass filtering.
draw(filteredHealthy[104])

### Now we will apply the DWT and take the detail coefficients, because they contain the difference between normal and PVC signals

In [194]:
# Single-level DWT with the "db4" wavelet; element [1] of the result holds the
# detail coefficients, which is all we keep for each beat.
healthyDetails = [pywt.dwt(beat, "db4")[1] for beat in filteredHealthy]
PVCDetails = [pywt.dwt(beat, "db4")[1] for beat in filteredPVC]

In [195]:
# Detail coefficients of the first healthy beat.
draw(healthyDetails[0])

In [196]:
# Detail coefficients of the first PVC beat, for comparison with the healthy one.
draw(PVCDetails[0])

### We can see that the difference between them is noticeable

### We will use min-max scaling so that each signal ranges over [0, 1]

In [197]:
def normalize(signal):
    """Min-max scale a signal so that its values span [0, 1].

    Parameters
    ----------
    signal : array-like of numbers
        Samples to rescale. Must not be empty (``np.min`` raises ``ValueError``
        on an empty input).

    Returns
    -------
    numpy.ndarray
        ``(signal - min) / (max - min)``. A flat signal (max == min) has no
        range to scale by, so it maps to all zeros instead of the NaNs that
        a 0/0 division would produce.
    """
    values = np.asarray(signal)
    low = np.min(values)
    span = np.max(values) - low
    if span == 0:
        # Constant input: avoid 0/0 and return a well-defined result.
        return np.zeros(values.shape, dtype=float)
    return (values - low) / span

In [198]:
# Rescale the detail coefficients of every beat to [0, 1], one beat at a time.
normalizedHealthy = list(map(normalize, healthyDetails))
normalizedPVC = list(map(normalize, PVCDetails))

### A small comparison between the raw signal and the processed signal

In [199]:
# Raw (unprocessed) version of the first PVC beat.
draw(PVCSignals[0])

In [200]:
# The same PVC beat after filtering, DWT detail extraction and min-max scaling.
draw(normalizedPVC[0])

* PVC : 1
* Healthy : 0

### Creating the DataFrame and shuffling it so it's ready for training

In [223]:
# Feature column: the processed beats, healthy first and PVC second.
data = normalizedHealthy + normalizedPVC
# Label column: 0 = healthy, 1 = PVC. The counts come from the data itself so
# the labels stay aligned with `data` if the number of beats per class changes.
labels = list(np.zeros(len(normalizedHealthy))) + list(np.ones(len(normalizedPVC)))

# Build and shuffle in one expression so re-running the cell always starts from
# the unshuffled frame; the fixed random_state makes the row order reproducible.
df = shuffle(
    pd.DataFrame({"x": data, "y": labels}),
    random_state=42,
).reset_index(drop=True)

In [224]:
# Display the shuffled frame (the rendered output elides the middle rows).
df

Unnamed: 0,x,y
0,"[0.5667979447327515, 0.6062392713445347, 0.439...",0.0
1,"[0.5141599655467034, 0.6620884872640939, 0.0, ...",1.0
2,"[0.5470613606687449, 0.5505755541494767, 0.536...",0.0
3,"[0.5346076602230022, 0.699752126929632, 0.0, 0...",1.0
4,"[0.4436365284277808, 0.5904921728317692, 0.0, ...",1.0
...,...,...
395,"[0.5373398858312205, 0.7009049663003447, 0.0, ...",1.0
396,"[0.5581570865545884, 0.5547087823243307, 0.573...",0.0
397,"[0.47360168867858565, 0.3274891224808873, 0.95...",1.0
398,"[0.5099643551579294, 0.65846861849301, 0.0, 0....",1.0
