In [1]:
import pandas as pd
import numpy as np
import wfdb
import os
import urllib.request
import matplotlib.pyplot as plt
import random
from scipy import signal
import glob
from tqdm import tqdm

## Reading Data

In [None]:
data = wfdb.rdrecord("s0030-04051907",sampfrom = 1800000, sampto = 25316395, channels=[0,1])
# p_signal convert wfdb object data to numpy array data
data.p_signal.shape

In [None]:
data.p_signal

In [None]:
wfdb.plot_wfdb(record = data, title = "Medical Data")

In [None]:
data1 = data.p_signal

plt.figure(figsize = (15,6))
plt.plot(list(range(1000,6000)), data1[1000:6000, 0])
plt.xlabel("Time")
plt.ylabel("RV5/6-L clavicle")
plt.show()

plt.figure(figsize = (15,6))
plt.plot(list(range(1000,6000)), data1[1000:6000, 1])
plt.xlabel("Time")
plt.ylabel("V1/2-L clavicle")
plt.show()

## Splitting into 100 random non-overlapping parts - Size:6000

In [None]:
num = [6000*i + x for i, x in enumerate(sorted(random.sample(range(3000), 100)))]
datalist = []
for i in num:
    datalist.append(data1[i:6000+i,:])

In [None]:
datalist = np.array(datalist)
# Min Max normalization
normalizedData = (datalist-np.min(datalist))/(np.max(datalist)-np.min(datalist))
del datalist
# saving data on disk
np.save('ecg_dataset.npy', normalizedData)
print(normalizedData.shape)
normalizedData

## Reading Data and applying pre-processing

In [None]:
data_array = np.load('ecg_dataset.npy')
print(data_array.shape)
data_array

In [4]:
# removing small random fluctions
# Butter LowPass Filter: The cutoff frequency and polynomial order were 11 Hz and two (quadratic)
def butter_lowpass_filter(data, cutoff_freq, nyq_freq, order=4):
    normal_cutoff = float(cutoff_freq) / nyq_freq
    b, a = signal.butter(order, normal_cutoff, btype='lowpass')
    print(a,b)
    y = signal.filtfilt(b, a, data, padlen=1)
    return y

In [None]:
# smooth signals
smoothened_signals = butter_lowpass_filter(data_array, 11, 6000, order = 2)

# high frequency signals
high_freq_signals = data_array - smoothened_signals

# First, Second and Third Order Derivative of smooth signals
gradient = np.gradient(smoothened_signals)

In [None]:
def indexFunc(i):
    if i == 0:
        return "Normalized"
    elif i == 1:
        return "Smoothened Signals"
    elif i == 2:
        return "High Frequency"
    elif i == 3:
        return "First Order Derivative"
    elif i == 4:
        return "Second Order Derivative"

var = [data_array, smoothened_signals, high_freq_signals, gradient[0], gradient[1]]

for i in range(len(var)):
    plt.figure(figsize = (15,6))
    plt.plot(list(range(6000)), var[i][2, : , 0])
    plt.title(indexFunc(i))
    plt.xlabel("Time")
    plt.ylabel("RV5/6-L clavicle")
    plt.savefig(indexFunc(i)+'.png')
    plt.show()

# plt.figure(figsize = (15,6))
# plt.plot(list(range(6000)), smoothened_signals[2, : , 0])
# plt.title("Smoothened")
# plt.xlabel("Time")
# plt.ylabel("RV5/6-L clavicle")
# plt.show()

# plt.figure(figsize = (15,6))
# plt.plot(list(range(6000)), high_freq_signals[2, : , 0])
# plt.title("High Frequency")
# plt.xlabel("Time")
# plt.ylabel("RV5/6-L clavicle")
# plt.show()

## Data classification

In [None]:
directory = "Dataset/"
os.makedirs(directory, exist_ok = True)
df = pd.DataFrame(columns=["File", "Class"])
counter = 0
for index in tqdm(range(len(data_array))):
    for i in range(2):
        # saving data
        filename = "ecg_{}_{}.npy".format(counter, i)
        data = data_array[index,:,i].reshape((6000,1))
        smoothened = smoothened_signals[index,:,i].reshape((6000,1))
        high_freq = high_freq_signals[index, :, i].reshape((6000,1))
        grad1 = gradient[0][index, :, i].reshape((6000,1))
        grad2 = gradient[1][index, :, i].reshape((6000,1))
        array = np.concatenate((data, smoothened, high_freq, grad1, grad2), axis=1)
        np.save(directory+filename, array)
        # saving image
        plt.plot(list(range(6000)), data_array[index,:,i])
        plt.xlabel("Time")
        plt.ylabel("RV5/6-L clavicle")
        plt.savefig(directory+"ecg_{}_{}.png".format(counter, i))
        plt.clf()
        # saving CSv data
        df.loc[len(df.index)] = [filename, None]
    counter+=1

df.to_csv(directory + "labels.csv", index=False)

# Dynamic NPY File Generation

In [2]:
loc = "New Dataset/"
files = glob.glob(loc + "*.dat")
for file in tqdm(files):
    basename = file.split(".")[0]
    data = wfdb.rdrecord(basename, sampfrom = 1800000, channels=[0,1])
    data = data.p_signal
    num = [6000*i + x for i, x in enumerate(sorted(random.sample(range(5000), 100)))]
    datalist = []
    for i in num:
        datalist.append(data[i:6000+i,:])
    datalist = np.array(datalist)
    # Min Max normalization
    normalizedData = (datalist-np.min(datalist))/(np.max(datalist)-np.min(datalist))
    del datalist
    # saving data on disk
    name = basename.split("/")[-1]
    np.save(name+'.npy', normalizedData)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.89s/it]


In [5]:
directory = "Dataset - 1/"
loc = "New Dataset/"

os.makedirs(directory, exist_ok = True)
df = pd.DataFrame(columns=["File", "Class"])
counter = 0

files = glob.glob(loc + "*.npy")
for file in files:
    filename = file.split(".")[0].split("\\")[-1].split("-")[0]
    print("Processing File: ", file)
    data_array = np.load(file)
    # smooth signals
    smoothened_signals = butter_lowpass_filter(data_array, 11, 6000, order = 2)
    # high frequency signals
    high_freq_signals = data_array - smoothened_signals
    # First, Second and Third Order Derivative of smooth signals
    gradient = np.gradient(smoothened_signals)
    
    for index in tqdm(range(len(data_array))):
        for i in range(2):
            # saving data
            new_filename = "{}_{}_{}.npy".format(counter, filename, i)
            data = data_array[index,:,i].reshape((6000,1))
            smoothened = smoothened_signals[index,:,i].reshape((6000,1))
            high_freq = high_freq_signals[index, :, i].reshape((6000,1))
            grad1 = gradient[0][index, :, i].reshape((6000,1))
            grad2 = gradient[1][index, :, i].reshape((6000,1))
            array = np.concatenate((data, smoothened, high_freq, grad1, grad2), axis=1)
            np.save(directory+new_filename, array)
            # saving image
            plt.plot(list(range(6000)), data_array[index,:,i])
            plt.xlabel("Time")
            plt.ylabel("RV5/6-L clavicle")
            plt.savefig(directory+"{}_{}_{}.png".format(counter, filename, i))
            plt.clf()
            # saving CSv data
            df.loc[len(df.index)] = [new_filename, None]
        counter+=1

df.to_csv(directory + "labels.csv", index=False)

Processing File:  New Dataset\s0324-07030105.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.21it/s]


Processing File:  New Dataset\s0324-07030106.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.43it/s]


Processing File:  New Dataset\s0363-07072603.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.24it/s]


Processing File:  New Dataset\s0363-07072604.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.37it/s]


Processing File:  New Dataset\s0378-07100403.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.35it/s]


Processing File:  New Dataset\s0378-07100404.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.29it/s]


Processing File:  New Dataset\s0397-08012203.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.38it/s]


Processing File:  New Dataset\s0397-08012205.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.29it/s]


Processing File:  New Dataset\s0402-08010703.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.47it/s]


Processing File:  New Dataset\s0402-08010704.npy
[ 1.         -1.99185476  0.9918878 ] [8.25954832e-06 1.65190966e-05 8.25954832e-06]


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.29it/s]


<Figure size 432x288 with 0 Axes>