In [2]:
import csv
import os

import biosppy.signals.ecg as ecg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Helper functions

In [4]:
def extract_r_peaks(signal, sampling_rate=300):
    """Return the R-peak sample indices of a single ECG recording.

    NaN entries are removed from ``signal`` before peak detection, so the
    returned indices refer to positions in the NaN-free signal.

    Args:
        signal: One recording; must provide ``dropna()`` (e.g. a pandas Series).
        sampling_rate: Sampling rate forwarded to the Engzee segmenter.

    Returns:
        list: The detected R-peak indices, or an empty list when the
        segmenter raised ``ValueError`` (the error is printed).
    """
    samples = signal.dropna().to_numpy(dtype='float32')
    try:
        segmentation = ecg.engzee_segmenter(samples, sampling_rate=sampling_rate)
        return segmentation['rpeaks'].tolist()
    except ValueError as e:
        print(f"Error processing row: {e}")
        return []

In [5]:
def extract_heartbeats(signal, r_peaks, sampling_rate=300):
    """Cut one recording into individual heartbeats around the given R-peaks.

    Args:
        signal: The ECG samples of a single recording.
        r_peaks: R-peak indices into ``signal``.
        sampling_rate: Sampling rate forwarded to ``ecg.extract_heartbeats``.

    Returns:
        Whatever ``ecg.extract_heartbeats`` returns on success. If it raises
        ``ValueError`` the error is printed and a bare empty list is returned
        instead, so callers must not index the result blindly.
    """
    try:
        return ecg.extract_heartbeats(signal, r_peaks, sampling_rate=sampling_rate)
    except ValueError as e:
        print(f"Error in heartbeat extraction: {e}")
    # Only reached when the extraction failed.
    return []

In [6]:
def process_ecg_dataset(data, sampling_rate=300):
    """Run R-peak detection and heartbeat extraction on every row of ``data``.

    Args:
        data: DataFrame with one ECG recording per row.
        sampling_rate: Sampling rate forwarded to both helper functions.

    Returns:
        tuple: ``(r_peaks_list, heartbeats_list)``, two lists with one entry
        per row of ``data``, in row order. ``heartbeats_list[k]`` is the
        ``'templates'`` entry of the extraction result, or an empty list when
        the extraction for that row failed.
    """
    r_peaks_list = []
    heartbeats_list = []

    # Walk the rows by position: the counter is positional, so a label lookup
    # would fail (or reorder rows) whenever the index is not exactly 0..n-1.
    for row_pos in range(len(data)):
        # extract_r_peaks works on the NaN-free signal, so the beats must be
        # cut from the same NaN-free samples for the peak indices to line up.
        signal = data.iloc[row_pos].dropna()

        r_peaks = extract_r_peaks(signal, sampling_rate)
        r_peaks_list.append(r_peaks)

        extracted = extract_heartbeats(signal, r_peaks, sampling_rate)
        if isinstance(extracted, list):
            # extract_heartbeats signals failure with a bare []; indexing it
            # with 'templates' would raise TypeError and abort the whole run.
            heartbeats = []
        else:
            heartbeats = extracted['templates']
        heartbeats_list.append(heartbeats)

    return r_peaks_list, heartbeats_list

### Extract filtered heartbeats

In [8]:
def filtered_ecg_dataset(data, sampling_rate=300):
    """Filter every recording in ``data`` and collect its heartbeat templates.

    Each row has its NaN entries dropped, is converted to ``float32`` and is
    passed to ``ecg.ecg`` (plotting disabled).

    Args:
        data: DataFrame with one ECG recording per row.
        sampling_rate: Sampling rate forwarded to ``ecg.ecg``.

    Returns:
        tuple: ``(filtered_signal, beats)``, two lists with one entry per row
        of ``data``, in row order, holding the ``'filtered'`` and
        ``'templates'`` outputs. If ``ecg.ecg`` raises ``ValueError`` for a
        row, the error is printed and both entries for that row are empty
        arrays, so the lists stay aligned with the rows.
    """
    filtered_signal = []
    beats = []
    # Walk the rows by position: the counter is positional, so a label lookup
    # would fail (or reorder rows) whenever the index is not exactly 0..n-1.
    for row_pos in range(len(data)):
        samples = data.iloc[row_pos].dropna().to_numpy(dtype='float32')
        try:
            output = ecg.ecg(samples, sampling_rate=sampling_rate, show=False)
            filtered = output['filtered']
            beat = output['templates']
        except ValueError as e:
            # Same policy as the other helpers: report the row and carry on
            # instead of losing the whole (slow) run to one bad recording.
            print(f"Error processing row {row_pos}: {e}")
            filtered = np.array([], dtype='float32')
            beat = np.array([], dtype='float32')
        filtered_signal.append(filtered)
        beats.append(beat)
        if len(filtered) < 1:
            print('filtered {} length is less than one'.format(row_pos))
        if len(beat) < 1:
            print('Beat {} length is less than one'.format(row_pos))
    return filtered_signal, beats

## Data processing

In [30]:
# Load the raw train / test tables; the `id` column becomes the row index.
# `dataTrain` has to be loaded here: the training cell below reads it, and
# with this line commented out a fresh kernel fails with a NameError.
dataTrain = pd.read_csv('data/train.csv', index_col='id')
dataTest = pd.read_csv('data/test.csv', index_col='id')

# Optional unfiltered pipeline (disabled) -- uncomment the one you need:
# r_peaks_list, heartbeats_list = process_ecg_dataset(dataTrain)
# r_peaks_list, heartbeats_list = process_ecg_dataset(dataTest)

In [9]:
# Split off the label column `y`, then filter every training recording and
# collect its heartbeat templates.
train_x = dataTrain.drop(columns='y')
filtered_signals, beats = filtered_ecg_dataset(train_x)

In [None]:
# Filter every test recording and collect its heartbeat templates.
# NOTE: this rebinds `filtered_signals`, replacing the value that the training
# cell assigned to the same name; only `beats_test` is specific to the test set.
filtered_signals, beats_test = filtered_ecg_dataset(dataTest)

In [25]:
# Clean the training beat templates, one entry per recording:
#   * no beats      -> a single all-zero beat of 180 samples as placeholder
#   * exactly one   -> keep it, with any NaN replaced by 0.0
#   * more than one -> drop the last beat if it contains a NaN
filtered_heartbeats_list = []
for i, signal in enumerate(beats):
    if len(signal) == 0:
        filtered_heartbeats_list.append(np.zeros((1,180)))
    elif len(signal) == 1:
        signal = np.nan_to_num(signal, nan=0.0)
        filtered_heartbeats_list.append(signal)
    else:
        last_beat_has_nan = np.isnan(signal[-1]).any()
        filtered_heartbeats_list.append(signal[:-1] if last_beat_has_nan else signal)

In [None]:
# Clean the test beat templates with the same rules as for the training set.
# NOTE: the result is stored under the same name, `filtered_heartbeats_list`,
# so running this cell replaces the list built from the training beats.
filtered_heartbeats_list = []
for i, signal in enumerate(beats_test):
    if len(signal) == 0:
        # No beat available: stand in with one all-zero beat of 180 samples.
        filtered_heartbeats_list.append(np.zeros((1,180)))
    elif len(signal) == 1:
        # A lone beat is kept; NaN samples are zeroed rather than discarded.
        signal = np.nan_to_num(signal, nan=0.0)
        filtered_heartbeats_list.append(signal)
    else:
        # Several beats: only the final one is checked for NaN and dropped.
        last_beat_has_nan = np.isnan(signal[-1]).any()
        filtered_heartbeats_list.append(signal[:-1] if last_beat_has_nan else signal)

In [31]:
import pickle

# The processing above takes a long time, so its result is cached on disk.
# A pickle is used because a column holding nested arrays is a hassle to
# parse back from CSV.

# --- Train set (disabled) ---
# result_data = pd.DataFrame({
#     'y': dataTrain['y'],
#     'heartbeat': filtered_heartbeats_list
# })
# with open('data/processed_train_dataset.pkl', 'wb') as f:
#     pickle.dump(result_data, f)

# --- Test set ---
# NOTE(review): this stores `beats_test` as produced by the filtering step,
# whereas the disabled train branch stores the cleaned
# `filtered_heartbeats_list` -- confirm that the difference is intended.
result_data = pd.DataFrame({'heartbeat': beats_test})

with open('data/processed_test_dataset.pkl', 'wb') as f:
    pickle.dump(result_data, f)

NameError: name 'beats_test' is not defined