In [1]:
import pandas as pd
import numpy as np
import torch
from biosppy.signals import ecg
import matplotlib.pyplot as plt
import os
import pickle

In [2]:
# Read the training table; the 'id' column is used as the row index.
data = pd.read_csv('original_data/train.csv', index_col='id')

# Separate the 'y' column from every other column (presumably labels vs.
# signal samples -- confirm against the dataset description).
train_x = data.drop(columns='y')
train_y = data['y']

In [3]:
# R-peak extraction
def extract_peaks(signal, sampling_rate=300):
    """Detect the R-peaks of a single recording.

    NaN entries are removed and the remaining samples are cast to float32
    before being passed to biosppy's ``ecg.engzee_segmenter``.

    Open idea carried over from the original note: ``ecg.ecg`` could be tried
    instead -- it returns ts, the filtered ECG signal, rpeaks, heart rate, etc.

    Args:
        signal: pandas Series (or any object offering ``.dropna().to_numpy()``)
            holding the samples of one recording.
        sampling_rate: sampling frequency forwarded to the segmenter
            (presumably in Hz). Defaults to 300.

    Returns:
        list: the segmenter's ``'rpeaks'`` entry converted via ``.tolist()``.
    """
    samples = signal.dropna().to_numpy(dtype='float32')
    segmentation = ecg.engzee_segmenter(samples, sampling_rate=sampling_rate)
    return segmentation['rpeaks'].tolist()

In [4]:
# Heartbeat extraction
def extract_heartbeat(signal, r_peaks, sampling_rate=300):
    """Cut one recording into heartbeat templates around the given R-peaks.

    NaN entries are removed and the remaining samples are cast to float32
    before being passed to biosppy's ``ecg.extract_heartbeats``.

    Args:
        signal: pandas Series (or any object offering ``.dropna().to_numpy()``)
            holding the samples of one recording.
        r_peaks: R-peak locations for this recording, forwarded unchanged to
            ``ecg.extract_heartbeats``.
        sampling_rate: sampling frequency forwarded to biosppy
            (presumably in Hz). Defaults to 300.

    Returns:
        The ``'templates'`` entry of the ``ecg.extract_heartbeats`` result.
    """
    samples = signal.dropna().to_numpy(dtype='float32')
    extraction = ecg.extract_heartbeats(samples, r_peaks, sampling_rate=sampling_rate)
    return extraction['templates']

In [5]:
def ecg_signal(data, sampling_rate=300):
    """Extract R-peaks and heartbeat templates for every row of ``data``.

    Rows are visited by position (``.iloc``), not by index label. The original
    ``data.loc[i]`` lookup only worked when the index labels happened to be
    exactly ``0..len(data)-1``; positional access gives the same result in that
    case and also works for any other index. The returned lists are therefore
    always in row order.

    Args:
        data: pandas DataFrame with one recording per row.
        sampling_rate: sampling frequency forwarded to ``extract_peaks`` and
            ``extract_heartbeat``. Defaults to 300.

    Returns:
        tuple: ``(r_peaks, heartbeats)`` -- two lists with one entry per row of
        ``data``; the first holds the ``extract_peaks`` results, the second the
        ``extract_heartbeat`` results.
    """
    r_peaks = []
    heartbeats = []
    for position in range(len(data)):
        # Fetch the row once and reuse it for both extraction steps.
        recording = data.iloc[position]
        r_peak = extract_peaks(recording, sampling_rate)
        heartbeat = extract_heartbeat(recording, r_peak, sampling_rate)
        r_peaks.append(r_peak)
        heartbeats.append(heartbeat)

    return r_peaks, heartbeats

In [None]:
# Run R-peak detection and heartbeat extraction over every row of train_x,
# using ecg_signal's default sampling_rate (300). Both results are lists with
# one entry per row.
r_peaks, heartbeats = ecg_signal(train_x)

In [None]:
# Split the extracted heartbeat templates into two collections:
#   all_beats    - one entry per recording, including recordings with no beats
#   full_beats   - only recordings with at least one extracted heartbeat
#   train_y_full - the train_y entries matching full_beats, in the same order
all_beats = []
full_beats = []
# np.asarray accepts both a Series and an ndarray, so this cell stays
# re-runnable: calling .to_numpy() here would fail on a second run because
# train_y has already been replaced by an ndarray.
train_y = np.asarray(train_y)
train_y_full = []

for i, signal in enumerate(heartbeats):
    if len(signal) >= 1:
        all_beats.append(signal)
        full_beats.append(signal)
        # heartbeats[i] is paired with train_y[i] by position.
        train_y_full.append(train_y[i])
    else:
        # No heartbeat was extracted for this recording. The entry is empty
        # along its first axis, so nan_to_num has nothing to replace; the call
        # is kept so the stored placeholder is unchanged.
        signal = np.nan_to_num(signal, nan=0.0)
        all_beats.append(signal)

In [None]:
# Persist all_beats, full_beats and train_y_full as pickle files under data/.
# Create the output directory first so a fresh checkout does not fail with
# FileNotFoundError on the first open(); exist_ok makes re-runs harmless.
os.makedirs('data', exist_ok=True)

with open('data/all_beats.pkl', 'wb') as f:
    pickle.dump(all_beats, f)

with open('data/full_beats.pkl', 'wb') as f:
    pickle.dump(full_beats, f)

with open('data/train_y_full.pkl', 'wb') as f:
    pickle.dump(train_y_full, f)

In [None]:
# Run biosppy's ecg.ecg on every recording and collect its 'filtered' and
# 'templates' outputs, one entry per row of train_x.
filtered_signal = []
beats = []
for i in range(len(train_x)):
    # Positional lookup: .loc[i] only works when the index labels are exactly
    # 0..len(train_x)-1, whereas .iloc[i] always follows row order.
    # NaN entries are dropped (presumably padding of shorter recordings).
    recording = train_x.iloc[i].dropna().to_numpy(dtype='float32')
    output = ecg.ecg(recording, sampling_rate=300, show=False)
    filtered = output['filtered']
    beat = output['templates']
    filtered_signal.append(filtered)
    beats.append(beat)
    # Report recordings for which the pipeline produced nothing usable.
    if len(filtered) < 1:
        print('filtered {} length is less than one'.format(i))
    if len(beat) < 1:
        print('Beat {} length is less than one'.format(i))

In [None]:
# Persist the ecg.ecg outputs (filtered signals and heartbeat templates) as
# pickle files under data/. Create the output directory first so a fresh
# checkout does not fail with FileNotFoundError; exist_ok makes re-runs harmless.
os.makedirs('data', exist_ok=True)

with open('data/filtered_ecg.pkl', 'wb') as f:
    pickle.dump(filtered_signal, f)

with open('data/heartbeat_templates_ecg.pkl', 'wb') as f:
    pickle.dump(beats, f)