In [43]:
import pandas as pd
import biosppy.signals.ecg as ecg
import numpy as np
import pickle

# Load the training table and split it positionally: column 1 supplies the
# labels and every column from index 2 onward supplies the feature values.
# Column 0 is not used here (presumably a row id -- confirm against the CSV).
train = pd.read_csv('train.csv')
train_labels = train.iloc[:, 1].to_numpy()
train_features = train.iloc[:, 2:].to_numpy()

In [44]:
def valid_heartbeats(samples, sampling_rate=300):
    """Split each ECG recording into heartbeat templates and drop those containing NaN.

    For every recording, R-peaks are located with ``ecg.engzee_segmenter``,
    one template per peak is cut out with ``ecg.extract_heartbeats``, and any
    template holding at least one NaN value is discarded.

    Parameters
    ----------
    samples : iterable of 1-D array-like
        One ECG signal per element (for example, the rows of a 2-D array).
    sampling_rate : int or float, optional
        Sampling frequency forwarded to both BioSPPy calls. Defaults to 300,
        the value that used to be hard-coded, so existing callers are
        unaffected.

    Returns
    -------
    list of list
        One inner list per input recording, in input order, holding the
        NaN-free heartbeat templates of that recording. An inner list may be
        empty when no template survives the filter.
    """
    heartbeats_list = []

    for sequence in samples:
        rpeaks = ecg.engzee_segmenter(
            signal=sequence, sampling_rate=sampling_rate
        )['rpeaks'].tolist()

        templates = ecg.extract_heartbeats(
            signal=sequence, rpeaks=rpeaks, sampling_rate=sampling_rate
        )['templates']

        # NOTE(review): NaN inside a template presumably comes from NaN padding
        # in the input rows -- confirm against the CSV layout.
        nan_free = [beat for beat in templates if not np.isnan(beat).any()]

        # Always append, even when empty, so output index i matches input index i.
        heartbeats_list.append(nan_free)

    return heartbeats_list

In [45]:
# Segment every training recording; the result has one list of NaN-free
# heartbeat templates per row of train_features.
train_valid_heartbeats = valid_heartbeats(train_features)

In [47]:
# Summarise how many valid heartbeats were kept per training recording.
max_length = max(map(len, train_valid_heartbeats))
min_length = min(map(len, train_valid_heartbeats))
average_length = sum(map(len, train_valid_heartbeats)) / len(train_valid_heartbeats)

print(f"Maximum number of heartbeats: {max_length}")
print(f"Minimum number of heartbeats: {min_length}")
print(f"Average number of heartbeats: {average_length:.2f}")

Maximum number of heartbeats: 159
Minimum number of heartbeats: 0
Average number of heartbeats: 34.78


In [55]:
# Persist the segmented training heartbeats together with their labels.
with open('train_data.pkl', 'wb') as train_file:
    pickle.dump(
        {
            'X': train_valid_heartbeats,
            'y': train_labels,
        },
        train_file,
    )

In [49]:
# Load the test table; every column from index 1 onward is taken as feature
# values. Column 0 is skipped (presumably a row id -- confirm against the CSV).
test = pd.read_csv('test.csv')
test_features = test.iloc[:, 1:].to_numpy()

In [50]:
# Sanity check: number of rows (recordings) loaded from the test table.
print(len(test_features))

3411


In [51]:
# Segment every test recording; the result has one list of NaN-free
# heartbeat templates per row of test_features.
test_valid_heartbeats = valid_heartbeats(test_features)

In [52]:
# Sanity check: valid_heartbeats appends exactly one list per input recording,
# so this count should equal the number of test rows.
print(len(test_valid_heartbeats))

3411


In [53]:
# Persist the segmented test heartbeats (the test set carries no labels here).
with open('test_data.pkl', 'wb') as test_file:
    pickle.dump(
        {
            'X': test_valid_heartbeats,
        },
        test_file,
    )

In [54]:
# Summarise how many valid heartbeats were kept per test recording.
# Note: this rebinds max_length / min_length / average_length.
max_length = max(map(len, test_valid_heartbeats))
min_length = min(map(len, test_valid_heartbeats))
average_length = sum(map(len, test_valid_heartbeats)) / len(test_valid_heartbeats)

print(f"Maximum number of heartbeats: {max_length}")
print(f"Minimum number of heartbeats: {min_length}")
print(f"Average number of heartbeats: {average_length:.2f}")

Maximum number of heartbeats: 172
Minimum number of heartbeats: 0
Average number of heartbeats: 34.64
