In [15]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wfdb
from tqdm.auto import tqdm

In [10]:
# Load the record identifiers from the dataset's RECORDS index file (one id per line).
with open('./data/mit-bih-arrhythmia-database-1.0.0/RECORDS', "r") as f:
    # Strip each line and drop blanks: a trailing newline (or CRLF line endings)
    # would otherwise produce an empty or '\r'-suffixed id, and the extraction
    # loop would then try to open a record that does not exist.
    records = [line.strip() for line in f if line.strip()]

In [6]:
# Maps each raw annotation symbol to one of five coarse heartbeat classes
# (N, S, V, F, Q). Symbols that are not keys of this dict are ignored by the
# extraction loop.
# NOTE(review): the grouping looks like the ANSI/AAMI EC57 beat classes — confirm.
_symbols_by_class = (
    ("N", ".NLRej"),
    ("S", "AJaS"),
    ("V", "EV"),
    ("F", "F"),
    ("Q", "/fQ"),
)
ansi_map = {
    symbol: beat_class
    for beat_class, symbols in _symbols_by_class
    for symbol in symbols  # each character of the string is one annotation symbol
}

In [22]:
# Cut a fixed-length window around every annotated beat of every record.
#
# Results (index-aligned with `records`):
#   heartbeat_data[i]   -> list of 1-D arrays, each 2 * HALF_WINDOW samples long
#   heartbeat_labels[i] -> list of class labels (values of `ansi_map`), one per window
MITBIH_DIR = './data/mit-bih-arrhythmia-database-1.0.0/'
HALF_WINDOW = 180  # samples kept on each side of the annotated index -> 360-sample beats

heartbeat_data = [] # list of lists (by record) of heartbeats
heartbeat_labels = [] # list of lists (by record) of labels
for record_id in tqdm(records):
    record_path = MITBIH_DIR + record_id
    record = wfdb.rdrecord(record_path)
    annotation = wfdb.rdann(record_path, 'atr')

    # Only the first channel is used.
    # NOTE(review): channel 0 may not be the same lead in every record — confirm
    # against each record's header if lead consistency matters downstream.
    signal = record.p_signal[:, 0]
    n_samples = len(signal)

    heartbeats = []
    labels = []
    for (idx, hb_class) in zip(annotation.sample, annotation.symbol):
        # Skip annotations that are not mapped to a beat class.
        if hb_class not in ansi_map:
            continue
        start, stop = idx - HALF_WINDOW, idx + HALF_WINDOW
        # Explicit bounds check instead of a try/except around the slice:
        # slicing never raises for out-of-range bounds, and a negative start
        # would silently wrap around to the end of the signal. Beats too close
        # to either edge to yield a full window are dropped.
        if start < 0 or stop > n_samples:
            continue
        heartbeats.append(signal[start:stop])
        labels.append(ansi_map[hb_class])
    heartbeat_data.append(heartbeats)
    heartbeat_labels.append(labels)

  0%|          | 0/48 [00:00<?, ?it/s]

In [23]:
# Persist the extracted beats: one compressed .npz archive per record, holding
# the beat windows under "heartbeats" and their class labels under "labels".
output_dir = './data/heartbeats/'
# np.savez_compressed does not create missing directories, so make sure the
# target exists; otherwise this cell fails on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)
for (record_id, heartbeats, labels) in zip(records, heartbeat_data, heartbeat_labels):
    np.savez_compressed(output_dir + record_id, heartbeats=heartbeats, labels=labels)

In [24]:
# Sanity check: reopen the archive written for record 100 and list the arrays
# it contains. `npzfile` is left open because the next cell reads from it.
sample_npz_path = './data/heartbeats/100.npz'
npzfile = np.load(sample_npz_path)
print(npzfile.files)

['heartbeats', 'labels']


In [25]:
# Shape of the beat matrix stored for record 100: one row per extracted beat,
# one column per sample in the beat window.
npzfile["heartbeats"].shape

(2271, 360)