In [1]:
# !aws s3 sync --no-sign-request s3://physionet-open/ltafdb/1.0.0/ ./data/ltafdb/

In [1]:
import os

import wfdb
import numpy as np
import polars as pl
from tqdm.notebook import tqdm
from wfdb.processing import calc_rr
from scipy.signal import butter, filtfilt

import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Download the dataset
os.makedirs('./data/ltafdb', exist_ok=True)
if not os.path.exists('./data/ltafdb/75.dat'):
    wfdb.dl_database('ltafdb', './data/ltafdb')
else:
    print('Dataset already downloaded')

Dataset already downloaded


In [3]:
def list_records(path: str) -> list[str]:
    records = []

    for root, _, files in os.walk(path):
        for file in files:
            if file.endswith(".dat"):
                records.append(os.path.join(root, file.replace(".dat", "")))
        
    return records

def load_record(record: str) -> tuple[pl.DataFrame, np.ndarray, dict]:
    qrs_annotations = wfdb.rdann(record, "qrs")
    annotations = wfdb.rdann(record, "atr")
    signals, fields = wfdb.rdsamp(record)

    annotation_df = pl.from_dict({
        'symbol': annotations.symbol,
        'aux': annotations.aux_note,
        'position': annotations.sample,
    })
    
    return qrs_annotations, annotation_df, signals, fields

## Annotations

As per the PhysioNet website (<https://physionet.org/static/lightwave/doc/annotations.html>),
the following annotations are available (selected only the ones present in this
dataset):

### Beat Annotations

| Annotation | Description                       |
| ---------- | --------------------------------- |
| N          | Normal Beat                       |
| A          | Atrial Premature Beat             |
| V          | Premature Ventricular Contraction |
| Q          | Unclassified Beat                 |

### Non-Beat Annotations

| Annotation | Description        |
| ---------- | ------------------ |
| +          | Rhythm Change      |
| "          | Comment Annotation |


In [4]:
qrs_ann, ann, sig, fields = load_record("./data/ltafdb/100")

start_sample = 106000
end_sample = 107000
channel_sample = 0

sample_sig = sig[start_sample:end_sample, channel_sample]
sample_ann = ann.filter(pl.col('position') >= start_sample, pl.col('position') <= end_sample)
sample_ann = sample_ann.with_columns((pl.col('position') - start_sample) / fields['fs'])

fig = px.line(x=np.arange(len(sample_sig), dtype=np.float64) / fields['fs'], y=sample_sig, title="Sample signal")
fig.update_layout(xaxis_title="Time (s)", yaxis_title="ECG [mV]")

for symbol, position in sample_ann.group_by('symbol').agg(pl.col('position')).iter_rows():
    fig.add_trace(go.Scatter(x=position, y=np.repeat(2.0, len(position)), mode='markers+text', textposition='top center', text=np.repeat(symbol, len(position)), name=symbol))

for pos, sym in zip(qrs_ann.sample, qrs_ann.symbol):
    if pos < start_sample or pos > end_sample:
        continue

    fig.add_annotation(x=(pos - start_sample) / fields['fs'], y=3.0, text=f"QRS - {sym}", showarrow=True)

fig.show()

In [6]:
def filter_sample(sample: np.ndarray, low_cutoff: float = 0.5, high_cutoff: float = 50, fs: int = 128):
    nyq = 0.5 * fs
    low = low_cutoff / nyq
    high = high_cutoff / nyq

    b, a = butter(5, [low, high], btype='band')

    return filtfilt(b, a, sample)

sample_sig_filtered = filter_sample(sample_sig)

fig = px.line(x=np.arange(len(sample_sig_filtered), dtype=np.float64) / fields['fs'], y=sample_sig_filtered, title="Sample signal (Filtered)")
fig.update_layout(xaxis_title="Time (s)", yaxis_title="ECG [mV]")

for symbol, position in sample_ann.group_by('symbol').agg(pl.col('position')).iter_rows():
    fig.add_trace(go.Scatter(x=position, y=np.repeat(2.0, len(position)), mode='markers+text', textposition='top center', text=np.repeat(symbol, len(position)), name=symbol))

fig.show()

In [19]:
def extract_beats(signal, ann, fs, margin: int = 48) -> pl.DataFrame:
    filtered_signal = filter_sample(signal[:, 0], fs=fs)
    target_peaks = ann.filter(pl.col('symbol').is_in(('A', 'V', 'N')))

    beats = []

    for row in tqdm(target_peaks.iter_rows(named=True), total=len(target_peaks), desc="Extracting Beats", unit="Beat", leave=False):
        signal_sample = filtered_signal[row['position'] - margin : row['position'] + margin + 1]
        symbol = row['symbol']
        is_arrhythmia = symbol in ('A', 'V')

        beats.append({
            'signal': signal_sample.tolist(),
            'symbol': symbol,
            'is_arrhythmia': is_arrhythmia,
        })
    
    return pl.from_dicts(beats)

sample_beats = extract_beats(sig, ann, fields['fs'])

Extracting Beats:   0%|          | 0/91427 [00:00<?, ?Beat/s]

In [60]:
def plot_beat(beat, name: str = "UNK"):
    fig = px.line(x=np.arange(len(beat['signal']), dtype=np.float64) / fields['fs'], y=beat['signal'], title=f"Sample Beat")
    fig.update_layout(xaxis_title="Time (s)", yaxis_title="ECG [mV]")
    fig.show()

plot_beat(sample_beats.sample(1).row(0, named=True))

In [61]:
all_beats = [
    extract_beats(sig, ann, fields['fs'])
    for _, ann, sig, fields in (
        load_record(record)
        for record in tqdm(list_records("./data/ltafdb"), desc="Loading Records", unit="Records")
    )
]

Loading Records:   0%|          | 0/84 [00:00<?, ?Records/s]

Extracting Beats:   0%|          | 0/106247 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/90546 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/80957 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/112922 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/104663 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/108326 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/106512 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/108619 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/98165 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/114383 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/91427 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/151656 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/84154 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/68998 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/102596 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/105293 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/102510 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/116161 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/112269 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/112776 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/108969 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/111760 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/100072 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/125152 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/98754 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/117068 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/110439 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/77739 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/88707 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/85354 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/68969 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/123877 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/138059 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/141512 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/107898 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/107995 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/82617 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/86023 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/145623 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/73839 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/136120 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/116500 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/121410 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/131259 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/120742 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/81600 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/124758 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/99954 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/95839 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/109964 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/68937 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/31161 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/97999 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/89195 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/135166 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/121154 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/88858 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/64643 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/81328 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/113929 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/114196 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/107029 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/147446 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/95592 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/146761 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/89158 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/79708 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/94241 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/47495 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/118198 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/106866 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/132130 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/108238 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/116815 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/123406 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/119778 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/96096 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/184797 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/141523 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/128335 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/125201 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/144198 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/91784 [00:00<?, ?Beat/s]

Extracting Beats:   0%|          | 0/126771 [00:00<?, ?Beat/s]

[shape: (106_247, 3)
 ┌─────────────────────────────────┬────────┬───────────────┐
 │ signal                          ┆ symbol ┆ is_arrhythmia │
 │ ---                             ┆ ---    ┆ ---           │
 │ list[f64]                       ┆ str    ┆ bool          │
 ╞═════════════════════════════════╪════════╪═══════════════╡
 │ [-0.815129, -0.805131, … -0.05… ┆ N      ┆ false         │
 │ [0.04759, 0.062487, … 0.120928… ┆ N      ┆ false         │
 │ [0.034213, 0.107824, … -0.0460… ┆ N      ┆ false         │
 │ [-0.132703, -0.098985, … 0.091… ┆ N      ┆ false         │
 │ [0.068137, 0.05442, … -0.05306… ┆ N      ┆ false         │
 │ …                               ┆ …      ┆ …             │
 │ [0.026392, 0.030242, … -0.0482… ┆ N      ┆ false         │
 │ [-0.03093, -0.037001, … 0.0340… ┆ N      ┆ false         │
 │ [-0.006478, -0.024214, … 0.039… ┆ N      ┆ false         │
 │ [0.039726, 0.005935, … 0.04328… ┆ N      ┆ false         │
 │ [0.048721, 0.054681, … 0.02301… ┆ N      ┆ fal

In [63]:
pl.concat(all_beats, how='vertical').write_parquet("./data/beats.pqt.zst", compression='zstd', compression_level=5)