 Configuration & Setup

In [1]:
import os
import numpy as np
import wfdb

# --- Configuration ---
# DATA_DIR is where the WFDB record files are read from; SAVE_DIR receives
# the preprocessed arrays and is created up front if it does not exist yet.
DATA_DIR = "../Data"
SAVE_DIR = "Preprocessed_data"
os.makedirs(SAVE_DIR, exist_ok=True)

# Record names to process, kept as strings because they are joined onto
# DATA_DIR to form the record paths.
RECORDS = (
    "100 101 102 103 104 105 106 107 "
    "108 109 111 112 113 114 115 116 "
    "117 118 119 121 122 123 124 200 "
    "201 202 203 205 207 208 209 210 "
    "212 213 214 215 217 219 220 221 "
    "222 223 228 230 231 232 233 234"
).split()

# Number of samples kept per beat; the window is centered on the annotated
# position, i.e. BEAT_WINDOW // 2 samples on each side.
BEAT_WINDOW = 90


Loading Records & Extracting R-peaks

In [2]:
# Storage for the extracted beat windows and their raw annotation symbols.
# Both lists are re-created here, so re-running this cell never appends
# duplicates to a previous run.
beats = []
labels = []

# Samples taken on each side of an annotated position.
half_window = BEAT_WINDOW // 2

for rec in RECORDS:
    record_path = os.path.join(DATA_DIR, rec)

    # Load the ECG signal and its reference ('atr') annotations
    sig = wfdb.rdrecord(record_path)
    ann = wfdb.rdann(record_path, 'atr')

    # Select the MLII lead by name rather than by position, so every record
    # contributes the same lead whenever it has one. Fall back to channel 0
    # for records that do not contain an MLII signal.
    # NOTE(review): these record names match the MIT-BIH Arrhythmia Database,
    # whose directory lists MLII as the second signal of record 114 and no
    # MLII at all for 102/104 - confirm against the local .hea files.
    lead_names = list(sig.sig_name or [])
    channel = lead_names.index('MLII') if 'MLII' in lead_names else 0
    ecg = sig.p_signal[:, channel]

    r_peaks = ann.sample  # annotated sample positions (treated as R-peaks)
    symbols = ann.symbol  # annotation symbol for each position
    # NOTE(review): the annotation file may also hold non-beat symbols
    # (rhythm/quality markers); nothing is filtered here, so any filtering
    # has to happen on `labels` afterwards.

    # Cut a window of BEAT_WINDOW samples centered on each annotated position
    for r, symbol in zip(r_peaks, symbols):
        start = r - half_window
        end = r + half_window

        # Skip windows that would run past either end of the recording,
        # so every stored beat has exactly BEAT_WINDOW samples.
        if start < 0 or end > len(ecg):
            continue

        beats.append(ecg[start:end])
        labels.append(symbol)


Mapping Beat Types to AAMI Classes

In [3]:
# --- AAMI EC57 beat mapping ---
# Each AAMI class is listed with the annotation symbols that belong to it;
# the symbol -> class lookup table is derived from these groups.
aami_groups = {
    'N': ('N', 'L', 'R', 'e', 'j'),
    'S': ('A', 'a', 'J', 'S'),
    'V': ('V', 'E'),
    'F': ('F',),
    'Q': ('/', 'f', 'Q'),
}
aami_map = {
    symbol: aami_class
    for aami_class, class_symbols in aami_groups.items()
    for symbol in class_symbols
}

# Keep only beats whose symbol has an AAMI class, pairing each kept beat
# with its mapped class; everything else is dropped.
kept_pairs = [
    (beat, aami_map[sym])
    for beat, sym in zip(beats, labels)
    if sym in aami_map
]
mapped_beats = [beat for beat, _ in kept_pairs]
mapped_labels = [aami_class for _, aami_class in kept_pairs]

# Replace the raw lists with arrays of the filtered beats / mapped labels.
beats = np.array(mapped_beats)
labels = np.array(mapped_labels)

print("Extracted beats shape:", beats.shape)
print("Unique labels after mapping:", np.unique(labels))


Extracted beats shape: (109487, 90)
Unique labels after mapping: ['F' 'N' 'Q' 'S' 'V']


Cleaning & Normalization

In [4]:
import numpy as np

# beats: shape (num_beats, beat_length) on the first run of this cell.
#
# Each beat is min-max scaled to [0, 1] independently of all other beats,
# then a trailing channel axis is appended for Conv1D.

# Compute in float64 on a 2-D (num_beats, beat_length) array.
beats_2d = np.asarray(beats, dtype=np.float64)
if beats_2d.ndim > 2:
    # This cell overwrites `beats`, so on a re-run the channel axis is
    # already present. Drop trailing singleton axes instead of stacking yet
    # another one; reshape raises if those axes are not singletons.
    beats_2d = beats_2d.reshape(beats_2d.shape[:2])

assert beats_2d.ndim == 2 and beats_2d.size > 0, (
    "expected a non-empty (num_beats, beat_length) array of beats"
)

# Per-beat minimum and range, kept as column vectors so they broadcast
# against every sample of the corresponding beat.
min_vals = beats_2d.min(axis=1, keepdims=True)
range_vals = beats_2d.max(axis=1, keepdims=True) - min_vals

# A flat beat has range 0; its numerator (beat - min) is all zeros, so
# dividing by 1 instead yields an all-zero beat without a division by zero.
safe_range = np.where(range_vals == 0, 1.0, range_vals)
beats_normalized = (beats_2d - min_vals) / safe_range

beats = beats_normalized.astype(np.float32)

# Add channel dimension for Conv1D: (num_beats, beat_length, 1)
beats = np.expand_dims(beats, axis=-1)

print(f"After normalization: min = {beats.min():.3f} , max = {beats.max():.3f}")
print(f"Final beats shape: {beats.shape}")


After normalization: min = 0.000 , max = 1.000
Final beats shape: (109487, 90, 1)


Final Model-Ready Data

Save the preprocessed beats and their labels to a single `.npz` file.

In [5]:
# --- Save ONLY preprocessed data ---
# Build the output path once and reuse it for both the write and the log line.
save_path = os.path.join(SAVE_DIR, "ecg_data.npz")

# Store both arrays in one archive under the keys 'beats' and 'labels'.
np.savez(save_path, beats=beats, labels=labels)

print(f"Preprocessed data saved to: {save_path}")
print(f"Saved beats shape: {beats.shape}, labels shape: {labels.shape}")


Preprocessed data saved to: Preprocessed_data\ecg_data.npz
Saved beats shape: (109487, 90, 1), labels shape: (109487,)


Reload the saved `.npz` file and check that its contents match what was written.

In [6]:
import numpy as np

# Reload the archive to verify what was written. np.load returns a lazy
# NpzFile that keeps the underlying file open, so use it as a context manager
# and read both arrays into memory before the file is closed.
with np.load("Preprocessed_data/ecg_data.npz") as data:
    print(data.files)  # Lists keys saved inside the file, should print ['beats', 'labels']

    beats = data['beats']
    labels = data['labels']

# Every beat must have exactly one label.
assert beats.shape[0] == labels.shape[0], "beats and labels differ in length"

print("Beats shape:", beats.shape)
print("Labels shape:", labels.shape)
print("Unique labels:", np.unique(labels))
print("Sample beat data (first beat):", beats[0].flatten()[:10])  # print first 10 values of first beat
print("Sample label (first beat):", labels[0])


['beats', 'labels']
Beats shape: (109487, 90, 1)
Labels shape: (109487,)
Unique labels: ['F' 'N' 'Q' 'S' 'V']
Sample beat data (first beat): [0.17674063 0.16985463 0.15837796 0.15837796 0.15837796 0.16602908
 0.17291507 0.16602908 0.15837796 0.1469013 ]
Sample label (first beat): N
