In [1]:
import wfdb
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt
from IPython.display import display

# -------------------------------
# Load ECG record & annotations
# -------------------------------
record_id = "100"
record = wfdb.rdrecord(record_id, pn_dir="mitdb")
annotation = wfdb.rdann(record_id, "atr", pn_dir="mitdb")

fs = record.fs
signal = record.p_signal[:, 0]   # MLII lead
r_peaks = annotation.sample

# -------------------------------
# Bandpass filter (same as preprocessing)
# -------------------------------
def bandpass_filter(signal, lowcut=0.5, highcut=40, fs=360, order=2):
    nyq = 0.5 * fs
    b, a = butter(order,
                  [lowcut / nyq, highcut / nyq],
                  btype='band')
    return filtfilt(b, a, signal)

signal = bandpass_filter(signal, fs=fs)

# -------------------------------
# Feature Extraction
# -------------------------------
features = []

for i in range(1, len(r_peaks) - 1):
    r_prev = r_peaks[i - 1]
    r_curr = r_peaks[i]
    r_next = r_peaks[i + 1]

    # RR intervals (seconds)
    rr_prev = (r_curr - r_prev) / fs
    rr_next = (r_next - r_curr) / fs

    # Physiological RR filtering (MANDATORY)
    if rr_prev < 0.3 or rr_prev > 2.0:
        continue

    # Heart Rate
    heart_rate = 60 / rr_prev

    # QRS window ±50 ms (for amplitude only)
    qrs_window = int(0.05 * fs)
    start = max(r_curr - qrs_window, 0)
    end = min(r_curr + qrs_window, len(signal))
    qrs_segment = signal[start:end]

    features.append({
        "beat_index": i,
        "RR_prev(s)": rr_prev,
        "RR_next(s)": rr_next,
        "Heart_Rate(bpm)": heart_rate,
        "R_amp": signal[r_curr],
        "QRS_max": np.max(qrs_segment),
        "QRS_min": np.min(qrs_segment)
    })

# -------------------------------
# Create DataFrame
# -------------------------------
features_df = pd.DataFrame(features).round(4)

# -------------------------------
# Clean Table Preview (NO COLOURS)
# -------------------------------
display(features_df.head(15))

# -------------------------------
# Dataset Info
# -------------------------------
print("\nDataset Info:")
print(features_df.info())

# -------------------------------
# Save CLEAN CSV
# -------------------------------
csv_name = f"ecg_features_record_{record_id}_clean.csv"
features_df.to_csv(csv_name, index=False)

print(f"\n✅ Clean feature CSV saved as '{csv_name}'")


Unnamed: 0,beat_index,RR_prev(s),RR_next(s),Heart_Rate(bpm),R_amp,QRS_max,QRS_min
0,2,0.8139,0.8111,73.7201,1.1957,1.1957,-0.2026
1,3,0.8111,0.7889,73.9726,1.134,1.1435,-0.222
2,4,0.7889,0.7917,76.0563,1.0618,1.0714,-0.2845
3,5,0.7917,0.7889,75.7895,1.0214,1.0214,-0.2153
4,6,0.7889,0.8167,76.0563,1.0741,1.0741,-0.2119
5,7,0.8167,0.6528,73.4694,1.1528,1.1528,-0.2214
6,8,0.6528,0.9944,91.9149,1.0995,1.0995,-0.2393
7,9,0.9944,0.8444,60.3352,1.1377,1.1484,-0.205
8,10,0.8444,0.8111,71.0526,1.1139,1.1139,-0.2298
9,11,0.8111,0.7889,73.9726,1.0842,1.0869,-0.2288



Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2271 entries, 0 to 2270
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   beat_index       2271 non-null   int64  
 1   RR_prev(s)       2271 non-null   float64
 2   RR_next(s)       2271 non-null   float64
 3   Heart_Rate(bpm)  2271 non-null   float64
 4   R_amp            2271 non-null   float64
 5   QRS_max          2271 non-null   float64
 6   QRS_min          2271 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 124.3 KB
None

✅ Clean feature CSV saved as 'ecg_features_record_100_clean.csv'
