In [1]:
 # ============================================
# Notebook 04: Feature Extraction (ALL RECORDS)
# ============================================
import wfdb
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt, find_peaks

# 1. Define Filter Function
def bandpass_filter(signal, fs=360):
    nyq = 0.5 * fs
    b, a = butter(2, [0.5/nyq, 40/nyq], btype='band')
    return filtfilt(b, a, signal)

# 2. Get list of ALL records
# (We exclude records that don't use MLII lead sometimes, but for now let's try all)
all_records = wfdb.get_record_list('mitdb')
print(f"Found {len(all_records)} records.")

all_features = []

# 3. THE BIG LOOP
for record_id in all_records:
    try:
        print(f"Processing Record {record_id}...", end="\r")
        
        # Load Data
        record = wfdb.rdrecord(record_id, pn_dir='mitdb')
        annotation = wfdb.rdann(record_id, 'atr', pn_dir='mitdb')
        
        # Check if MLII lead exists
        if 'MLII' in record.sig_name:
            lead_idx = record.sig_name.index('MLII')
        else:
            # Fallback to first lead if MLII is missing (rare)
            lead_idx = 0
            
        raw_signal = record.p_signal[:, lead_idx]
        
        # Preprocess
        clean_signal = bandpass_filter(raw_signal)
        
        # Get Real R-peaks from Annotations (Ground Truth)
        r_peaks = annotation.sample
        
        # Loop through beats
        for i in range(1, len(r_peaks) - 1):
            r_curr = r_peaks[i]
            r_prev = r_peaks[i-1]
            r_next = r_peaks[i+1]
            
            # RR Intervals
            rr_prev = (r_curr - r_prev) / 360.0
            rr_next = (r_next - r_curr) / 360.0
            
            # Skip weird beats (artifacts)
            if rr_prev < 0.2 or rr_prev > 2.0: continue
                
            # Features
            features = {
                "record_id": record_id,
                "RR_prev(s)": rr_prev,
                "RR_next(s)": rr_next,
                "QRS_duration(s)": 0.08, # Placeholder or calc width
                "R_amp": clean_signal[r_curr],
                "Heart_Rate(bpm)": 60/rr_prev
            }
            # Simple QRS Min/Max
            window = 18 # +/- 50ms approx
            start = max(0, r_curr - window)
            end = min(len(clean_signal), r_curr + window)
            segment = clean_signal[start:end]
            features["QRS_max"] = np.max(segment)
            features["QRS_min"] = np.min(segment)
            
            all_features.append(features)
            
    except Exception as e:
        print(f"\nError in {record_id}: {e}")

# 4. Save BIG CSV
df = pd.DataFrame(all_features)
df.to_csv("combined_ecg_features.csv", index=False)
print(f"\n\n✅ DONE! Saved {len(df)} beats from all patients.")

Found 48 records.
Processing Record 234...

✅ DONE! Saved 111862 beats from all patients.
