In [37]:
import requests

# Base URL that record file names are appended to when downloading.
# The names are concatenated directly, so the trailing slash is required.
BASE_URL = 'https://physionet.org/files/music-sudden-cardiac-death/1.0.0/High-resolution_ECG/'


def download_physionet_files(record_id, timeout=30):
    """Download the ``.dat`` and ``.hea`` files of one record into the working directory.

    Each file name is appended to ``BASE_URL`` and fetched with an HTTP GET.
    A response with status 200 is written to a local file of the same name;
    any other status is reported with a printed message and that file is
    skipped, so the caller may end up with zero, one or both files on disk.

    Args:
        record_id: Record name without extension (the ``.dat`` / ``.hea``
            suffixes are added here).
        timeout: Seconds handed to ``requests.get``. Without a timeout a
            stalled connection would block the whole batch forever.

    Raises:
        requests.RequestException: On connection errors or when the timeout
            expires; these are not swallowed here.
    """
    files = [f'{record_id}.dat', f'{record_id}.hea']

    for file in files:
        response = requests.get(BASE_URL + file, timeout=timeout)
        if response.status_code == 200:
            with open(file, 'wb') as f:
                f.write(response.content)
        else:
            print(f'Failed to download the file {file}')
               

In [38]:
import os
import wfdb
import numpy as np
import neurokit2 as nk
from scipy import signal


def extract_ecg_features(record_id):
    """Download one record, compute its mean heart rate per channel, then clean up.

    Args:
        record_id: Record name without extension, as accepted by
            ``download_physionet_files`` and ``wfdb.rdrecord``.

    Returns:
        dict mapping ``'heart_rate_ch<k>'`` to the mean heart rate in beats
        per minute for the k-th row of the transposed signal array. The value
        is ``nan`` when fewer than two R-peaks were detected, because no
        RR interval exists in that case.

    Raises:
        Any exception raised by the download, by ``wfdb.rdrecord`` or by
        ``nk.ecg_peaks`` propagates to the caller. The downloaded files are
        removed first in every case.
    """
    downloaded_files = (f'{record_id}.dat', f'{record_id}.hea')
    features = {}

    try:
        # First download the files
        download_physionet_files(record_id)

        record = wfdb.rdrecord(record_id)
        # Transposed so that iteration yields one row per channel
        # (assumes p_signal is samples x channels - see the wfdb docs).
        signals = record.p_signal.T

        # Basic signal features
        for channel, signal_data in enumerate(signals):
            # 1. Heart Rate
            _, info = nk.ecg_peaks(signal_data, sampling_rate=record.fs)
            # RR intervals (time between consecutive R-peaks), in seconds
            rr_intervals = np.diff(info['ECG_R_Peaks']) / record.fs
            if rr_intervals.size == 0:
                # Fewer than two peaks: avoid np.mean([]) and its RuntimeWarning.
                mean_hr = np.nan
            else:
                # Heart rate: 60 seconds / average RR interval
                mean_hr = 60 / np.mean(rr_intervals)
            features[f'heart_rate_ch{channel}'] = mean_hr
    finally:
        # Clean up downloaded files even when processing failed. A file may be
        # missing if its download was skipped, so check before removing to
        # avoid masking the original error with FileNotFoundError.
        for path in downloaded_files:
            if os.path.exists(path):
                os.remove(path)

    return features

In [46]:
import pandas as pd

def process_multiple_records(record_ids):
    """Run feature extraction over several records and tabulate the results.

    Every record is announced with a printed line. A record whose extraction
    raises is reported with a printed error and left out of the table; the
    remaining records are still processed.

    Args:
        record_ids: Iterable of record names passed one by one to
            ``extract_ecg_features``.

    Returns:
        pandas.DataFrame with one row per successfully processed record,
        holding the extracted features plus a ``record_id`` column.
    """
    rows = []

    for record_id in record_ids:
        try:
            print(f'Processing record {record_id}')
            row = extract_ecg_features(record_id)
            row['record_id'] = record_id
        except Exception as e:
            print(f'Error processing record {record_id} : {e}')
        else:
            rows.append(row)

    return pd.DataFrame(rows)


In [47]:
def generate_record_ids():
    """Return the record names ``P0001_H`` through ``P0898_H`` in ascending order.

    Each name is the letter ``P``, the record number zero-padded to four
    digits, and the suffix ``_H``.
    """
    # range(1, 899) covers 1 to 898 inclusive; :04d pads with zeros, e.g. 0001
    return [f"P{number:04d}_H" for number in range(1, 899)]

In [None]:
# Trial run: only the first 20 generated record IDs are processed.
record_ids = generate_record_ids()[:20]  # TODO: currently limited to the first 20 records for testing

# Extract features for every record. Records that raise are reported and
# skipped by process_multiple_records, so df may have fewer rows than record_ids.
df = process_multiple_records(record_ids)
# Persist without the index; floats are written with four decimal places.
df.to_csv('ecg_features.csv', index=False, float_format='%.4f')

# Read the file back so the reported shape reflects what was actually written.
df_final = pd.read_csv('ecg_features.csv')
print("\nFinal DataFrame Shape:", df_final.shape)