In [1]:
import wfdb
import numpy as np
import pandas as pd

In [2]:
# Sample-index windows (start, stop) for the first 5 hours of every recording,
# keyed by infant number and then by signal type ('ecg' / 'resp').
# NOTE(review): the stop values correspond to 5 h only at 250 Hz (4.5e6),
# 500 Hz (9e6) and 50 Hz (0.9e6) -- presumably the per-record sampling
# rates; confirm against the record headers.
subset_ranges = {
    infant: {'ecg': (0, 9_000_000), 'resp': (0, 900_000)}
    for infant in range(1, 11)
}

# Infants 1 and 5 use a shorter ECG window; infant 1 also uses a longer
# respiration window than the other recordings.
subset_ranges[1] = {'ecg': (0, 4_500_000), 'resp': (0, 9_000_000)}
subset_ranges[5] = {'ecg': (0, 4_500_000), 'resp': (0, 900_000)}

In [3]:
def extract_heart_rate(annotation, start, end):
    """Derive instantaneous heart rate from R-peak annotations.

    Parameters
    ----------
    annotation : object exposing ``sample`` (array of annotation sample
        indices) and ``fs`` (sampling frequency used to convert samples
        to seconds).
    start, end : sample indices bounding the window; both bounds are
        inclusive.

    Returns
    -------
    (samples, bpm) : the sample index of every beat in the window except
        the first, and the heart rate in beats per minute computed from
        the interval that ends at that beat.
    """
    samples = annotation.sample
    in_window = np.logical_and(samples >= start, samples <= end)
    beat_samples = samples[in_window]

    # Beat-to-beat spacing in seconds (sample difference / sampling rate).
    interval_seconds = np.diff(beat_samples) / annotation.fs
    beats_per_minute = 60 / interval_seconds

    # The first beat has no preceding interval, so it is dropped.
    return beat_samples[1:], beats_per_minute

def extract_respiration_rate(annotation, start, end):
    """Derive instantaneous respiration rate from breath-peak annotations.

    Parameters
    ----------
    annotation : object exposing ``sample`` (array of annotation sample
        indices) and ``fs`` (sampling frequency used to convert samples
        to seconds).
    start, end : sample indices bounding the window; both bounds are
        inclusive.

    Returns
    -------
    (samples, breaths_per_minute) : the sample index of every peak in the
        window except the first, and the rate in breaths per minute
        computed from the interval that ends at that peak.
    """
    all_peaks = annotation.sample
    keep = (all_peaks >= start) & (all_peaks <= end)
    windowed_peaks = all_peaks[keep]

    # Peak-to-peak spacing in seconds (sample difference / sampling rate).
    seconds_between_breaths = np.diff(windowed_peaks) / annotation.fs

    # The first peak has no preceding interval, so it is dropped.
    return windowed_peaks[1:], 60 / seconds_between_breaths


def process_infant_subset(infant_number, subset_ranges):
    """Build a per-beat table of heart rate and respiration rate for one infant.

    Parameters
    ----------
    infant_number : key into ``subset_ranges``; also used to build the record
        names ``infant{N}_ecg`` and ``infant{N}_resp``.
    subset_ranges : mapping of infant number to
        ``{'ecg': (start, stop), 'resp': (start, stop)}`` sample windows.

    Returns
    -------
    pandas.DataFrame with one row per heart-rate value and the columns
    ``time_r_peak``, ``heart_rate``, ``respiration_peak_time`` and
    ``respiration_rate``. Times are sample indices divided by the sampling
    frequency of the corresponding record.

    Notes
    -----
    NOTE(review): respiration values are matched to beats by row index, not
    by time. The respiration series is truncated to the number of beats, or
    padded at the end with its median, so a row's respiration columns do not
    necessarily describe the same instant as its heart-rate columns.
    """
    # Get the specific ranges for this infant
    ranges = subset_ranges[infant_number]
    ecg_range = ranges['ecg']
    resp_range = ranges['resp']

    ecg_name = f'infant{infant_number}_ecg'
    resp_name = f'infant{infant_number}_resp'

    # Only the sampling frequencies are needed from the records, so read the
    # headers instead of loading millions of signal samples with rdrecord.
    ecg_fs = wfdb.rdheader(ecg_name).fs
    resp_fs = wfdb.rdheader(resp_name).fs

    # Load R-peak annotations to derive HR
    r_peak_annotations = wfdb.rdann(ecg_name, 'qrsc')
    r_times, heart_rates = extract_heart_rate(r_peak_annotations, ecg_range[0], ecg_range[1])

    # Load respiration peak annotations to derive RR
    resp_peak_annotations = wfdb.rdann(resp_name, 'resp')
    resp_times, respiration_rates = extract_respiration_rate(resp_peak_annotations, resp_range[0], resp_range[1])

    # Truncate or pad respiration peaks and rates to match heart rate length
    n_beats = len(heart_rates)
    if len(resp_times) > n_beats:
        resp_times = resp_times[:n_beats]
        respiration_rates = respiration_rates[:n_beats]
    else:
        # Fill the missing tail with the median of the available values
        # (0 when there are none) so every column has n_beats entries.
        median_resp_peak = np.median(resp_times) if len(resp_times) > 0 else 0
        resp_times = np.pad(resp_times, (0, n_beats - len(resp_times)), 'constant', constant_values=median_resp_peak)
        median_resp_rate = np.median(respiration_rates) if len(respiration_rates) > 0 else 0
        respiration_rates = np.pad(respiration_rates, (0, n_beats - len(respiration_rates)), 'constant', constant_values=median_resp_rate)

    # Create a DataFrame to store features
    data = pd.DataFrame({
        'time_r_peak': r_times / ecg_fs,
        'heart_rate': heart_rates,
        'respiration_peak_time': resp_times / resp_fs,
        'respiration_rate': respiration_rates,
    })

    return data





In [4]:
# Process every infant and stack the per-infant tables into one DataFrame.
# Frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all accumulated rows on every iteration and
# starts from an empty frame, which is unnecessary.
infant_frames = []

for infant_number in range(1, 11):  # Assuming 10 infants numbered from 1 to 10
    infant_subset_data = process_infant_subset(infant_number, subset_ranges)
    infant_subset_data['infant_number'] = infant_number  # Add a column to identify the infant
    infant_frames.append(infant_subset_data)

all_infants_subset_data = pd.concat(infant_frames, ignore_index=True)

# Save the unified subset dataset to CSV for future use
all_infants_subset_data.to_csv('all_infants_5h_subset_vital_signs.csv', index=False)
