In [1]:
import wfdb
import numpy as np
import pandas as pd

In [2]:
# First 5 hours of every recording, expressed as (start_sample, end_sample).
#
# 5 h = 18,000 s, so the end samples below correspond to:
#   ecg  4,500,000 -> 250 samples/s, 9,000,000 -> 500 samples/s
#   resp   900,000 ->  50 samples/s
# NOTE(review): the per-infant sampling rates are inferred from these numbers,
# not read from the record headers -- confirm against the headers.
#
# Infant 1's respiration range previously ended at 9,000,000, i.e. ten times
# longer than every other infant's 5-hour window; it is aligned with the rest.
subset_ranges = {
    1: {'ecg': (0, 4500000), 'resp': (0, 900000)},
    2: {'ecg': (0, 9000000), 'resp': (0, 900000)},
    3: {'ecg': (0, 9000000), 'resp': (0, 900000)},
    4: {'ecg': (0, 9000000), 'resp': (0, 900000)},
    5: {'ecg': (0, 4500000), 'resp': (0, 900000)},
    6: {'ecg': (0, 9000000), 'resp': (0, 900000)},
    7: {'ecg': (0, 9000000), 'resp': (0, 900000)},
    8: {'ecg': (0, 9000000), 'resp': (0, 900000)},
    9: {'ecg': (0, 9000000), 'resp': (0, 900000)},
    10: {'ecg': (0, 9000000), 'resp': (0, 900000)}
}


In [3]:
def remove_outliers_iqr(data, column_name):
    """
    Removes outliers from a pandas DataFrame based on the IQR method.

    A row is kept when its value in ``column_name`` lies inside
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (both ends inclusive), where Q1 and
    Q3 are the 25th and 75th percentiles of that column. Rows whose value is
    NaN fail the comparison and are dropped.

    Parameters:
        data (pd.DataFrame): The input data containing the column to filter.
        column_name (str): The name of the column to remove outliers from.

    Returns:
        pd.DataFrame: A new, independent DataFrame with outliers removed.
            The original index labels of the surviving rows are preserved
            and ``data`` itself is left untouched.
    """
    values = data[column_name]
    Q1 = values.quantile(0.25)
    Q3 = values.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Return an explicit copy: callers add columns to the result, and doing
    # that on a boolean-mask slice raises SettingWithCopyWarning.
    return data[values.between(lower_bound, upper_bound)].copy()

In [4]:
def extract_heart_rate(annotation, start, end):
    """
    Derive instantaneous heart rate from annotated beat positions.

    Parameters:
        annotation: Object exposing ``sample`` (array of annotation sample
            indices) and ``fs`` (sampling frequency used to convert sample
            counts into seconds).
        start: First sample index to include (inclusive).
        end: Last sample index to include (inclusive).

    Returns:
        tuple: ``(beat_samples, heart_rate)`` where ``beat_samples`` are the
            selected annotation samples without the first one and
            ``heart_rate`` is ``60 / interval_seconds`` for each consecutive
            pair, so both arrays have the same length.
    """
    samples = annotation.sample
    inside_window = (samples >= start) & (samples <= end)
    beats = samples[inside_window]

    # Spacing between consecutive beats, converted from samples to seconds.
    seconds_between_beats = np.diff(beats) / annotation.fs
    beats_per_minute = 60 / seconds_between_beats

    # The first beat has no preceding interval, so it carries no rate.
    return beats[1:], beats_per_minute

def extract_respiration_rate(annotation, start, end):
    """
    Derive instantaneous respiration rate from annotated breath positions.

    Parameters:
        annotation: Object exposing ``sample`` (array of annotation sample
            indices) and ``fs`` (sampling frequency used to convert sample
            counts into seconds).
        start: First sample index to include (inclusive).
        end: Last sample index to include (inclusive).

    Returns:
        tuple: ``(breath_samples, respiration_rate)`` where
            ``breath_samples`` are the selected annotation samples without
            the first one and ``respiration_rate`` is
            ``60 / interval_seconds`` for each consecutive pair, so both
            arrays have the same length.
    """
    samples = annotation.sample
    inside_window = (samples >= start) & (samples <= end)
    breaths = samples[inside_window]

    # Spacing between consecutive breaths, converted from samples to seconds.
    seconds_between_breaths = np.diff(breaths) / annotation.fs
    breaths_per_minute = 60 / seconds_between_breaths

    # The first breath has no preceding interval, so it carries no rate.
    return breaths[1:], breaths_per_minute


def process_infant_subset(infant_number, subset_ranges):
    """
    Build outlier-filtered heart-rate and respiration-rate tables for one infant.

    Parameters:
        infant_number: Key into ``subset_ranges``; also used to build the
            record names ``infant{n}_ecg`` and ``infant{n}_resp``.
        subset_ranges (dict): Maps infant number to
            ``{'ecg': (start, end), 'resp': (start, end)}`` sample ranges.

    Returns:
        tuple: ``(heart_rate_df, respiration_rate_df)``.
            ``heart_rate_df`` has columns ``time_r_peak`` (annotation sample
            divided by the ECG record's sampling frequency) and
            ``heart_rate``; ``respiration_rate_df`` has columns
            ``respiration_peak_time`` and ``respiration_rate``. Both have had
            IQR outliers removed via ``remove_outliers_iqr``.

    Notes:
        Only the sampling frequency of each record is needed here, so the
        record *headers* are read rather than the signals themselves. As a
        consequence a range that extends past the end of a recording is not
        rejected; it simply selects whatever annotations exist in the range.
    """
    # Get the specific ranges for this infant
    ranges = subset_ranges[infant_number]
    ecg_start, ecg_end = ranges['ecg']
    resp_start, resp_end = ranges['resp']

    ecg_record_name = f'infant{infant_number}_ecg'
    resp_record_name = f'infant{infant_number}_resp'

    # Header-only reads: avoids loading millions of signal samples just to
    # obtain the sampling frequency.
    ecg_fs = wfdb.rdheader(ecg_record_name).fs
    resp_fs = wfdb.rdheader(resp_record_name).fs

    # Load R-peak annotations to derive HR
    r_peak_annotations = wfdb.rdann(ecg_record_name, 'qrsc')
    r_times, heart_rates = extract_heart_rate(r_peak_annotations, ecg_start, ecg_end)

    # Load respiration peak annotations to derive RR
    resp_peak_annotations = wfdb.rdann(resp_record_name, 'resp')
    resp_times, respiration_rates = extract_respiration_rate(
        resp_peak_annotations, resp_start, resp_end
    )

    # Create DataFrame for Heart Rate
    heart_rate_df = pd.DataFrame({
        'time_r_peak': r_times / ecg_fs,
        'heart_rate': heart_rates,
    })
    heart_rate_df = remove_outliers_iqr(heart_rate_df, 'heart_rate')

    # Create DataFrame for Respiration Rate
    respiration_rate_df = pd.DataFrame({
        'respiration_peak_time': resp_times / resp_fs,
        'respiration_rate': respiration_rates,
    })
    respiration_rate_df = remove_outliers_iqr(respiration_rate_df, 'respiration_rate')

    return heart_rate_df, respiration_rate_df





In [5]:
# Collect the per-infant tables in lists and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies all earlier
# rows on every iteration, and concatenating onto an empty DataFrame emits a
# FutureWarning on recent pandas versions.
heart_rate_frames = []
respiration_rate_frames = []

# Iterate over the configured infants rather than a hard-coded 1..10 so the
# loop always agrees with subset_ranges.
for i in subset_ranges:
    heart_rate_df, respiration_rate_df = process_infant_subset(i, subset_ranges)
    # .assign returns a new frame, so the filtered result is never mutated
    # in place (avoids SettingWithCopyWarning).
    heart_rate_df = heart_rate_df.assign(infant_number=i)  # Identify the infant
    respiration_rate_df = respiration_rate_df.assign(infant_number=i)  # Identify the infant
    heart_rate_frames.append(heart_rate_df)
    respiration_rate_frames.append(respiration_rate_df)

all_heart_rate_data = pd.concat(heart_rate_frames, ignore_index=True)
all_respiration_rate_data = pd.concat(respiration_rate_frames, ignore_index=True)

# Save the unified subset datasets to CSV for future use
#all_heart_rate_data.to_csv('all_infants_5h_subset_heart_rate_outlierRem.csv', index=False)
#all_respiration_rate_data.to_csv('all_infants_5h_subset_respiration_rate_outlierRem.csv', index=False)


In [8]:
# Reload the saved respiration-rate table from disk.
# NOTE(review): the cell that writes this CSV is commented out above, so this
# read relies on a file produced by an earlier run -- confirm it exists.
file_path = 'all_infants_5h_subset_respiration_rate_outlierRem.csv'
data = pd.read_csv(file_path)

# Keep only infant 2's rows, then thin them out by taking every 223rd row
# (starting with the first) and renumbering the result from 0.
is_infant_2 = data['infant_number'] == 2
infant_2_data = data.loc[is_infant_2]
every_223rd_row = slice(None, None, 223)
sampled_infant_2_data = infant_2_data.iloc[every_223rd_row].reset_index(drop=True)
sampled_infant_2_data

#output_inf_2_file_path = 'infant_2_5h_respiration_rate_outlierRem.csv'
#infant_2_data.to_csv(output_inf_2_file_path, index=False)



Unnamed: 0,respiration_peak_time,respiration_rate,infant_number
0,2.22,54.545455,2
1,411.28,40.000000,2
2,653.52,50.847458,2
3,959.22,39.473684,2
4,1255.22,65.217391,2
...,...,...,...
71,16368.14,62.500000,2
72,16592.44,63.829787,2
73,16814.88,48.387097,2
74,17643.14,78.947368,2
