In [1]:
import wfdb
from wfdb import processing
import pandas as pd
import numpy as np
import re
import os
from datetime import timedelta

import plotly.graph_objs as go
import plotly.offline as pyo

# Use the signal for filters
from scipy.signal import butter, filtfilt, iirnotch


import logging
logging.basicConfig(level=logging.DEBUG)

In [2]:
# Directory to save processed hourly results
processed_results_dir = 'data'
os.makedirs(processed_results_dir, exist_ok=True)

In [3]:
# Initialize a DataFrame for a full 24-hour period, with 5-minute segments
total_duration_hours = 24
segment_duration = timedelta(minutes=5)
total_segments = (total_duration_hours * 60) // 5

# Create the full DataFrame with NaN values initially
hr_hrv_df = pd.DataFrame({
    "Start Time": [timedelta(minutes=5 * i) for i in range(total_segments)],
    "Mean HR (bpm)": [np.nan] * total_segments,
    "HRV (SDNN) (ms)": [np.nan] * total_segments
})

In [4]:
def apply_high_low_filter(signal, sampling_rate, utility_freq):
    # High-pass filter to remove baseline drift (cutoff frequency = 0.5 Hz)
    b_high, a_high = butter(1, 0.5 / (sampling_rate / 2), btype='high')
    filtered_signal = filtfilt(b_high, a_high, signal)

    # Low-pass filter to remove high-frequency noise (cutoff frequency = 50 Hz)
    b_low, a_low = butter(1, 50 / (250 / 2), btype='low')
    filtered_signal = filtfilt(b_low, a_low, filtered_signal)

    # Notch filter to remove utility frequency noise (e.g., 50 Hz or 60 Hz)
    Q = 30.0
    b_notch, a_notch = iirnotch(utility_freq, Q, sampling_rate)
    filtered_signal = filtfilt(b_notch, a_notch, filtered_signal)

    return filtered_signal

In [5]:
# Define function to calculate HR and HRV for each 5-minute segment
def calculate_hr_hrv(signal, sampling_rate, local_start_time, total_samples):
    # Initialize the XQRS detector and detect QRS complexes
    xqrs = processing.XQRS(sig=signal, fs=sampling_rate)
    xqrs.detect(learn=True, verbose=False)
    qrs_indices = np.array(xqrs.qrs_inds)  # Indices of detected R-peaks in sample points

    # Convert QRS indices to local time in seconds (relative to recording start)
    r_wave_times = qrs_indices / sampling_rate

    # Calculate the duration to the next full 5-minute boundary from the local start time
    first_segment_duration = timedelta(minutes=5) - timedelta(seconds=local_start_time.total_seconds() % 300)
    first_segment_duration_seconds = first_segment_duration.total_seconds()
    print("First Segment Duration (s):", first_segment_duration_seconds)

    # Calculate the total duration in seconds
    total_duration = total_samples / sampling_rate  # in seconds

    # Calculate remaining time for full 5-minute segments after the first segment
    remaining_time = total_duration - first_segment_duration_seconds
    num_segments = int(remaining_time // 300)  # Full 5-minute segments
    print(f"Remaining Time (s): {remaining_time}, Full 5-Minute Segments: {num_segments}")

    # Initialize lists to store HR and HRV values, and the start points for each segment
    hr_values = []
    hrv_values = []
    segment_start_indices = []  # Store start points in sample indices

    # Process the shortened first segment using first_segment_duration directly
    first_segment_indices = r_wave_times < first_segment_duration_seconds
    first_segment_rr_intervals = np.diff(r_wave_times[first_segment_indices]) * 1000  # Convert to milliseconds

    # Calculate HR and HRV for the first segment
    if len(first_segment_rr_intervals) > 0:
        mean_hr = int(round(processing.calc_mean_hr(first_segment_rr_intervals / 1000, rr_units="seconds")))
        hrv = int(round(np.std(first_segment_rr_intervals)))
    else:
        mean_hr = np.nan
        hrv = np.nan

    # Append results for the first segment and its start index in sample points
    hr_values.append(mean_hr)
    hrv_values.append(hrv)
    # First segment boundary should be the beginning of the recording in samples
    first_segment_start_index = 0
    segment_start_indices.append(first_segment_start_index)

    # Update current_time to the end of the first segment for subsequent segments
    current_time = first_segment_duration_seconds

    # Process remaining full 5-minute segments based on the relative recording time
    for _ in range(num_segments):
        # Calculate the segment start index in data points (samples)
        segment_start_index = int(current_time * sampling_rate)
        segment_start_indices.append(segment_start_index)
        
        # Define the end of the current 5-minute segment in local recording time
        segment_end = current_time + 300  # 5 minutes in seconds

        # Select R-wave times within the current 5-minute segment
        segment_indices = (r_wave_times >= current_time) & (r_wave_times < segment_end)
        selected_r_wave_times = r_wave_times[segment_indices]
        segment_rr_intervals = np.diff(selected_r_wave_times) * 1000  # Convert RR intervals to milliseconds

        # Calculate HR and HRV if RR intervals are available
        if len(segment_rr_intervals) > 0:
            mean_hr = int(round(processing.calc_mean_hr(segment_rr_intervals / 1000, rr_units="seconds")))
            hrv = int(round(np.std(segment_rr_intervals)))
        else:
            mean_hr = np.nan
            hrv = np.nan

        # Append the HR and HRV results for each segment
        hr_values.append(mean_hr)
        hrv_values.append(hrv)

        # Move to the next 5-minute segment
        current_time = segment_end

    # Return HR, HRV values, QRS indices, and segment start points in sample indices
    return hr_values, hrv_values, qrs_indices, segment_start_indices


In [6]:
# Load both the header (.hea) and the signal data (.mat) using wfdb
record = wfdb.rdrecord('0284_001_004_ECG')

# Get metadata of the signal
patient_number = record.record_name.split("_")[0]
sampling_rate = record.fs
signal_len = record.sig_len
utility_freq = int(record.comments[0].split(": ")[1])
start_time = pd.to_timedelta(record.comments[1].split(": ")[1])
end_time = pd.to_timedelta(record.comments[2].split(": ")[1])

# Access the signal data (numpy array)
ecg_signal_data = record.p_signal.flatten()

filtered_signal = apply_high_low_filter(ecg_signal_data, sampling_rate, utility_freq)
hr_values, hrv_values, qrs_indices, segment_start_points = calculate_hr_hrv(filtered_signal, sampling_rate, start_time, signal_len)

print(hr_values, hrv_values)

# Find the nearest 5-minute interval for `start_time`
start_index = hr_hrv_df[hr_hrv_df["Start Time"] <= start_time].last_valid_index()

# Populate `hr_hrv_df` starting from `start_index`
for i, (hr, hrv) in enumerate(zip(hr_values, hrv_values)):
    if start_index + i < len(hr_hrv_df):  # Ensure we don't go out of bounds
        hr_hrv_df.at[start_index + i, "Mean HR (bpm)"] = hr
        hr_hrv_df.at[start_index + i, "HRV (SDNN) (ms)"] = hrv

First Segment Duration (s): 157.0
Remaining Time (s): 3000.0, Full 5-Minute Segments: 10
[100, 103, 104, 96, 96, 100, 101, 91, 92, 94, 102] [210, 36, 1, 98, 147, 72, 76, 110, 106, 98, 32]


In [7]:
import plotly.graph_objs as go
import plotly.offline as pyo

# Create the Plotly plot for the ECG Signal
trace_signal = go.Scatter(
    y=filtered_signal,
    mode='lines',
    name='ECG Signal'
)

# Create the Plotly plot for the R-Peaks
r_peaks_y = [filtered_signal[i] for i in qrs_indices]  # Get the amplitudes of the R-peaks
trace_r_peaks = go.Scatter(
    x=qrs_indices,
    y=r_peaks_y,
    mode='markers',
    name='R-Peaks',
    marker=dict(color='red', size=8, symbol='x')  # Customize the appearance
)

# Create vertical line markers at the start of each 5-minute segment
segment_lines = [
    go.layout.Shape(
        type="line",
        x0=boundary,
        y0=min(filtered_signal),  # Start from minimum signal amplitude
        x1=boundary,
        y1=max(filtered_signal),  # End at maximum signal amplitude
        line=dict(color="blue", width=1, dash="dash"),
    ) for boundary in segment_start_points
]

# Define the layout with added shapes
layout = go.Layout(
    title='Scrollable ECG Signal with R-Peaks and 5-Minute Segment Lines',
    xaxis=dict(
        title='Time (samples)',
        rangeslider=dict(visible=True),  # Add a range slider to make it scrollable
    ),
    yaxis=dict(
        title='Amplitude',
    ),
    shapes=segment_lines,  # Use segment_lines instead of shapes
    width=1200,  # Set a wider plot
    height=500
)

# Create the figure and add both traces
fig = go.Figure(data=[trace_signal, trace_r_peaks], layout=layout)

# Show the plot in an interactive window
pyo.plot(fig, filename='scrollable_ecg_with_r_peaks_segments.html')


'scrollable_ecg_with_r_peaks_segments.html'

In [8]:
hr_hrv_df.to_csv('data/hr_hrv_df.csv', index=False)

In [9]:
print("Segment boundaries (in samples):", segment_start_points)

Segment boundaries (in samples): [0, 78500, 228500, 378500, 528500, 678500, 828500, 978500, 1128500, 1278500, 1428500]
