In [1]:
# Import Modules
import os
import pandas as pd
import numpy as np
from scipy.io import loadmat, savemat
import matplotlib.pyplot as plt
import neurokit2 as nk
from scipy.signal import find_peaks
import zlib

In [2]:
data_dir = r'S:\Sedation_monitoring\PedAccel_directory\PedAccel\data_analysis\ECG_analysis\PICU_ECG_Extract'
original_sampling_rate = 240
lead_time = 10
window_size = 15

In [11]:
def calculate_sd1_sd2(rr_intervals):
    diff_rr = np.diff(rr_intervals)
    sd1 = np.sqrt(np.var(diff_rr) / 2)
    sd2 = np.sqrt(2 * np.var(rr_intervals) - np.var(diff_rr) / 2)
    return sd1, sd2

def calculate_ecg_features(ecg_signal, sampling_rate=240):
    # Ensure ecg_signal is a 1D array
    ecg_signal = np.ravel(ecg_signal)
    
    # Find R-peaks
    peaks, _ = find_peaks(ecg_signal, distance=sampling_rate//2)  # Assume at least 0.5s between peaks
    
    # Calculate RR intervals
    rr_intervals = np.diff(peaks) / sampling_rate
    
    # Calculate heart rate
    heart_rates = 60 / rr_intervals
    
    mean_hr = np.mean(heart_rates)
    sd_nn = np.std(rr_intervals)
    rmssd = np.sqrt(np.mean(np.diff(rr_intervals)**2))
    sd_hr = np.std(heart_rates)
    
    # Calculate SD1 and SD2
    sd1, sd2 = calculate_sd1_sd2(rr_intervals)
    
    return mean_hr, sd_nn, rmssd, sd_hr, sd1, sd2

In [4]:
def load_from_excel(file_path):
    # Implement this function to load data from Excel
    # Return the data and column names
    data = pd.read_excel(file_path)
    return data, data.columns.tolist()

In [12]:
for patient in os.listdir(data_dir):
    print(f"Processing {patient}")
    patient_dir = os.path.join(data_dir, patient)
    print(patient_dir)

    # sbs_file = os.path.join(patient_dir, f'{patient}_Retro_SBS_Scores.xlsx')
    # if not os.path.isfile(sbs_file):
    #     raise FileNotFoundError(f'EPIC file not found: {sbs_file}')
    
    # print(sbs_file)
    
    # epic_data, epic_names = load_from_excel(sbs_file)

    # # Statement to load Retrospective SBS Scores
    # epic_data = epic_data[(epic_data['Default'] != 'Y') & (epic_data['SBS'] != '')]

    # epic_data.dropna(subset=['SBS'], inplace=True)
    # epic_data['dts'] = pd.to_datetime(epic_data['Time_uniform'], format='mixed')
    # epic_data['start_time'] = epic_data['dts'] - pd.Timedelta(lead_time, 'minutes')
    # epic_data['end_time'] = epic_data['dts'] + pd.Timedelta(window_size - lead_time, 'minutes')
    # print(len(epic_data))


    ecg_file = os.path.join(patient_dir, f'{patient}_10MIN_5MIN_ECG_SBSFinal.mat')
    ecg_data = loadmat(ecg_file)

    print(ecg_data.keys())

    ecg_data['sbs_score'] = ecg_data['sbs_score'].flatten()
    ecg_data['ecg1'] = ecg_data['ecg1'].flatten()
    ecg_data['ecg2'] = ecg_data['ecg2'].flatten()
    ecg_data['ecg3'] = ecg_data['ecg3'].flatten()

    print(ecg_data['sbs_score'].shape)
    print(ecg_data['ecg1'].shape)
    print(ecg_data['ecg2'].shape)
    print(ecg_data['ecg3'].shape)

    features = []
for ecg_column in ['ecg1', 'ecg2', 'ecg3']:
    mean_hr, sd_nn, rmssd, sd_hr, sd1, sd2 = calculate_ecg_features(ecg_data[ecg_column])
    features.append({
        f'{ecg_column}_mean_hr': mean_hr,
        f'{ecg_column}_sd_nn': sd_nn,
        f'{ecg_column}_rmssd': rmssd,
        f'{ecg_column}_sd_hr': sd_hr,
        f'{ecg_column}_sd1': sd1,
        f'{ecg_column}_sd2': sd2
    })

    # Create a DataFrame for the calculated metrics
    df_features = pd.DataFrame(features)

    # Add the SBS score to the features DataFrame
    df_features['sbs_score'] = ecg_data['sbs_score']

    # Display the first few rows and column names of the features DataFrame
    print(df_features.head())
    print("\nColumn names:")
    print(df_features.columns)


Processing Patient4
S:\Sedation_monitoring\PedAccel_directory\PedAccel\data_analysis\ECG_analysis\PICU_ECG_Extract\Patient4
dict_keys(['__header__', '__version__', '__globals__', 'sbs_score', 'start_time', 'end_time', 'ecg1', 'ecg2', 'ecg3'])
(115,)
(115,)
(115,)
(115,)


ValueError: setting an array element with a sequence.