In [10]:
import wfdb
import pandas as pd
import os
import re

In [11]:
def _read_pleth_segments(hea_path, subject_dir, available_files):
    """Collect the cleaned PLETH samples of every segment named in a header file.

    Args:
        hea_path (str): Path to the .hea file whose text lists the segments.
        subject_dir (str): Directory that holds the segment records.
        available_files (set[str]): Extension-less names of the files present
            in ``subject_dir``; segments not in this set are skipped.

    Returns:
        list[pandas.Series]: One series per segment that has a 'PLETH' channel,
        in the order the segments appear in the header, with NaN replaced by 0.
        Empty when no segment of this header carries PLETH data.
    """
    with open(hea_path, 'r') as hea_file:
        header_text = hea_file.read()

    segments = []
    # Segment names look like "<7 digits>_<4 digits>"; the pattern cannot span
    # a newline, so scanning the whole text equals scanning line by line.
    for segment_name in re.findall(r'\b\d{7}_\d{4}\b', header_text):
        if segment_name not in available_files:
            continue
        try:
            record = wfdb.rdrecord(os.path.join(subject_dir, segment_name))
            if 'PLETH' not in record.sig_name:
                continue
            pleth = record.p_signal[:, record.sig_name.index('PLETH')]
            # Vectorised replacement of missing samples with 0.
            segments.append(pd.Series(pleth).fillna(0))
        except Exception as e:
            # Best effort: one unreadable segment must not abort the others.
            print(f"Error processing {segment_name}: {e}")
    return segments


def aggregate_wf_files(subject_dir, subject_id, output_dir):
    """
    Combine the PPG ('PLETH') samples of each waveform header into one CSV file.

    For every ``<subject_id>*.hea`` file in ``subject_dir`` (headers whose name
    ends in ``n.hea`` are skipped -- presumably the numerics records, confirm
    against the dataset layout), the segments listed in the header are read
    with ``wfdb.rdrecord`` and their PLETH channels are concatenated in header
    order. The result is written to ``<output_dir>/<header name>.csv`` as a
    single column without index or header row. A CSV is written only for
    headers that actually yielded PLETH data.

    Args:
        subject_dir (str): Path to the directory containing all files for the subject ID.
        subject_id (str): Subject ID; only header files starting with it are used.
        output_dir (str): Output directory to save the CSV files (created on demand).

    Returns:
        None. Errors are reported with ``print`` instead of being raised.
    """
    try:
        # Listing once is enough: the directory content is loop-invariant.
        # Sorting makes the processing order deterministic.
        dir_entries = sorted(os.listdir(subject_dir))
        available_files = {os.path.splitext(entry)[0] for entry in dir_entries}

        for file_name in dir_entries:
            is_waveform_header = (
                file_name.endswith('.hea')
                and file_name.startswith(subject_id)
                and not file_name.endswith('n.hea')
            )
            if not is_waveform_header:
                continue

            hea_path = os.path.join(subject_dir, file_name)
            segments = _read_pleth_segments(hea_path, subject_dir, available_files)

            # Decide per header: a previous header that had PPG data must not
            # cause an empty CSV to be written for this one.
            if not segments:
                continue

            os.makedirs(output_dir, exist_ok=True)

            combined_file_name = os.path.splitext(file_name)[0] + ".csv"
            combined_csv_file = os.path.join(output_dir, combined_file_name)

            combined_df = pd.concat(segments, ignore_index=True).to_frame()
            combined_df.to_csv(combined_csv_file, index=False, header=False)
            print(f"All PPG data from {file_name[:-4]} has been combined and saved to {combined_csv_file}.")

    except Exception as e:
        print(f"Error processing subject directory: {e}")

In [12]:
# Subjects to convert; each ID is also the name of the subject's sub-folder.
subject_ids = [
    'p000608', 'p000776', 'p000946',
    'p004490', 'p004829', 'p009526',
    'p010391', 'p013072', 'p013136',
    'p014079', 'p015852', 'p016684',
    'p017344', 'p019608', 'p022954',
    'p023824', 'p025117', 'p026377',
    'p026964', 'p029512', 'p043613',
    'p050089', 'p050384', 'p055204',
    'p058932', 'p062160', 'p063039',
    'p063628', 'p068956', 'p069339',
    'p075371', 'p075796', 'p077729',
    'p079998', 'p081349', 'p085866',
    'p087275', 'p087675', 'p089565',
    'p089964', 'p092289', 'p092846',
    'p094847', 'p097547', 'p099674',
]

# Root folders: WFDB records are read from the first, CSV files go to the second.
base_subject_dir = 'I:/mimic_dataset/wfdb_dataset_125hz'
base_output_dir = 'I:/mimic_dataset/csv_dataset_125hz'

# Run the aggregation once per subject, mirroring the per-subject folder layout.
for subject_id in subject_ids:
    subject_directory = os.path.join(base_subject_dir, subject_id)
    output_directory = os.path.join(base_output_dir, subject_id)
    aggregate_wf_files(
        subject_dir=subject_directory,
        subject_id=subject_id,
        output_dir=output_directory,
    )

All PPG data from p000946-2120-05-14-08-08 has been combined and saved to I:/mimic_dataset/csv_dataset_125hz\p000946\p000946-2120-05-14-08-08.csv.
All PPG data from p004829-2103-08-30-21-52 has been combined and saved to I:/mimic_dataset/csv_dataset_125hz\p004829\p004829-2103-08-30-21-52.csv.
All PPG data from p013072-2194-01-22-16-13 has been combined and saved to I:/mimic_dataset/csv_dataset_125hz\p013072\p013072-2194-01-22-16-13.csv.
All PPG data from p015852-2148-05-03-18-39 has been combined and saved to I:/mimic_dataset/csv_dataset_125hz\p015852\p015852-2148-05-03-18-39.csv.
All PPG data from p016684-2188-01-29-00-06 has been combined and saved to I:/mimic_dataset/csv_dataset_125hz\p016684\p016684-2188-01-29-00-06.csv.
All PPG data from p019608-2125-02-05-04-57 has been combined and saved to I:/mimic_dataset/csv_dataset_125hz\p019608\p019608-2125-02-05-04-57.csv.
All PPG data from p022954-2136-02-29-17-52 has been combined and saved to I:/mimic_dataset/csv_dataset_125hz\p022954\p