In [None]:
import wfdb
import pandas as pd
import os
import re

In [None]:
# Segment record names as they appear inside a master header: 7 digits, '_', 4 digits.
_SEGMENT_NAME_RE = re.compile(r'\b\d{7}_\d{4}\b')


def _listed_segment_names(hea_path):
    """Return every segment-name match found in the header file, in file order."""
    with open(hea_path, 'r') as hea_file:
        return [name for line in hea_file for name in _SEGMENT_NAME_RE.findall(line)]


def _read_first_channel(record_path):
    """Read the WFDB record at ``record_path`` and return channel 0 of its physical signal."""
    return wfdb.rdrecord(record_path).p_signal[:, 0]


def aggregate_wf_files(subject_dir, subject_id, output_dir):
    """
    Combine the first signal channel of every segment listed in each of a
    subject's .hea files into one single-column CSV per .hea file.

    For every file in ``subject_dir`` whose name starts with ``subject_id``,
    ends with '.hea' and does not end with 'n.hea', the header is scanned for
    segment names (7 digits, underscore, 4 digits). Each listed segment that
    has a file with the same stem in ``subject_dir`` is read with
    ``wfdb.rdrecord`` and its channel 0 is appended, in header order, to the
    output. The result is written to ``<output_dir>/<hea stem>.csv`` without
    header or index. A CSV is written even when no segment could be read.

    NOTE(review): channel 0 is treated as the PPG signal; the signal names of
    the record are not checked. Confirm this holds for the dataset in use.

    Errors are reported with ``print`` and never raised: a failing segment is
    skipped, while any other failure stops processing of this subject.

    Args:
        subject_dir (str): Path to the directory containing all files for the subject ID.
        subject_id (str): Subject ID; used as the required file-name prefix.
        output_dir (str): Existing directory in which the CSV files are saved.
    """
    try:
        # One directory scan serves both the header search and the
        # "is this segment present?" lookup (stems without extension).
        entries = os.listdir(subject_dir)
        available_files = {os.path.splitext(entry)[0] for entry in entries}

        for file_name in entries:
            if not (file_name.endswith('.hea') and file_name.startswith(subject_id)):
                continue

            record_name = file_name[:-4]
            # Headers whose stem ends in 'n' are deliberately skipped
            # (presumably the numerics records - confirm against the dataset).
            if record_name.endswith('n'):
                continue

            hea_path = os.path.join(subject_dir, file_name)

            # Keep each segment as an array and join once at the end; extending
            # a Python list would box every single sample as an object.
            segments = []
            for segment_name in _listed_segment_names(hea_path):
                if segment_name not in available_files:
                    continue
                try:
                    segments.append(
                        _read_first_channel(os.path.join(subject_dir, segment_name))
                    )
                except Exception as e:
                    # Best effort: one unreadable segment must not lose the rest.
                    print(f"Error processing {segment_name}: {e}")

            if segments:
                combined = pd.concat(
                    [pd.Series(segment) for segment in segments],
                    ignore_index=True,
                )
            else:
                # pd.concat rejects an empty list; write an empty file instead.
                combined = pd.Series(dtype=float)

            # Save the combined data into a single CSV file named after the .hea file
            combined_file_name = os.path.splitext(file_name)[0] + ".csv"
            combined_csv_file = os.path.join(output_dir, combined_file_name)
            combined.to_csv(combined_csv_file, index=False, header=False)
            print(f"All PPG data from {record_name} has been combined and saved to {combined_csv_file}.")

    except Exception as e:
        print(f"Error processing subject directory: {e}")

In [None]:
# Subject IDs to export; each one names a sub-directory of both base directories.
subject_ids = [
    'p000608', 'p000776', 'p000946',
    'p004490', 'p004829', 'p009526',
    'p010391', 'p013072', 'p013136',
    'p014079', 'p015852', 'p016684',
    'p017344', 'p019608', 'p022954',
    'p023824', 'p025117', 'p026377',
    'p026964', 'p029512', 'p043613',
    'p050089', 'p050384', 'p055204',
    'p058932', 'p062160', 'p063039',
    'p063628', 'p068956', 'p069339',
    'p075371', 'p075796', 'p077729',
    'p079998', 'p081349', 'p085866',
    'p087275', 'p087675', 'p089565',
    'p089964', 'p092289', 'p092846',
    'p094847', 'p097547', 'p099674',
]

# Input root (WFDB records) and output root (CSV files).
base_subject_dir = 'I:/mimic_dataset/wfdb_dataset_125hz'
base_output_dir = 'I:/mimic_dataset/csv_dataset_125hz'

for subject_id in subject_ids:
    # Both per-subject paths are the respective root joined with the subject ID.
    subject_directory, output_directory = (
        os.path.join(root, subject_id)
        for root in (base_subject_dir, base_output_dir)
    )

    # Create the output directory first; an already existing one is fine.
    os.makedirs(output_directory, exist_ok=True)

    aggregate_wf_files(subject_directory, subject_id, output_directory)