In [11]:
import os
import wfdb
import csv

# Function to process a single directory
def process_folder(directory, base_dir, result_base_dir):
    """Convert every record listed in a folder's RECORDS file, then recurse.

    Args:
        directory: Folder to process, given relative to ``base_dir`` and
            written with ``/`` separators (a trailing ``/`` is accepted).
        base_dir: Root directory that ``directory`` is resolved against.
        result_base_dir: Root of the output tree; results for this folder
            are written under ``result_base_dir/directory``.

    Raises:
        FileNotFoundError: If ``base_dir/directory`` does not exist
            (raised by ``os.listdir``).
    """
    # Create result directory (mirrors the input layout under result_base_dir)
    result_dir = os.path.join(result_base_dir, directory)
    os.makedirs(result_dir, exist_ok=True)

    # Path to the directory containing records
    records_dir = os.path.join(base_dir, directory.replace('/', os.sep))  # Normalize path separator
    records_path = os.path.join(records_dir, 'RECORDS')

    # If RECORDS file exists, process the files listed
    if os.path.exists(records_path):
        with open(records_path, 'r') as records_file:
            for line in records_file:
                file_id = line.strip()
                if not file_id:
                    # Blank line (e.g. trailing newline) names no record.
                    continue
                mat_path = os.path.join(base_dir, directory, file_id + '.mat').replace('/', os.sep)
                hea_path = os.path.join(base_dir, directory, file_id).replace('/', os.sep)

                # Only records whose .mat file is actually present are converted
                if os.path.exists(mat_path):
                    process_file_data((mat_path, hea_path, result_dir, file_id))

    # Recursively process subdirectories. The recursive call must receive the
    # path relative to base_dir (directory/sub_dir), not the bare entry name,
    # otherwise nested folders resolve to base_dir/sub_dir, which is wrong.
    for sub_dir in sorted(os.listdir(records_dir)):
        sub_dir_path = os.path.join(records_dir, sub_dir)
        if os.path.isdir(sub_dir_path):
            process_folder(os.path.join(directory, sub_dir), base_dir, result_base_dir)

# Function to process a single file
def process_file_data(data_args):
    """Write one WFDB record out as ``<result_dir>/<file_id>.csv``.

    Args:
        data_args: 4-tuple ``(mat_path, hea_path, result_dir, file_id)``.
            ``mat_path`` is only printed as a progress line; ``hea_path`` is
            the record name handed to ``wfdb.rdsamp``; ``result_dir`` must
            already exist; ``file_id`` becomes the CSV file stem.

    The CSV starts with a header row (``time`` followed by the signal names)
    and then holds one row per sample, where ``time`` is the sample index
    divided by the sampling frequency reported in the record metadata.
    """
    mat_path, hea_path, result_dir, file_id = data_args
    print(mat_path)

    # rdsamp returns the sample matrix together with a metadata dict
    samples, info = wfdb.rdsamp(hea_path)

    # Pull out the metadata fields needed to lay out the CSV
    fs = info['fs']
    n_samples = info['sig_len']
    n_channels = info['n_sig']
    channel_names = info['sig_name']

    out_path = os.path.join(result_dir, f"{file_id}.csv")
    with open(out_path, 'w', newline='') as out_file:
        csv_writer = csv.writer(out_file)

        # Header: time column first, then one column per signal
        csv_writer.writerow(['time'] + channel_names)

        # Body: rows are produced lazily, one per sample index
        csv_writer.writerows(
            [idx / fs] + [samples[idx][ch] for ch in range(n_channels)]
            for idx in range(n_samples)
        )

    

In [14]:
# Root of the dataset, relative to the current working directory
records_directory = './a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0'

# The root RECORDS file lists one subfolder path per line. Blank lines are
# dropped: an empty path would make process_folder treat the dataset root
# itself as a record folder.
with open(os.path.join(records_directory, 'RECORDS'), 'r') as first_records_file:
    subfolder_paths = [line.strip() for line in first_records_file if line.strip()]

# Convert each listed subfolder into output/<subfolder>, printing its path
# once that folder has been processed
for subfolder_path in subfolder_paths:
    process_folder(subfolder_path, records_directory, 'output')
    print(subfolder_path)

./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00001.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00002.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00004.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00005.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00006.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00007.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00008.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00009.mat
./a-large-scale-12-lead-electrocardiogram-database-for-arrhythmia-study-1.0.0/WFDBRecords/01/010/JS00010.mat
./a-large-scale-12-