In [2]:
import scipy.io
import pandas as pd
import os
import tqdm


In [7]:
def read_mat_file(mat_file_path):
    """Load the array stored under the 'val' key of a MATLAB .mat file.

    Args:
        mat_file_path: Path of the .mat file to read.

    Returns:
        The 'val' array converted to float64.

    Raises:
        KeyError: If the file has no 'val' variable.
    """
    contents = scipy.io.loadmat(mat_file_path)
    return contents['val'].astype('float64')

def read_hea_file(hea_file_path):
    """Return every line of a header file as a list of strings.

    Line terminators are kept, exactly as the file object yields them.

    Args:
        hea_file_path: Path of the text header file to read.

    Returns:
        List with one string per line of the file.
    """
    with open(hea_file_path, 'r') as header:
        return list(header)

def write_to_csv(ecg_data, csv_file_path):
    """Write an array to CSV with one column per row of the input.

    The array is transposed, so row ``k`` of ``ecg_data`` becomes the column
    named ``Lead_<k+1>``. No index column is written.

    Args:
        ecg_data: Array-like exposing ``.shape`` and ``.T``; its first axis
            determines the number of columns.
        csv_file_path: Destination path of the CSV file.
    """
    n_leads = ecg_data.shape[0]
    lead_names = [f"Lead_{lead_no}" for lead_no in range(1, n_leads + 1)]

    # Transpose so that each input row ends up as one CSV column.
    frame = pd.DataFrame(ecg_data.T, columns=lead_names)
    frame.to_csv(csv_file_path, index=False)

def main(read_dataset_path, write_dataset_path, file_numbers=(8342,)):
    """Convert selected records (.hea header + .mat signal) to CSV files.

    For every selected header in ``read_dataset_path`` the matching ``.mat``
    file is loaded with ``read_mat_file``, the header lines are printed, and
    the signal is written to ``<record>.csv`` in ``write_dataset_path`` via
    ``write_to_csv``.

    Args:
        read_dataset_path: Directory containing the ``.hea`` / ``.mat`` pairs.
        write_dataset_path: Directory for the generated CSV files; created if
            it does not exist.
        file_numbers: Iterable of record numbers to convert, where the number
            is the integer formed by the file name without its first
            character and extension (e.g. ``8342`` for ``E08342.hea``).
            Defaults to ``(8342,)``, i.e. only that single record. Pass
            ``None`` to convert every header found.
    """
    # Create the directory for saving .csv files if it doesn't exist
    os.makedirs(write_dataset_path, exist_ok=True)

    wanted_numbers = None if file_numbers is None else set(file_numbers)

    # Sorted so the processing order does not depend on the file system.
    header_names = sorted(
        name for name in os.listdir(read_dataset_path)
        if name.lower().endswith('.hea')
    )

    for header_name in tqdm.tqdm(header_names):
        # splitext drops only the real extension, whatever its letter case;
        # str.replace('.hea', ...) would miss '.HEA' and could also rewrite a
        # '.hea' occurring elsewhere in the path.
        record_name = os.path.splitext(header_name)[0]

        if wanted_numbers is not None:
            try:
                # 'E08342' -> 8342
                file_number = int(record_name[1:])
            except ValueError:
                # A name without a numeric part can never match a requested
                # record number, so skip it instead of aborting the batch.
                continue
            if file_number not in wanted_numbers:
                continue

        header_file = os.path.join(read_dataset_path, header_name)
        mat_file = os.path.join(read_dataset_path, record_name + '.mat')
        csv_file = os.path.join(write_dataset_path, f"{record_name}.csv")

        ecg_data = read_mat_file(mat_file)
        header_info = read_hea_file(header_file)

        print(f"Header Information for {header_file}:", header_info)

        write_to_csv(ecg_data, csv_file)

# Source directory holding the .hea/.mat records and target directory for the
# generated CSV files (both relative to the current working directory).
read_dataset_path = "NFT/ecg/WFDB"
write_dataset_path = "NFT/ecg/csv_files"

# Run the conversion.
main(
    read_dataset_path=read_dataset_path,
    write_dataset_path=write_dataset_path,
)


  0%|          | 0/4 [00:00<?, ?it/s]

header_file=/home/noam.koren/multiTS/NFT/data/ecg/original_files/E00001.hea, file_number=1
file_basename E00001
csv_file /home/noam.koren/multiTS/NFT/data/ecg/original_files/E00001.csv
Header Information for /home/noam.koren/multiTS/NFT/data/ecg/original_files/E00001.hea: ['E00001.mat 12 500 5000 05-May-2020 14:50:55\n', 'E00001.mat 16+24 4880/mV 16 0 136 -28477 0 I\n', 'E00001.mat 16+24 4880/mV 16 0 87 545 0 II\n', 'E00001.mat 16+24 4880/mV 16 0 -48 29413 0 III\n', 'E00001.mat 16+24 4880/mV 16 0 -112 -18879 0 aVR\n', 'E00001.mat 16+24 4880/mV 16 0 92 -29015 0 aVL\n', 'E00001.mat 16+24 4880/mV 16 0 19 -17384 0 aVF\n', 'E00001.mat 16+24 4880/mV 16 0 -39 10780 0 V1\n', 'E00001.mat 16+24 4880/mV 16 0 58 -22686 0 V2\n', 'E00001.mat 16+24 4880/mV 16 0 87 -24025 0 V3\n', 'E00001.mat 16+24 4880/mV 16 0 97 -26617 0 V4\n', 'E00001.mat 16+24 4880/mV 16 0 87 21518 0 V5\n', 'E00001.mat 16+24 4880/mV 16 0 78 25805 0 V6\n', '#Age: NaN\n', '#Sex: Female\n', '#Dx: 426783006\n', '#Rx: Unknown\n', '#Hx:

 50%|█████     | 2/4 [00:00<00:00,  2.29it/s]

header_file=/home/noam.koren/multiTS/NFT/data/ecg/original_files/E00002.hea, file_number=2
file_basename E00002
csv_file /home/noam.koren/multiTS/NFT/data/ecg/original_files/E00002.csv
Header Information for /home/noam.koren/multiTS/NFT/data/ecg/original_files/E00002.hea: ['E00002.mat 12 500 5000 05-May-2020 14:50:55\n', 'E00002.mat 16+24 4880/mV 16 0 -29 22732 0 I\n', 'E00002.mat 16+24 4880/mV 16 0 -48 14193 0 II\n', 'E00002.mat 16+24 4880/mV 16 0 -19 -8085 0 III\n', 'E00002.mat 16+24 4880/mV 16 0 39 -18578 0 aVR\n', 'E00002.mat 16+24 4880/mV 16 0 -4 15736 0 aVL\n', 'E00002.mat 16+24 4880/mV 16 0 -34 3242 0 aVF\n', 'E00002.mat 16+24 4880/mV 16 0 9 7180 0 V1\n', 'E00002.mat 16+24 4880/mV 16 0 0 -28031 0 V2\n', 'E00002.mat 16+24 4880/mV 16 0 -19 27957 0 V3\n', 'E00002.mat 16+24 4880/mV 16 0 -29 27216 0 V4\n', 'E00002.mat 16+24 4880/mV 16 0 -29 -30966 0 V5\n', 'E00002.mat 16+24 4880/mV 16 0 -39 -5123 0 V6\n', '#Age: NaN\n', '#Sex: Female\n', '#Dx: 426783006\n', '#Rx: Unknown\n', '#Hx: Un

100%|██████████| 4/4 [00:01<00:00,  3.63it/s]

header_file=/home/noam.koren/multiTS/NFT/data/ecg/original_files/E00004.hea, file_number=4
file_basename E00004
csv_file /home/noam.koren/multiTS/NFT/data/ecg/original_files/E00004.csv
Header Information for /home/noam.koren/multiTS/NFT/data/ecg/original_files/E00004.hea: ['E00004.mat 12 500 5000 05-May-2020 14:50:55\n', 'E00004.mat 16+24 4880/mV 16 0 19 -15417 0 I\n', 'E00004.mat 16+24 4880/mV 16 0 -29 -1024 0 II\n', 'E00004.mat 16+24 4880/mV 16 0 -48 14870 0 III\n', 'E00004.mat 16+24 4880/mV 16 0 4 7928 0 aVR\n', 'E00004.mat 16+24 4880/mV 16 0 34 17587 0 aVL\n', 'E00004.mat 16+24 4880/mV 16 0 -39 -25847 0 aVF\n', 'E00004.mat 16+24 4880/mV 16 0 9 -30941 0 V1\n', 'E00004.mat 16+24 4880/mV 16 0 19 -10611 0 V2\n', 'E00004.mat 16+24 4880/mV 16 0 39 -8878 0 V3\n', 'E00004.mat 16+24 4880/mV 16 0 -9 10926 0 V4\n', 'E00004.mat 16+24 4880/mV 16 0 0 4649 0 V5\n', 'E00004.mat 16+24 4880/mV 16 0 39 -28293 0 V6\n', '#Age: 75\n', '#Sex: Male\n', '#Dx: 426177001,425623009,164934002,164873001\n', '#R




In [4]:
def extract_sex_from_hea_files(hea_folder_path):
    """Map each header file in a folder to the value of its ``#Sex:`` line.

    Args:
        hea_folder_path: Directory to scan for files whose name ends in
            ``.hea`` (case-insensitive).

    Returns:
        Dict mapping the file name without its extension to the stripped text
        following ``#Sex:``. Files with no such line are omitted; if a file
        has several, the last one wins.
    """
    sex_tag = '#Sex:'
    person_to_sex = {}

    for file_name in os.listdir(hea_folder_path):
        if not file_name.lower().endswith('.hea'):
            continue

        # splitext removes the extension regardless of its letter case, so
        # 'E00001.HEA' yields 'E00001' just like 'E00001.hea'.
        person_id = os.path.splitext(file_name)[0]

        with open(os.path.join(hea_folder_path, file_name), 'r') as f:
            for line in f:
                if line.startswith(sex_tag):
                    # Slice after the tag rather than splitting on ': ', so
                    # '#Sex:Male' or a bare '#Sex:' cannot raise IndexError.
                    person_to_sex[person_id] = line[len(sex_tag):].strip()

    return person_to_sex

# Folder that extract_sex_from_hea_files would scan for .hea files.
# NOTE(review): this looks like a placeholder path — set it to the real
# header directory before enabling the call below.
hea_folder_path = '/path/to/hea/files'

# Call is currently disabled, so person_to_sex is not defined by this cell.
# person_to_sex = extract_sex_from_hea_files(hea_folder_path)

In [11]:

# Directory whose files are counted
dir_path = "NFT/ecg/csv_files"

# Keep regular files only: a plain os.listdir() also returns sub-directories,
# which would inflate the reported "files" count.
with os.scandir(dir_path) as entries:
    files_in_dir = [entry.name for entry in entries if entry.is_file()]

# Count the number of files
num_files = len(files_in_dir)

print(f"There are {num_files} files in the directory.")


There are 10344 files in the directory.
