In [1]:
import scipy.io
import pandas as pd
import os
import tqdm


In [10]:
def read_mat_file(mat_file_path):
    """Load the ECG signal matrix stored in a MATLAB ``.mat`` file.

    Args:
        mat_file_path: Path of the ``.mat`` file to load.

    Returns:
        The array stored under the ``'val'`` key, cast to ``float64``.

    Raises:
        KeyError: If the loaded file has no ``'val'`` variable.
    """
    contents = scipy.io.loadmat(mat_file_path)

    # The signal lives under 'val'; cast so downstream code always sees floats.
    signal = contents['val']
    return signal.astype('float64')

def read_hea_file(hea_file_path):
    """Read a header (``.hea``) file and return its lines.

    Args:
        hea_file_path: Path of the text file to read.

    Returns:
        A list with one string per line; each string keeps its trailing
        newline, if the file had one.
    """
    with open(hea_file_path, 'r') as header:
        # Iterating a text file yields exactly the lines readlines() would.
        return list(header)

def write_to_csv(ecg_data, csv_file_path):
    """Write a leads-by-samples array to a CSV file with one column per lead.

    Args:
        ecg_data: 2-D array whose first axis indexes the leads; it is
            transposed so that every lead becomes a column.
        csv_file_path: Destination path of the CSV file.

    The columns are named ``Lead_1`` .. ``Lead_N`` and no index column is
    written.
    """
    lead_count = ecg_data.shape[0]
    column_names = [f"Lead_{idx + 1}" for idx in range(lead_count)]

    # Transpose: rows become samples, columns become leads.
    samples_by_lead = pd.DataFrame(ecg_data.T, columns=column_names)
    samples_by_lead.to_csv(csv_file_path, index=False)

def main(read_dataset_path, write_dataset_path, target_file_number=8342):
    """Convert ``.mat`` recordings to CSV files, driven by their ``.hea`` headers.

    Every ``*.hea`` file (extension matched case-insensitively) found directly
    inside ``read_dataset_path`` is considered. For each selected record the
    sibling ``.mat`` file is loaded, the header lines are printed, and the
    signal is written to ``<write_dataset_path>/<record name>.csv``.

    Args:
        read_dataset_path: Folder containing the ``.hea``/``.mat`` pairs.
        write_dataset_path: Folder receiving the CSV files; created when
            it does not exist yet.
        target_file_number: Only the record whose file name, minus its first
            character and its extension, parses to this integer is converted
            (e.g. ``8342`` selects ``E08342.hea``). Pass ``None`` to convert
            every record.
    """
    # Create the directory for saving .csv files if it doesn't exist
    os.makedirs(write_dataset_path, exist_ok=True)

    # List all header files
    header_files = [
        os.path.join(read_dataset_path, f)
        for f in os.listdir(read_dataset_path)
        if f.lower().endswith('.hea')
    ]

    for header_file in tqdm.tqdm(header_files):
        # splitext removes only the final extension, whatever its case, and
        # never touches a '.hea' substring elsewhere in the path (which a
        # plain str.replace on the whole path would).
        record_path, _ = os.path.splitext(header_file)
        file_basename = os.path.basename(record_path)

        if target_file_number is not None:
            # Extract the number from names like 'E08342' (leading letter dropped).
            try:
                file_number = int(file_basename[1:])
            except ValueError:
                # A name without a numeric part cannot match the target record.
                continue

            # Skip every record except the requested one
            if file_number != target_file_number:
                continue

        # Paths of the matching .mat input and the .csv output
        mat_file = record_path + '.mat'
        csv_file = os.path.join(write_dataset_path, f"{file_basename}.csv")

        ecg_data = read_mat_file(mat_file)
        header_info = read_hea_file(header_file)

        print(f"Header Information for {header_file}:", header_info)

        write_to_csv(ecg_data, csv_file)

# Input folder scanned for .hea headers (with their sibling .mat files) and
# output folder that receives the generated CSV files.
# NOTE(review): both are absolute, user-specific paths; adjust them before
# running this cell on another machine.
read_dataset_path = "/home/noam.koren/multiTS/NFT/ecg/WFDB"
write_dataset_path = "/home/noam.koren/multiTS/NFT/ecg/csv_files"

# Run the conversion.
main(read_dataset_path, write_dataset_path)


  0%|          | 0/10344 [00:00<?, ?it/s]

100%|██████████| 10344/10344 [00:00<00:00, 213641.46it/s]

Header Information for /home/noam.koren/multiTS/NFT/ecg/WFDB/E08342.hea: ['E08342.mat 12 500 5000 05-May-2020 09:49:17\n', 'E08342.mat 16+24 4880/mV 16 0 14 -8720 0 I\n', 'E08342.mat 16+24 4880/mV 16 0 434 18152 0 II\n', 'E08342.mat 16+24 4880/mV 16 0 419 26869 0 III\n', 'E08342.mat 16+24 4880/mV 16 0 -224 27879 0 aVR\n', 'E08342.mat 16+24 4880/mV 16 0 -202 14918 0 aVL\n', 'E08342.mat 16+24 4880/mV 16 0 427 22460 0 aVF\n', 'E08342.mat 16+24 4880/mV 16 0 -126 -19446 0 V1\n', 'E08342.mat 16+24 4880/mV 16 0 -151 -23710 0 V2\n', 'E08342.mat 16+24 4880/mV 16 0 -73 -581 0 V3\n', 'E08342.mat 16+24 4880/mV 16 0 -107 -2655 0 V4\n', 'E08342.mat 16+24 4880/mV 16 0 -117 15105 0 V5\n', 'E08342.mat 16+24 4880/mV 16 0 -136 21869 0 V6\n', '#Age: 45\n', '#Sex: Male\n', '#Dx: 713426002\n', '#Rx: Unknown\n', '#Hx: Unknown\n', '#Sx: Unknown\n']





In [4]:
def extract_sex_from_hea_files(hea_folder_path):
    """Map each record in a folder to the value of its ``#Sex:`` header line.

    Every ``*.hea`` file (extension matched case-insensitively) directly inside
    ``hea_folder_path`` is scanned for lines starting with ``#Sex:``.

    Args:
        hea_folder_path: Folder containing the header files.

    Returns:
        A dict mapping the file name without its extension to the text after
        ``#Sex:``, stripped of surrounding whitespace. Files without such a
        line are left out; if a file has several, the last one wins.
    """
    person_to_sex = {}

    for file_name in os.listdir(hea_folder_path):
        if not file_name.lower().endswith('.hea'):
            continue

        # splitext drops the extension regardless of its case, in line with
        # the case-insensitive filter above.
        person_id, _ = os.path.splitext(file_name)

        with open(os.path.join(hea_folder_path, file_name), 'r') as f:
            for line in f:
                if line.startswith('#Sex:'):
                    # Split on the first ':' only, so '#Sex:Male' (no space)
                    # and a bare '#Sex:' are parsed instead of raising
                    # IndexError.
                    person_to_sex[person_id] = line.partition(':')[2].strip()

    return person_to_sex

# Placeholder location; point this at the folder that holds the .hea files.
hea_folder_path = '/path/to/hea/files'

# Example call, left disabled; set hea_folder_path to a real folder before enabling it.
# person_to_sex = extract_sex_from_hea_files(hea_folder_path)

In [11]:

# Directory whose entries are counted.
# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
dir_path = "/home/noam.koren/multiTS/NFT/ecg/csv_files"

# os.listdir returns the name of every entry in the directory, so
# sub-directories (if any) are included alongside regular files.
files_in_dir = os.listdir(dir_path)

# Count the number of entries
num_files = len(files_in_dir)

print(f"There are {num_files} files in the directory.")


There are 10344 files in the directory.
