Import Dependencies

Imports os, csv, and Counter from collections for file handling and counting labels.

In [5]:
import os
import csv
from collections import Counter

Extract Label from Header File

Defines extract_label(), which reads a .hea header file and returns the text that follows "# Reason for admission:", or "Unknown" if the file contains no such line.

In [6]:
# Pull the admission reason out of a single record header (.hea) file
def extract_label(header_file):
    """Return the text following "# Reason for admission:" in a header file.

    The file is scanned line by line. The first line that begins with the
    marker is used: everything after its first colon is returned with the
    surrounding whitespace stripped.

    Args:
        header_file: Path of the header file to read.

    Returns:
        The stripped reason text, or the string "Unknown" when no line in
        the file starts with the marker.
    """
    marker = "# Reason for admission:"
    with open(header_file, 'r') as handle:
        # Lazily filter the lines and stop at the first hit; next() is
        # called while the file is still open.
        first_hit = next((row for row in handle if row.startswith(marker)), None)

    if first_hit is None:
        return "Unknown"

    # The marker itself holds the first colon of the line, so the tail of
    # the partition is exactly the value part.
    _, _, value = first_hit.partition(":")
    return value.strip()

Process PTB Directory

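Defines process_ptb_directory(), which loops through the patient folders, extracts a label from every .hea file, and writes the results to a CSV file. A record is written only if its diagnosis occurs more than twice across the whole dataset and is not "n/a" (compared case-insensitively).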

In [7]:
# Function to process PTB directory and extract labels
def process_ptb_directory(ptb_dir, output_file, min_count=3):
    """Collect the label of every .hea file under ``ptb_dir`` and write a filtered CSV.

    Every sub-directory of ``ptb_dir`` is treated as one patient folder. For
    each file in it whose name ends with ".hea", the label is obtained with
    ``extract_label`` and tallied. Once all labels have been counted, the
    records are written to ``output_file`` under the header row
    ``patient, header file, diagnosis``.

    A record is kept only when its label occurs at least ``min_count`` times
    over the whole directory tree and is not "n/a" (case-insensitive).

    Args:
        ptb_dir: Directory that contains one sub-directory per patient.
        output_file: Path of the CSV file to create or overwrite.
        min_count: Minimum number of occurrences a label needs in order to
            be written. The default of 3 reproduces the earlier fixed rule
            "more than two occurrences".

    Returns:
        None. The result is the CSV file written to ``output_file``.

    Raises:
        OSError: If ``ptb_dir`` cannot be listed or ``output_file`` cannot be
            opened for writing.
    """
    diagnosis_counts = Counter()
    all_data = []

    # os.listdir() yields entries in arbitrary order; sorting both levels
    # makes the CSV row order reproducible across runs and machines.
    for patient_folder in sorted(os.listdir(ptb_dir)):
        patient_path = os.path.join(ptb_dir, patient_folder)
        if not os.path.isdir(patient_path):
            continue  # only directories are treated as patient folders

        for file_name in sorted(os.listdir(patient_path)):
            if not file_name.endswith(".hea"):
                continue
            reason = extract_label(os.path.join(patient_path, file_name))
            record_name = os.path.splitext(file_name)[0]
            all_data.append((patient_folder, record_name, reason))
            diagnosis_counts[reason] += 1

    # Filtering happens only after the full scan because the keep/drop
    # decision depends on the final per-label totals.
    with open(output_file, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["patient", "header file", "diagnosis"])
        writer.writerows(
            [patient, header, reason]
            for patient, header, reason in all_data
            if diagnosis_counts[reason] >= min_count and reason.lower() != "n/a"
        )

In [8]:
# Driver cell: point the extractor at the local PTB dataset folder and
# choose the CSV that will receive the filtered labels.
ptb_directory = "ptb-diagnostic-ecg-database-1.0.0"
output_csv = "new_labels.csv"

# Scan every patient folder and write the label table.
process_ptb_directory(ptb_dir=ptb_directory, output_file=output_csv)

# Confirm where the result was written.
print(f"Labels extracted and saved to {output_csv}")

Labels extracted and saved to new_labels.csv
