In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from tempfile import TemporaryDirectory

cudnn.benchmark = True
plt.ion()

<contextlib.ExitStack at 0x7e559742d7e0>

In [2]:
# !git clone https://github.com/physionetchallenges/python-example-2024/

In [3]:
# !git clone https://github.com/physionetchallenges/vanilla-cnn-2024.git

In [4]:
# !git clone https://github.com/alphanumericslab/ecg-image-kit.git

In [5]:
## For handwriting distortions
# !pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz &>1

In [6]:
# !pip install gdown &>1
# !pip install -r python-example-2024/requirements.txt &>1
# !pip install -r ecg-image-kit/codes/ecg-image-generator/requirements.txt &>1

In [7]:
# %%time
# !python {SCRIPT_PATH} \
#      -i {input_dir} \
#      -o {output_dir} \
#      --random_print_header 0.5 \
#      --lead_bbox \
#      --lead_name_bbox \
#      --wrinkles \
#      -ca 45 \
#      --augment \
#      --se {SEED} \
#      -rot 10  \
#      -noise 100 \
#      --random_bw 0.1 \
#      --random_grid_color \
#      --hw_text \
#      -n 10 \
#      --store_config 2

# **Reformatt the dataset such that the image paths and labels are in a CSV file**

In [8]:
import os
import pandas as pd

def extract_info_from_hea(folder_path, label_mapping):
    data = []

    # Iterate over all files in the given folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".hea"):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                lines = file.readlines()
                image_path = ""
                labels = ""
                
                # Iterate over each line in the .hea file
                for line in lines:
                    if 'Labels' in line:
                        labels = line.split(":")[1].strip()
                    if 'png' in line:
                        image_path = line.split(":")[1].strip()
                
                # Create a dictionary to hold the image path and label information
                label_info = {label: 0 for label in label_mapping}
                if labels:
                    for label in labels.split(","):
                        if label.strip() in label_mapping:
                            label_info[label.strip()] = 1
                
                label_info["Image_Name"] = image_path
                data.append(label_info)
    
    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data)
    return df

# Define the label mapping
label_mapping = {"NORM", "Acute MI", "Old MI", "STTC", "CD", "HYP", "PAC", "PVC", "AFIB/AFL", "TACHY", "BRADY"}

# Example usage:
folder_path = '/kaggle/input/subset-of-physionet/subset_of_physionet'
df = extract_info_from_hea(folder_path, label_mapping)

# Save the DataFrame to a CSV file
df.to_csv('extracted_info.csv', index=False)
df.head()


Unnamed: 0,Acute MI,PAC,NORM,PVC,Old MI,STTC,TACHY,HYP,CD,BRADY,AFIB/AFL,Image_Name
0,0,0,0,0,0,0,0,0,1,0,0,10003_hr-0.png
1,0,0,0,0,0,1,0,0,0,0,0,10004_hr-0.png
2,0,0,1,0,0,0,0,0,0,0,0,10007_hr-0.png
3,0,0,0,1,0,1,0,1,1,0,0,10006_hr-0.png
4,0,0,1,0,0,0,0,0,0,0,0,10005_hr-0.png


# **Restructuring the dataset in a new folder with the csv above and the images**

In [9]:
import os
import shutil
import pandas as pd

def extract_info_from_hea(folder_path, label_mapping):
    data = []

    # Iterate over all files in the given folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".hea"):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                lines = file.readlines()
                image_path = ""
                labels = ""
                
                # Iterate over each line in the .hea file
                for line in lines:
                    if 'Labels' in line:
                        labels = line.split(":")[1].strip()
                    if 'png' in line:
                        image_path = line.split(":")[1].strip()
                
                # Create a dictionary to hold the image path and label information
                label_info = {label: 0 for label in label_mapping}
                if labels:
                    for label in labels.split(","):
                        if label.strip() in label_mapping:
                            label_info[label.strip()] = 1
                
                label_info["Image_Name"] = image_path
                data.append(label_info)
    
    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data)
    return df

def move_files_and_create_structure(src_folder, dest_folder, df):
    # Create the destination folder if it doesn't exist
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)
    
    # Create the images subfolder
    images_folder = os.path.join(dest_folder, 'images')
    if not os.path.exists(images_folder):
        os.makedirs(images_folder)
    
    # Copy the images
    for index, row in df.iterrows():
        src_image_path = os.path.join(src_folder, row['Image_Name'])
        dest_image_path = os.path.join(images_folder, row['Image_Name'])
        if os.path.exists(src_image_path):
            shutil.copy2(src_image_path, dest_image_path)
    
    # Save the DataFrame to a CSV file
    csv_path = os.path.join(dest_folder, 'multilabel_classification.csv')
    df.to_csv(csv_path, index=False)

# Define the label mapping
label_mapping = {"NORM", "Acute MI", "Old MI", "STTC", "CD", "HYP", "PAC", "PVC", "AFIB/AFL", "TACHY", "BRADY"}

# Set the folder paths
src_folder = '/kaggle/input/subset-of-physionet/subset_of_physionet'
dest_folder = '/kaggle/working/new-data'

# Extract information from the .hea files and create the DataFrame
df = extract_info_from_hea(src_folder, label_mapping)

# Move the files and create the new folder structure
move_files_and_create_structure(src_folder, dest_folder, df)
