In [None]:
import os
import shutil
import pandas as pd
from tqdm import tqdm

In [None]:
def organize_images_by_class(csv_path, image_dir, output_dir):
    class_cols = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK']

    df = pd.read_csv(csv_path)

    # Create label column from one-hot encoded class columns
    df['label'] = df[class_cols].values.argmax(axis=1)
    df['label_name'] = df['label'].map(lambda x: class_cols[x])

    # Construct full filename with .jpg extension
    df['filename'] = df['image'].astype(str) + '.jpg'

    # Create output folders for each class
    for class_name in class_cols:
        os.makedirs(os.path.join(output_dir, class_name), exist_ok=True)

    print(f"üìÅ Organizing images into: {output_dir}")

    # Copy images to class folders
    for _, row in tqdm(df.iterrows(), total=len(df), desc="üì¶ Copying images"):
        src_path = os.path.join(image_dir, row['filename'])
        dst_path = os.path.join(output_dir, row['label_name'], row['filename'])

        if os.path.exists(src_path):
            shutil.copy2(src_path, dst_path)
        else:
            print(f"‚ö†Ô∏è Missing file: {row['filename']}")

    print("‚úÖ All images organized successfully.")

In [4]:
organize_images_by_class(
    csv_path='ISIC_2019_Training_GroundTruth.csv',
    image_dir='ISIC_2019_Training_Input',
    output_dir='ISIC_2019_organized/Train'
)

üìÅ Organizing images into: ISIC_2019_organized/Train


üì¶ Copying images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25331/25331 [08:53<00:00, 47.49it/s]

‚úÖ All images organized successfully.





In [6]:
organize_images_by_class(
    csv_path='ISIC_2019_Test_GroundTruth.csv',
    image_dir='ISIC_2019_Test_Input',
    output_dir='ISIC_2019_organized/Test'
)

üìÅ Organizing images into: ISIC_2019_organized/Test


üì¶ Copying images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8238/8238 [01:53<00:00, 72.28it/s] 

‚úÖ All images organized successfully.





In [None]:
def organize_ham10000_images(csv_path, image_dir, output_dir):
    # Read metadata
    df = pd.read_csv(csv_path)

    # Extract diagnosis labels
    class_labels = df['dx'].unique()

    # Create folders for each class
    for label in class_labels:
        os.makedirs(os.path.join(output_dir, label), exist_ok=True)

    # Construct full filename
    df['filename'] = df['image_id'].astype(str) + '.jpg'

    print(f"üìÅ Organizing HAM10000 images into: {output_dir}")

    # Copy images to class-labeled directories
    for _, row in tqdm(df.iterrows(), total=len(df), desc="üì¶ Copying images"):
        src_path = os.path.join(image_dir, row['filename'])
        dst_path = os.path.join(output_dir, row['dx'], row['filename'])

        if os.path.exists(src_path):
            shutil.copy2(src_path, dst_path)
        else:
            #print(f"‚ö†Ô∏è Missing file: {row['filename']}")
            pass

    print("‚úÖ All images organized successfully.")

In [5]:
organize_ham10000_images(
    csv_path='ha\HAM10000_metadata.csv',
    image_dir='ha\HAM10000_images_part_1',
    output_dir='HAM10000_organized'
)

üìÅ Organizing HAM10000 images into: HAM10000_organized


üì¶ Copying images:   0%|          | 0/10015 [00:00<?, ?it/s]

üì¶ Copying images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10015/10015 [01:27<00:00, 113.83it/s]

‚úÖ All images organized successfully.





In [6]:
organize_ham10000_images(
    csv_path='ha\HAM10000_metadata.csv',
    image_dir='ha\HAM10000_images_part_2',
    output_dir='HAM10000_organized'
)

üìÅ Organizing HAM10000 images into: HAM10000_organized


üì¶ Copying images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10015/10015 [01:45<00:00, 95.25it/s] 

‚úÖ All images organized successfully.



