In [2]:
import os
import shutil
import pandas as pd

# Input and output locations used by the rest of the script
data_dir = r"C:\Users\marti\Desktop\dom\Data"  # directory that holds the image files
metadata_path = r"C:\Users\marti\Desktop\dom\Data\metadata.csv"  # CSV that maps images to labels
output_dir = r"C:\Users\marti\Desktop\dom"  # root directory that receives the per-label folders

# Load the label table from disk
metadata = pd.read_csv(metadata_path)

# Print the first rows so the column layout can be checked by eye
# (the code below relies on the "isic_id" and "diagnosis" columns)
print(metadata.head(n=5))

# Narrow the table down to the columns of interest; a missing column raises KeyError here
metadata = metadata.loc[:, ['isic_id', 'diagnosis', 'age_approx']]

# Function to sort images by label and copy them into per-label folders
def sort_images_by_label(data_dir, metadata, output_dir):
    """
    Copy each image into a folder named after its label, renaming it on the way.

    For every row of ``metadata`` the file ``<isic_id>.jpg`` is looked up in
    ``data_dir`` and copied to ``<output_dir>/<diagnosis>/<diagnosis>_<n>.jpg``.
    The source images are left in place (``shutil.copy`` is used, not a move).
    Numbering continues after the number of entries already present in the
    label folder, and a number is skipped if the resulting name is already
    taken, so existing files are never overwritten.

    Rows whose ``isic_id`` or ``diagnosis`` is missing are skipped, and rows
    whose image file does not exist are reported and skipped.

    Args:
        data_dir (str): Folder containing the images.
        metadata (DataFrame): Table with at least the columns "isic_id" and
            "diagnosis".
        output_dir (str): Folder in which one subfolder per label is created.

    Raises:
        KeyError: If ``metadata`` lacks the "isic_id" or "diagnosis" column.
    """
    # Create the output folder if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Create one subfolder per label; missing diagnoses are dropped because
    # NaN/None cannot be turned into a folder name
    label_dirs = {}
    for label in metadata['diagnosis'].dropna().unique():
        label_dir = os.path.join(output_dir, str(label))
        os.makedirs(label_dir, exist_ok=True)
        label_dirs[label] = label_dir

    # Only rows with both an image id and a label can be processed
    usable = metadata.dropna(subset=['isic_id', 'diagnosis'])
    skipped = len(metadata) - len(usable)
    if skipped:
        print(f"Skipped {skipped} row(s) with missing isic_id or diagnosis")

    # Next number to hand out per label; filled lazily from the folder
    # contents so the directory is listed once per label, not once per image
    next_number = {}

    # Process each image
    for isic_id, label in zip(usable['isic_id'], usable['diagnosis']):
        img_name = f"{isic_id}.jpg"
        img_path = os.path.join(data_dir, img_name)

        # Check if the image file exists
        if not os.path.isfile(img_path):
            print(f"Image not found: {img_name}")
            continue

        label_dir = label_dirs[label]
        number = next_number.get(label)
        if number is None:
            number = len(os.listdir(label_dir)) + 1

        # Advance past names that are already taken so nothing is overwritten
        new_img_name = f"{label}_{number}.jpg"
        new_img_path = os.path.join(label_dir, new_img_name)
        while os.path.exists(new_img_path):
            number += 1
            new_img_name = f"{label}_{number}.jpg"
            new_img_path = os.path.join(label_dir, new_img_name)

        # Copy the image under its new name (the original stays in data_dir)
        shutil.copy(img_path, new_img_path)
        next_number[label] = number + 1
        print(f"Image copied: {img_name} -> {new_img_name} to {label_dir}")

# Run the sorter: copy every listed image into the folder for its diagnosis
sort_images_by_label(
    data_dir=data_dir,
    metadata=metadata,
    output_dir=output_dir,
)


        isic_id                                        attribution  \
0  ISIC_0024306  ViDIR Group, Department of Dermatology, Medica...   
1  ISIC_0024307  ViDIR Group, Department of Dermatology, Medica...   
2  ISIC_0024308  ViDIR Group, Department of Dermatology, Medica...   
3  ISIC_0024309  ViDIR Group, Department of Dermatology, Medica...   
4  ISIC_0024310  ViDIR Group, Department of Dermatology, Medica...   

  copyright_license  age_approx anatom_site_general anatom_site_special  \
0          CC-BY-NC        45.0                 NaN                 NaN   
1          CC-BY-NC        50.0     lower extremity                 NaN   
2          CC-BY-NC        55.0                 NaN                 NaN   
3          CC-BY-NC        40.0                 NaN                 NaN   
4          CC-BY-NC        60.0      anterior torso                 NaN   

  benign_malignant  concomitant_biopsy diagnosis diagnosis_1  \
0           benign               False     nevus      Benign   
