In [1]:
import os

def list_folders(directory):
    """Return the names of the immediate subdirectories of ``directory``.

    Plain files are skipped. Names are returned (not full paths) in the
    order ``os.listdir`` reports them.
    """
    subfolders = []
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if os.path.isdir(entry_path):
            subfolders.append(entry)
    return subfolders

# Example usage: list the subfolders of the dataset root.
# The path is relative, so it is resolved against the kernel's working directory.
directory_path = "Skin Cancer Dataset"
# `folders` is kept at module level; the next cell reads it.
folders = list_folders(directory_path)
print(folders)


['Acitinic Keratosis', 'Basal Cell Carcinoma', 'Dermatofibroma', 'Melanoma', 'Nevus', 'Pigmented Benign Keratosis', 'Seborrheic Keratosis', 'Squamous Cell Carcinoma', 'Vascular Lesion']


In [2]:
# Count the subfolders returned by list_folders.
len(folders)

9

In [3]:
import os
import shutil
import random

# File extensions (lower-case, with leading dot) that are treated as images.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _copy_renamed(filenames, class_dir, split_root, class_name):
    """Copy ``filenames`` from ``class_dir`` to ``split_root/class_name`` as ``<class_name>_<n><ext>``."""
    target_dir = os.path.join(split_root, class_name)
    os.makedirs(target_dir, exist_ok=True)

    for index, filename in enumerate(filenames, start=1):
        stem = f"{class_name}_{index}"
        # Keep the real extension so a PNG is not mislabelled as ".jpg".
        extension = os.path.splitext(filename)[1].lower()

        # A re-run must overwrite slot <n> even if the file that lands in it
        # now has a different extension; otherwise the same slot would hold
        # two images and the split would contain stale files.
        for other_extension in _IMAGE_EXTENSIONS:
            if other_extension == extension:
                continue
            stale_path = os.path.join(target_dir, stem + other_extension)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

        shutil.copy2(  # copy2 also preserves file metadata
            os.path.join(class_dir, filename),
            os.path.join(target_dir, stem + extension),
        )


def split_data(source_folder, dest_folder, train_ratio=0.8, test_ratio=0.1, split_ratio=0.1, seed=None):
    """Copy a one-folder-per-class image dataset into train/test/split subsets.

    Every subdirectory of ``source_folder`` is treated as one class. Its image
    files (``.jpg``, ``.jpeg``, ``.png``, case-insensitive) are shuffled and
    copied to ``dest_folder/{train,test,split}/<class>/`` under the name
    ``<class>_<n><ext>``, where ``n`` starts at 1 within each subset and
    ``<ext>`` is the lower-cased extension of the source file. The source
    files are left untouched.

    Args:
        source_folder: Directory containing one subdirectory per class.
        dest_folder: Directory that receives the ``train``, ``test`` and
            ``split`` trees; created if missing.
        train_ratio: Fraction of each class copied to ``train`` (rounded down).
        test_ratio: Fraction of each class copied to ``test`` (rounded down).
        split_ratio: Accepted for interface compatibility only. The ``split``
            subset always receives every image left over after the train and
            test portions, so no image is ever dropped by rounding.
        seed: Optional seed for a reproducible split. When ``None`` the
            module-level ``random`` state is used, so a prior
            ``random.seed(...)`` call is still honoured.

    Raises:
        ValueError: If a ratio is negative or ``train_ratio + test_ratio``
            exceeds 1.
        OSError: Propagated from ``os.listdir`` / ``shutil.copy2``, e.g. when
            ``source_folder`` does not exist.
    """
    # Small tolerance so that sums such as 0.7 + 0.3 are not rejected because
    # of floating-point rounding.
    if train_ratio < 0 or test_ratio < 0 or train_ratio + test_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and test_ratio must be non-negative and sum to at most 1 "
            f"(got train_ratio={train_ratio}, test_ratio={test_ratio})"
        )

    # Read the class list first: a missing source fails here, before any
    # destination folder has been created. Sorting makes the order independent
    # of the file system's listing order.
    class_names = sorted(
        entry for entry in os.listdir(source_folder)
        if os.path.isdir(os.path.join(source_folder, entry))
    )

    # Ensure the destination folders exist
    train_path = os.path.join(dest_folder, "train")
    test_path = os.path.join(dest_folder, "test")
    split_path = os.path.join(dest_folder, "split")
    for subset_path in (train_path, test_path, split_path):
        os.makedirs(subset_path, exist_ok=True)

    # Private generator when a seed is given; otherwise the global one.
    rng = random if seed is None else random.Random(seed)

    for disease in class_names:
        disease_path = os.path.join(source_folder, disease)

        # Sorted so that the shuffle below is reproducible for a given seed;
        # only regular files are kept because copy2 cannot copy a directory.
        images = sorted(
            name for name in os.listdir(disease_path)
            if os.path.splitext(name)[1].lower() in _IMAGE_EXTENSIONS
            and os.path.isfile(os.path.join(disease_path, name))
        )
        rng.shuffle(images)

        # Compute split indices
        total_images = len(images)
        train_count = int(total_images * train_ratio)
        test_count = int(total_images * test_ratio)
        test_end = train_count + test_count

        _copy_renamed(images[:train_count], disease_path, train_path, disease)
        _copy_renamed(images[train_count:test_end], disease_path, test_path, disease)
        _copy_renamed(images[test_end:], disease_path, split_path, disease)

    print("Dataset has been successfully split into train, test, and split sets!")

# Usage: copy every class folder under source_folder into train/test/split trees under dest_folder.
source_folder = "Skin Cancer Dataset"  # Change this to your folder path
dest_folder = "skin-train"  # Change this to where you want to save the split dataset
# Called with the default ratios: 80% train and 10% test per class (rounded down);
# all remaining images go to the "split" set.
split_data(source_folder, dest_folder)

Dataset has been successfully split into train, test, and split sets!
