# In [5]:
import os
import shutil
import warnings

from sklearn.model_selection import train_test_split

def _split_files(files, test_size, validation_size, random_state, source):
    """Partition *files* into ``(train, test, validation)`` lists.

    Uses the same two-stage ``train_test_split`` call as before: first hold
    out ``test_size + validation_size`` of the files, then divide the
    held-out part between test and validation.

    ``train_test_split`` raises ``ValueError`` when a split would leave one
    side empty.  Instead of aborting the whole copy, a warning is issued and:

    * if nothing can be held out, every file goes to train;
    * if the held-out part cannot be divided, all of it goes to validation
      (the side the rounding would have assigned it to).

    *source* is only used to identify the folder in warning messages.
    """
    if not files:
        return [], [], []

    holdout_size = test_size + validation_size
    try:
        train_files, holdout_files = train_test_split(
            files, test_size=holdout_size, random_state=random_state
        )
    except ValueError as err:
        warnings.warn(
            "{}: too few files ({}) to hold any out; copying all to train "
            "({})".format(source, len(files), err),
            stacklevel=3,
        )
        return list(files), [], []

    try:
        test_files, val_files = train_test_split(
            holdout_files,
            test_size=validation_size / holdout_size,
            random_state=random_state,
        )
    except ValueError as err:
        warnings.warn(
            "{}: too few held-out files ({}) to divide between test and "
            "validation; copying all of them to validation ({})".format(
                source, len(holdout_files), err
            ),
            stacklevel=3,
        )
        return train_files, [], holdout_files

    return train_files, test_files, val_files


def split_dataset(input_folder, output_folder, test_size=0.1, validation_size=0.2, random_state=42):
    """Copy a two-level dataset tree into train/test/validation splits.

    ``input_folder`` is read as ``<input_folder>/<class>/<subfolder>/<file>``.
    The files of each subfolder are split independently and copied (the
    originals are left untouched) into
    ``<output_folder>/train|test|validation/<class>/<subfolder>/``.

    Entries that are not directories at the class or subfolder level, and
    entries that are not regular files inside a subfolder, are skipped.
    Empty subfolders still get their (empty) output folders.  Subfolders
    with too few files to split are handled by ``_split_files`` with a
    warning rather than an exception.

    Args:
        input_folder: Root of the source tree.
        output_folder: Root under which the split folders are created.
        test_size: Share of each subfolder's files that goes to ``test``.
        validation_size: Share that goes to ``validation``.
        random_state: Seed passed to ``train_test_split``.

    Raises:
        ValueError: If ``test_size`` or ``validation_size`` is not positive,
            or if they are fractions summing to 1 or more.
    """
    # Fail fast on sizes that train_test_split would reject for every folder,
    # so the small-folder fallback below can never hide a bad argument.
    holdout_size = test_size + validation_size
    if test_size <= 0 or validation_size <= 0:
        raise ValueError("test_size and validation_size must both be positive")
    if isinstance(holdout_size, float) and holdout_size >= 1:
        raise ValueError("test_size + validation_size must be less than 1")

    split_names = ('train', 'test', 'validation')

    # os.listdir returns entries in arbitrary order; sorting makes the split
    # for a given random_state reproducible across machines and runs.
    for class_name in sorted(os.listdir(input_folder)):
        class_folder = os.path.join(input_folder, class_name)
        if not os.path.isdir(class_folder):
            continue  # stray file next to the class folders

        class_outputs = {
            name: os.path.join(output_folder, name, class_name)
            for name in split_names
        }
        for class_output in class_outputs.values():
            os.makedirs(class_output, exist_ok=True)

        for subfolder in sorted(os.listdir(class_folder)):
            subfolder_path = os.path.join(class_folder, subfolder)
            if not os.path.isdir(subfolder_path):
                continue  # stray file next to the subfolders

            # Only regular files can be copied with shutil.copy.
            files = sorted(
                entry
                for entry in os.listdir(subfolder_path)
                if os.path.isfile(os.path.join(subfolder_path, entry))
            )

            partitions = _split_files(
                files, test_size, validation_size, random_state, subfolder_path
            )

            for name, chosen in zip(split_names, partitions):
                destination = os.path.join(class_outputs[name], subfolder)
                os.makedirs(destination, exist_ok=True)
                for file_name in chosen:
                    shutil.copy(
                        os.path.join(subfolder_path, file_name),
                        os.path.join(destination, file_name),
                    )

# Example usage: copy the dataset under `input_folder` into
# train/test/validation folders created under `output_folder`.
input_folder = 'C:/Users/RCUB-CS/age'
output_folder = 'C:/Users/RCUB-CS/age2'

split_dataset(
    input_folder,
    output_folder,
    test_size=0.1,
    validation_size=0.2,
    random_state=42,
)
