# In [1]:
import math
import os
import random
import shutil

def split_dataset(dataset_dir, output_dir, train_split=0.7, val_split=0.15,
                  test_split=0.15, seed=None):
    """Split a folder-per-class dataset into train/validation/test folders.

    Every sub-directory of ``dataset_dir`` is treated as one class. The
    regular files inside it are shuffled and MOVED (not copied) into
    ``output_dir/train/<class>``, ``output_dir/validation/<class>`` and
    ``output_dir/test/<class>``, so the source class folders are emptied of
    their files. Non-file entries (e.g. nested directories) are left alone.

    Args:
        dataset_dir: Directory containing one sub-directory per class.
        output_dir: Directory in which the ``train``, ``validation`` and
            ``test`` trees are created (existing directories are reused).
        train_split: Fraction of each class placed in ``train``.
        val_split: Fraction of each class placed in ``validation``.
        test_split: Fraction of each class placed in ``test``. Only used for
            validation: the test set receives whatever remains after the
            train and validation counts (both rounded down) are taken.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used.

    Raises:
        ValueError: If any split is negative or the splits do not sum to 1
            (within floating-point tolerance).
    """
    splits = (train_split, val_split, test_split)
    if any(fraction < 0 for fraction in splits):
        raise ValueError("Splits must be non-negative")
    # Compare with a tolerance: e.g. 0.7 + 0.2 + 0.1 is 0.9999999999999999,
    # so an exact equality check would reject perfectly valid splits.
    if not math.isclose(sum(splits), 1.0, rel_tol=1e-9, abs_tol=1e-9):
        raise ValueError("Splits must sum to 1")

    # A dedicated generator keeps seeded runs independent of global state.
    rng = random if seed is None else random.Random(seed)

    # Create output directories if they don't exist
    train_dir = os.path.join(output_dir, 'train')
    val_dir = os.path.join(output_dir, 'validation')
    test_dir = os.path.join(output_dir, 'test')
    split_dirs = (train_dir, val_dir, test_dir)
    for split_dir in split_dirs:
        os.makedirs(split_dir, exist_ok=True)

    output_root = os.path.abspath(output_dir)

    # Iterate through each class folder in the dataset directory
    for class_folder in sorted(os.listdir(dataset_dir)):
        class_path = os.path.join(dataset_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        # If the output directory lives directly inside the dataset
        # directory, it must not be mistaken for a class folder.
        if os.path.abspath(class_path) == output_root:
            continue

        # Create corresponding class folders in train, val, and test directories
        for split_dir in split_dirs:
            os.makedirs(os.path.join(split_dir, class_folder), exist_ok=True)

        # Only regular files are split; the listing is sorted first so that
        # a given seed always produces the same partition.
        images = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        rng.shuffle(images)

        # Counts are rounded down; the remainder goes to the test set.
        train_count = int(train_split * len(images))
        val_count = int(val_split * len(images))
        partitions = (
            (train_dir, images[:train_count]),
            (val_dir, images[train_count:train_count + val_count]),
            (test_dir, images[train_count + val_count:]),
        )

        # Move the files to their respective directories
        for split_dir, names in partitions:
            for image in names:
                shutil.move(
                    os.path.join(class_path, image),
                    os.path.join(split_dir, class_folder, image),
                )

    print(f"Dataset successfully split into {train_dir}, {val_dir}, and {test_dir}")

# Locations of the raw dataset and of the split output
dataset_dir = r'D:\GymBuddy\CNN\Data'  # source root, scanned for class sub-folders
output_dir = r'D:\GymBuddy\CNN\Split_Data'  # receives the train/validation/test trees

# Run the split with the function's default ratios
split_dataset(dataset_dir=dataset_dir, output_dir=output_dir)


# Output:
# Dataset successfully split into D:\GymBuddy\CNN\Split_Data\train, D:\GymBuddy\CNN\Split_Data\validation, and D:\GymBuddy\CNN\Split_Data\test
