In [1]:
import os
import random
import shutil

In [2]:
def data_split(image_folder, train_folder, test_folder, validation_folder, seed=None):
    """Copy a class-per-subfolder image dataset into train/test/validation folders.

    Each immediate subdirectory of ``image_folder`` is treated as one class.
    The regular files inside it are shuffled and copied so that the first
    ``int(0.8 * n)`` go to ``train_folder/<class>``, the next ``int(0.1 * n)``
    go to ``test_folder/<class>`` and whatever remains goes to
    ``validation_folder/<class>``. Source files are copied, never moved.

    Args:
        image_folder: Directory whose subdirectories each hold one class.
        train_folder: Destination root for the training split.
        test_folder: Destination root for the test split.
        validation_folder: Destination root for the validation split.
        seed: Optional seed for a private ``random.Random`` instance. When
            ``None`` (the default) the module-level ``random`` generator is
            used, so a prior ``random.seed(...)`` call still controls the
            shuffle.

    Returns:
        None. The function only has filesystem side effects.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.makedirs`` /
            ``shutil.copy`` (e.g. ``image_folder`` does not exist).
    """
    # Create the train, test, and validation folders
    output_folders = (train_folder, test_folder, validation_folder)
    for folder in output_folders:
        os.makedirs(folder, exist_ok=True)

    # Resolved output locations, so an output folder that lives inside
    # image_folder is not mistaken for an image class.
    output_paths = {os.path.realpath(folder) for folder in output_folders}

    # A private generator keeps a seeded call from disturbing global RNG state.
    rng = random if seed is None else random.Random(seed)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # sequence of shuffles (and therefore the split) repeatable for a seed.
    for subfolder in sorted(os.listdir(image_folder)):
        class_folder = os.path.join(image_folder, subfolder)

        # Skip stray files (e.g. .DS_Store) and the output folders themselves.
        if not os.path.isdir(class_folder):
            continue
        if os.path.realpath(class_folder) in output_paths:
            continue

        # Only regular files are split; nested directories cannot be copied
        # with shutil.copy and would otherwise abort the whole run.
        image_files = sorted(
            name
            for name in os.listdir(class_folder)
            if os.path.isfile(os.path.join(class_folder, name))
        )
        rng.shuffle(image_files)

        # 80% train, 10% test (both truncated); the remainder is validation.
        total_images = len(image_files)
        train_end = int(0.8 * total_images)
        test_end = train_end + int(0.1 * total_images)

        # Create the per-class subdirectories in each destination root
        train_class_folder = os.path.join(train_folder, subfolder)
        test_class_folder = os.path.join(test_folder, subfolder)
        validation_class_folder = os.path.join(validation_folder, subfolder)
        os.makedirs(train_class_folder, exist_ok=True)
        os.makedirs(test_class_folder, exist_ok=True)
        os.makedirs(validation_class_folder, exist_ok=True)

        # Copy each image to the folder selected by its shuffled position
        for i, file_name in enumerate(image_files):
            src_path = os.path.join(class_folder, file_name)
            if i < train_end:
                dst_path = os.path.join(train_class_folder, file_name)
            elif i < test_end:
                dst_path = os.path.join(test_class_folder, file_name)
            else:
                dst_path = os.path.join(validation_class_folder, file_name)
            shutil.copy(src_path, dst_path)

In [3]:
# Seed the module-level RNG before splitting: the shuffle inside data_split
# draws from it by default, so an unseeded run assigns images to different
# sets each time. Files are copied without clearing the destinations, so a
# re-run with a different shuffle could place the same image in several splits.
SEED = 42
random.seed(SEED)

img_folder = "/content/drive/MyDrive/pyrack/data/footwear Images"
train_folder = "/content/drive/MyDrive/pyrack/data/train"
test_folder = "/content/drive/MyDrive/pyrack/data/test"
valid_folder = "/content/drive/MyDrive/pyrack/data/valid"
data_split(img_folder, train_folder, test_folder, valid_folder)