<a href="https://colab.research.google.com/github/LShahmiri/Computer_vision-Image_processing_Projects/blob/master/DatasetSplitter/DatasetSplitter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# File: dataset_splitter.py
# Split a dataset into roughly 50% train, 40% test, and 10% valid (counts are truncated to whole files)
import os
import glob
import shutil
import random

def split_x_dataset(rootdir: str) -> None:
    """
    Splits a dataset of x images into training, testing, and validation subsets.

    Every sub-directory of ``rootdir`` is treated as one class folder. Inside
    each class folder the subdirectories 'Train', 'Test' and 'Valid' are
    created (if missing) and the regular files found directly in the class
    folder are moved into them at random:

        * 'Train' receives 50% of the files (rounded down),
        * 'Test'  receives 80% of the remaining files (rounded down),
        * 'Valid' receives whatever is left.

    Non-directory entries of ``rootdir`` are ignored, as are hidden files
    (names starting with '.') and nested directories inside a class folder.
    Calling the function again on an already split dataset is a no-op unless
    new loose files have been added to a class folder.

    Parameters:
        rootdir (str): The root directory of the x dataset. May be absolute
            or relative to the current working directory.

    Returns:
        None

    Raises:
        OSError: If ``rootdir`` cannot be listed or a file cannot be moved.
        shutil.Error: If a file with the same name already exists in the
            destination subdirectory.
    """
    train_pct = 50         # share of all files that goes to 'Train'
    test_pct_of_rest = 80  # share of the non-training files that goes to 'Test'

    # Sorted so that a seeded `random` yields the same split on every platform
    # (os.listdir returns entries in arbitrary order).
    for folder in sorted(os.listdir(rootdir)):
        class_dir = os.path.join(rootdir, folder)
        # Stray files next to the class folders must not be treated as classes.
        if not os.path.isdir(class_dir):
            continue

        # One single source of truth for "the files to split": the sample sizes
        # below are derived from this very list, so they can never exceed it.
        files = sorted(
            name
            for name in os.listdir(class_dir)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(class_dir, name))
        )

        # exist_ok lets the function be re-run on a partially split dataset.
        for subset in ('Test', 'Train', 'Valid'):
            os.makedirs(os.path.join(class_dir, subset), exist_ok=True)

        n_train = len(files) * train_pct // 100
        n_test = (len(files) - n_train) * test_pct_of_rest // 100

        # Shuffle once, then cut the shuffled list into three disjoint slices.
        shuffled = random.sample(files, len(files))
        assignment = (
            ('Train', shuffled[:n_train]),
            ('Test', shuffled[n_train:n_train + n_test]),
            ('Valid', shuffled[n_train + n_test:]),
        )

        for subset, names in assignment:
            target_dir = os.path.join(class_dir, subset)
            for name in names:
                # Moving *into* the directory makes shutil.move refuse to
                # overwrite an existing file of the same name.
                shutil.move(os.path.join(class_dir, name), target_dir)

# Example usage:
# root_directory = 'c:/--'
# split_x_dataset(root_directory)
