In [1]:
import os
import shutil
from math import ceil

In [2]:
def split_folder(
    source_directory: str,
    target_base_directory: str,
    num_subfolders: int = 100,
    verbose: bool = True,
) -> None:
    """
    Splits the files in the source directory into evenly divided subfolders.

    Regular files directly inside ``source_directory`` are taken in sorted
    name order and MOVED into ``subfolder_1`` ... ``subfolder_<num_subfolders>``
    under ``target_base_directory``. Subfolder sizes differ by at most one
    file: the first ``total_files % num_subfolders`` subfolders receive one
    extra file. All ``num_subfolders`` subfolders are created, even if some
    stay empty because there are fewer files than subfolders.

    Problems are reported with ``print`` rather than raised: a missing or
    non-directory source, an empty source, or ``num_subfolders < 1`` abort the
    run before anything is moved; a failure on an individual file is reported
    and the remaining files are still processed.

    Args:
        source_directory (str): Path to the source directory containing the files.
        target_base_directory (str): Base path for the target subfolders.
        num_subfolders (int): Number of subfolders to create. Must be >= 1.
        verbose (bool): If True (default), print a line for every created
            subfolder and every moved file. Errors and the summary lines are
            printed regardless.

    Returns:
        None
    """
    # Ensure the source exists and is actually a directory (os.listdir would
    # raise on a regular file).
    if not os.path.exists(source_directory):
        print(f"Error: Source directory '{source_directory}' does not exist.")
        return
    if not os.path.isdir(source_directory):
        print(f"Error: Source path '{source_directory}' is not a directory.")
        return

    # os.listdir returns entries in arbitrary order; sort so that the
    # file -> subfolder assignment is reproducible between runs.
    all_files = sorted(
        name
        for name in os.listdir(source_directory)
        if os.path.isfile(os.path.join(source_directory, name))
    )

    total_files = len(all_files)
    if total_files == 0:
        print(f"No files found in the source directory '{source_directory}'.")
        return

    # Guard the division below and avoid a silent no-op for negative counts.
    if num_subfolders < 1:
        print(f"Error: num_subfolders must be at least 1, got {num_subfolders}.")
        return

    # Balanced split: every subfolder gets base_count files and the first
    # `remainder` subfolders get one more.
    base_count, remainder = divmod(total_files, num_subfolders)
    # Reported value is the size of the largest subfolder.
    files_per_subfolder = base_count + (1 if remainder else 0)
    print(f"Total files: {total_files}, Files per subfolder: {files_per_subfolder}")

    # Create the subfolders and distribute the files
    start_index = 0
    for i in range(1, num_subfolders + 1):
        subfolder_name = f"subfolder_{i}"
        subfolder_path = os.path.join(target_base_directory, subfolder_name)
        os.makedirs(subfolder_path, exist_ok=True)
        if verbose:
            print(f"Created subfolder: {subfolder_path}")

        end_index = start_index + base_count + (1 if i <= remainder else 0)

        for file in all_files[start_index:end_index]:
            source_file = os.path.join(source_directory, file)
            target_file = os.path.join(subfolder_path, file)

            # shutil.move can silently replace an existing destination file;
            # refuse instead so a re-run never destroys earlier results.
            if os.path.lexists(target_file):
                print(f"Failed to move {source_file}: target '{target_file}' already exists")
                continue

            try:
                shutil.move(source_file, target_file)
            except OSError as e:
                # Best effort: report the failure and keep going.
                print(f"Failed to move {source_file}: {e}")
            else:
                if verbose:
                    print(f"Moved: {source_file} -> {target_file}")

        start_index = end_index

    print("Folder splitting complete!")

In [None]:
# Example usage: split the folder at SOURCE_DIRECTORY into numbered subfolders
# under TARGET_BASE_DIRECTORY.
# NOTE: split_folder moves (not copies) the files, so running this cell
# changes the contents of SOURCE_DIRECTORY on disk.
if __name__ == "__main__":
    SOURCE_DIRECTORY = "/Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_curated"  # Replace with your source folder path
    TARGET_BASE_DIRECTORY = "/Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_split"  # Replace with your target folder path

    # Split the folder into 100 subfolders
    split_folder(SOURCE_DIRECTORY, TARGET_BASE_DIRECTORY, num_subfolders=100)

Total files: 111477, Files per subfolder: 1115
Created subfolder: /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_split/subfolder_1
Moved: /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_curated/000001.png_face1.jpg -> /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_split/subfolder_1/000001.png_face1.jpg
Moved: /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_curated/000003.png_face1.jpg -> /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_split/subfolder_1/000003.png_face1.jpg
Moved: /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_curated/000004.png_face1.jpg -> /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_split/subfolder_1/000004.png_face1.jpg
Moved: /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/celeba_dataset_curated/000005.png_face1.jpg -> /Volumes/JavaAOT/Documents/AI/ml_expressions/img_datasets/c