In [1]:
import os
import shutil
from math import ceil

In [2]:
def split_folder(source_directory, target_base_directory, num_subfolders=10):
    """
    Splits the files in the source directory into evenly divided subfolders.

    Args:
        source_directory (str): Path to the source directory containing the files.
        target_base_directory (str): Base path for the target subfolders.
        num_subfolders (int): Number of subfolders to create.

    Returns:
        None
    """
    # Ensure the source directory exists
    if not os.path.exists(source_directory):
        print(f"Source directory '{source_directory}' does not exist.")
        return

    # Get a list of all files in the source directory
    all_files = [f for f in os.listdir(source_directory) if os.path.isfile(os.path.join(source_directory, f))]

    # Calculate the number of files per subfolder
    total_files = len(all_files)
    if total_files == 0:
        print(f"No files found in the source directory '{source_directory}'.")
        return

    files_per_subfolder = ceil(total_files / num_subfolders)
    print(f"Total files: {total_files}, Files per subfolder: {files_per_subfolder}")

    # Create the subfolders and distribute the files
    for i in range(1, num_subfolders + 1):
        subfolder_name = f"flickr_images_{i}"
        subfolder_path = os.path.join(target_base_directory, subfolder_name)
        os.makedirs(subfolder_path, exist_ok=True)
        print(f"Created subfolder: {subfolder_path}")

        # Move the appropriate files to the subfolder
        start_index = (i - 1) * files_per_subfolder
        end_index = min(i * files_per_subfolder, total_files)

        for file in all_files[start_index:end_index]:
            source_file = os.path.join(source_directory, file)
            target_file = os.path.join(subfolder_path, file)
            try:
                shutil.move(source_file, target_file)
                print(f"Moved: {source_file} -> {target_file}")
            except Exception as e:
                print(f"Failed to move {source_file}: {e}")

    print("Folder splitting complete!")

In [3]:
# Example Usage
if __name__ == "__main__":
    # Path to the source folder
    SOURCE_DIRECTORY = "/home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images"
    # Path to the base target directory
    TARGET_BASE_DIRECTORY = "/home/natalyagrokh/img_datasets/temp_scraped_images"
    
    # Split the folder into 10 subfolders
    split_folder(SOURCE_DIRECTORY, TARGET_BASE_DIRECTORY, num_subfolders=10)

Total files: 43683, Files per subfolder: 4369
Created subfolder: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_1
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_2452.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_1/image_2452.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_18800.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_1/image_18800.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_13935.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_1/image_13935.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_23931.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_1/image_23931.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_6293.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_1/image_

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_9491.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_10/image_9491.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_22746.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_10/image_22746.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_31246.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_10/image_31246.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_4343.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_10/image_4343.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_27654.jpg -> /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images_10/image_27654.jpg
Moved: /home/natalyagrokh/img_datasets/temp_scraped_images/flickr_images/image_9625.jpg -> /home/natalyagrokh/img_datas