In [5]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Split a folder-per-class image dataset into train/test folders.
#
# Input layout : <dataset_path>/<class_name>/<image files>
# Output layout: <parent of dataset_path>/dataset/{train,test}/<class_name>/
#
# Files are MOVED (not copied), so the class folders under `dataset_path`
# are emptied as the script runs.

# Specify the path to your dataset
dataset_path = 'bw_output'

# Fraction of every class that is held out for testing, and the seed that
# makes the shuffle performed by train_test_split repeatable.
TEST_FRACTION = 0.2
SPLIT_SEED = 42


def _move_files(names, src_dir, dest_dir):
    """Move every file listed in *names* from *src_dir* into *dest_dir*.

    *dest_dir* is created once up front (also when *names* is empty), so the
    train and test trees end up with the same set of class folders.
    """
    os.makedirs(dest_dir, exist_ok=True)
    for name in names:
        shutil.move(os.path.join(src_dir, name), os.path.join(dest_dir, name))


# Ensure the main dataset directory exists (and really is a directory,
# since it is listed below).
if not os.path.isdir(dataset_path):
    print(f"The folder '{dataset_path}' does not exist.")
    # `exit()` is a helper injected by the `site` module for interactive
    # use; raising SystemExit directly does not depend on that helper.
    raise SystemExit

# Create the dataset folder next to (not inside) the input directory.
# normpath strips a trailing separator, which would otherwise make dirname()
# return the input directory itself.
dataset_root = os.path.join(
    os.path.dirname(os.path.normpath(dataset_path)), 'dataset'
)
os.makedirs(dataset_root, exist_ok=True)

# Create train and test directories within the dataset folder
train_path = os.path.join(dataset_root, 'train')
test_path = os.path.join(dataset_root, 'test')
os.makedirs(train_path, exist_ok=True)
os.makedirs(test_path, exist_ok=True)

# Iterate over each class subfolder in the main dataset folder
for subfolder in sorted(os.listdir(dataset_path)):
    subfolder_path = os.path.join(dataset_path, subfolder)

    # Skip non-directory entries
    if not os.path.isdir(subfolder_path):
        continue

    # Collect the regular files of this class. os.listdir() returns entries
    # in arbitrary order, so sort them: otherwise the fixed random seed would
    # not yield the same split on every run / machine.
    images = sorted(
        name
        for name in os.listdir(subfolder_path)
        if os.path.isfile(os.path.join(subfolder_path, name))
    )

    # Check if the class has at least one image
    if not images:
        print(f"Skipping subfolder '{subfolder}' as it has no images.")
        continue

    if len(images) < 2:
        # train_test_split raises ValueError when the requested test share
        # would leave the train set empty (one sample, test_size=0.2), which
        # used to abort the script with earlier classes already moved.
        # Keep the lone image for training instead.
        print(
            f"Subfolder '{subfolder}' has a single image; "
            "placing it in the train set."
        )
        train_images, test_images = images, []
    else:
        # Split the images into train and test sets
        train_images, test_images = train_test_split(
            images, test_size=TEST_FRACTION, random_state=SPLIT_SEED
        )

    # Move the images into <split>/<class_name>/
    _move_files(train_images, subfolder_path, os.path.join(train_path, subfolder))
    _move_files(test_images, subfolder_path, os.path.join(test_path, subfolder))

print("Dataset split into train and test sets within the 'dataset' folder.")


Dataset split into train and test sets within the 'dataset' folder.
