# In [3]:
import os
import shutil
import random
from pathlib import Path

# Define the paths
data_folder = Path("AugmentedData")
train_folder = Path("train")
val_folder = Path("val")
test_folder = Path("test")

# Suffixes treated as images. They are compared case-insensitively so that
# "a.JPG" or "b.jpeg" are not silently skipped on case-sensitive filesystems.
IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")  # Add more extensions if needed


def _list_images(class_dir):
    """Return the image files directly inside *class_dir*, in sorted order."""
    return sorted(
        entry
        for entry in class_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
    )


def _copy_all(files, destination):
    """Copy every path in *files* into *destination*, creating it if needed."""
    destination.mkdir(parents=True, exist_ok=True)
    for file in files:
        # copy2 keeps the source file in place and preserves its metadata.
        shutil.copy2(file, destination)


def split_dataset(source, train_dir, val_dir, test_dir,
                  train_ratio=0.7, val_ratio=0.1, seed=42):
    """Copy a folder-per-class image dataset into train/val/test folders.

    Every sub-directory of *source* is treated as one class. Its images are
    shuffled and copied (the originals are left untouched) into
    ``<train_dir>/<class>``, ``<val_dir>/<class>`` and ``<test_dir>/<class>``.

    The shuffle is deterministic: files are sorted before shuffling and each
    class uses its own generator seeded from *seed* and the class name. As
    long as the source files do not change, running the split again therefore
    reproduces the same assignment instead of scattering the same image over
    several splits. Copies left behind by an earlier run on different inputs
    are not removed.

    Args:
        source: Directory containing one sub-directory per class.
        train_dir: Output directory for the training split.
        val_dir: Output directory for the validation split.
        test_dir: Output directory for the test split.
        train_ratio: Fraction of each class copied to *train_dir*.
        val_ratio: Fraction of each class copied to *val_dir*. Whatever is
            left after the train and validation shares goes to *test_dir*.
        seed: Seed for the shuffle; pass ``None` ` for a non-reproducible one.

    Returns:
        A dict mapping each class name to a ``(train, val, test)`` tuple with
        the number of files copied into each split.

    Raises:
        ValueError: If a ratio is negative or the two ratios exceed 1.
        OSError: If *source* cannot be listed or a file cannot be copied.
    """.replace("` `", "`")
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1:
        raise ValueError(
            f"invalid split ratios: train={train_ratio!r}, val={val_ratio!r}"
        )

    # List the classes before touching the output tree, so a missing source
    # folder fails without leaving empty output directories behind.
    class_dirs = sorted(entry for entry in source.iterdir() if entry.is_dir())

    # Create train, val, and test directories if they don't exist
    for split_dir in (train_dir, val_dir, test_dir):
        split_dir.mkdir(parents=True, exist_ok=True)

    counts = {}
    for class_dir in class_dirs:
        image_files = _list_images(class_dir)

        # A per-class generator keeps one class's split independent of how
        # many files the other classes contain.
        if seed is None:
            rng = random.Random()
        else:
            rng = random.Random(f"{seed}:{class_dir.name}")
        rng.shuffle(image_files)

        # Sizes are truncated, so very small classes may get no train or
        # validation files at all; the remainder always goes to test.
        total_images = len(image_files)
        train_size = int(total_images * train_ratio)
        val_size = int(total_images * val_ratio)
        val_end = train_size + val_size

        train_files = image_files[:train_size]
        val_files = image_files[train_size:val_end]
        test_files = image_files[val_end:]

        # Copy the files to their respective class directories
        _copy_all(train_files, train_dir / class_dir.name)
        _copy_all(val_files, val_dir / class_dir.name)
        _copy_all(test_files, test_dir / class_dir.name)

        counts[class_dir.name] = (
            len(train_files),
            len(val_files),
            len(test_files),
        )

    return counts


if __name__ == "__main__":
    split_dataset(data_folder, train_folder, val_folder, test_folder)
