In [4]:
import os
import random
import shutil
from pathlib import Path

# Fix the seed of the `random` module's generator
random.seed(42)

# Folder holding one sub-folder of images per class, and the folder under
# which the train/val/test trees are created
SOURCE_DIR = 'crop pictures/data'
DEST_DIR = 'crop pictures'

# Fraction of each class's images assigned to each split
train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

# Sanity check: the three fractions must cover the whole dataset
# (compared with a small tolerance because they are floats)
assert abs(sum((train_ratio, val_ratio, test_ratio)) - 1.0) < 1e-6, "Ratios must sum to 1.0"

# Every sub-directory of SOURCE_DIR is treated as one class label
classes = [
    entry
    for entry in os.listdir(SOURCE_DIR)
    if Path(SOURCE_DIR, entry).is_dir()
]

# Mirror the class folders under DEST_DIR/<split>/ for each of the three
# splits; folders that already exist are left as they are
for split in ('train', 'val', 'test'):
    for class_name in classes:
        os.makedirs(os.path.join(DEST_DIR, split, class_name), exist_ok=True)

# Process each class
# os.listdir() returns entries in arbitrary, filesystem-dependent order, and
# the same RNG stream is consumed class after class. Both the class list and
# each image list are therefore sorted before shuffling; otherwise seeding
# the RNG alone does not make the split reproducible across machines.
# NOTE: files copied by an earlier run are not removed here. If the split
# assignment may have changed, clear the train/val/test folders under
# DEST_DIR first so an image cannot end up in more than one split.
for class_name in sorted(classes):
    class_path = os.path.join(SOURCE_DIR, class_name)

    # Keep only image files (extension match is case-insensitive)
    images = sorted(
        img for img in os.listdir(class_path)
        if img.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    random.shuffle(images)

    # Slice boundaries: int() truncates, so any leftover images caused by
    # rounding fall into the test split.
    total = len(images)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    train_imgs = images[:train_end]
    val_imgs = images[train_end:val_end]
    test_imgs = images[val_end:]

    # Copy files into DEST_DIR/<split>/<class_name>/
    for split, split_imgs in (('train', train_imgs), ('val', val_imgs), ('test', test_imgs)):
        for img_name in split_imgs:
            shutil.copy(
                os.path.join(class_path, img_name),
                os.path.join(DEST_DIR, split, class_name, img_name),
            )

print("✅ Dataset split into train, val, and test sets.")


✅ Dataset split into train, val, and test sets.
