In [1]:
import os

In [2]:
import shutil
import random
from pathlib import Path

def create_train_val_test_split(source_dir, output_dir, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1, seed=None):
    """
    Split a dataset of class folders into train/val/test folders.

    Every immediate sub-directory of ``source_dir`` is treated as one class.
    Its files are shuffled and copied (the originals are left in place) into
    ``output_dir/{train,val,test}/<class name>/``. One summary line per class
    is printed.

    Args:
        source_dir: Path to the source directory containing class folders
        output_dir: Path to the output directory where train/val/test folders
            will be created (missing parent directories are created too)
        train_ratio: Proportion of data for training (default: 0.7)
        val_ratio: Proportion of data for validation (default: 0.2)
        test_ratio: Proportion of data for testing (default: 0.1)
        seed: Optional seed for a reproducible shuffle. When None, the
            module-level ``random`` generator is used.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
        FileNotFoundError: If ``source_dir`` does not exist.
        NotADirectoryError: If ``source_dir`` is not a directory.
    """
    ratios = (train_ratio, val_ratio, test_ratio)
    if any(ratio < 0 for ratio in ratios):
        raise ValueError(f"Split ratios must be non-negative, got {ratios}")
    # Tolerance absorbs float noise such as 0.7 + 0.2 + 0.1 != 1.0 exactly.
    if abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(f"Split ratios must sum to 1, got {ratios} (sum={sum(ratios)})")

    source_path = Path(source_dir)
    output_path = Path(output_dir)

    # Fail before touching the output tree so a bad source path leaves no
    # empty directories behind.
    if not source_path.exists():
        raise FileNotFoundError(f"Source directory not found: {source_path}")
    if not source_path.is_dir():
        raise NotADirectoryError(f"Source path is not a directory: {source_path}")

    split_dirs = {
        "train": output_path / "train",
        "val": output_path / "val",
        "test": output_path / "test",
    }
    # parents=True also creates output_path itself and any missing ancestors.
    for split_dir in split_dirs.values():
        split_dir.mkdir(parents=True, exist_ok=True)

    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle

    # iterdir() yields entries in arbitrary order; sorting makes the class
    # order and the pre-shuffle file order deterministic.
    for class_folder in sorted(source_path.iterdir()):
        if not class_folder.is_dir():
            continue
        class_name = class_folder.name

        files = sorted(f for f in class_folder.iterdir() if f.is_file())
        shuffle(files)

        # Train and val sizes are truncated; the test split takes whatever
        # remains, so no file is ever dropped.
        total_files = len(files)
        train_end = int(total_files * train_ratio)
        val_end = train_end + int(total_files * val_ratio)

        train_files = files[:train_end]
        val_files = files[train_end:val_end]
        test_files = files[val_end:]

        for split_name, split_files in (
            ("train", train_files),
            ("val", val_files),
            ("test", test_files),
        ):
            class_dir = split_dirs[split_name] / class_name
            class_dir.mkdir(exist_ok=True)
            for file_path in split_files:
                # copy2 preserves file metadata (timestamps) where possible.
                shutil.copy2(file_path, class_dir / file_path.name)

        print(f"Class '{class_name}': {len(train_files)} train, {len(val_files)} val, {len(test_files)} test")

# Example usage: copy the class folders under "img" into "dataset_split"
# using a 70% / 20% / 10% train / val / test split.
create_train_val_test_split(
    "img",
    "dataset_split",
    train_ratio=0.7,
    val_ratio=0.2,
    test_ratio=0.1,
)


Class 'Beer Bottles': 3500 train, 1000 val, 500 test
Class 'Plastic Bottles': 3500 train, 1000 val, 500 test
Class 'Soda Bottle': 3500 train, 1000 val, 500 test
Class 'Water Bottle': 3500 train, 1000 val, 500 test
Class 'Wine Bottle': 3500 train, 1000 val, 500 test
