In [1]:
import os
import shutil
import random
from pathlib import Path

# Dataset root; images and labels live in sibling sub-directories of it.
base_dir = "/Users/rohanshenoy/Desktop/targetidyolo"
images_dir = os.path.join(base_dir, "images")
labels_dir = os.path.join(base_dir, "labels")

# Create train, val, test directories under both images/ and labels/
for parent_dir in [images_dir, labels_dir]:
    for split in ["train", "val", "test"]:
        os.makedirs(os.path.join(parent_dir, split), exist_ok=True)

# Only regular, non-hidden files with a recognised image extension take part
# in the split. Without this filter, stray entries such as macOS ".DS_Store"
# would be counted and moved as if they were images.
image_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

# os.listdir() returns entries in arbitrary order, so sort first: the shuffle
# below then depends only on the set of filenames and the seed.
image_files = sorted(
    f
    for f in os.listdir(images_dir)
    if not f.startswith(".")
    and os.path.isfile(os.path.join(images_dir, f))
    and os.path.splitext(f)[1].lower() in image_extensions
)

# Shuffle with a private, seeded generator so the same dataset always yields
# the same split, without touching the global random state.
split_seed = 42
random.Random(split_seed).shuffle(image_files)

# Define split ratios (70% train, 20% val, 10% test)
train_ratio = 0.7
val_ratio = 0.2
# test_ratio is implicitly 0.1

# Calculate split points; int() truncates, so any remainder lands in test.
num_files = len(image_files)
train_split = int(num_files * train_ratio)
val_split = int(num_files * (train_ratio + val_ratio))

# Split the files into three contiguous, non-overlapping slices
train_files = image_files[:train_split]
val_files = image_files[train_split:val_split]
test_files = image_files[val_split:]

# Function to move files to their respective directories
def move_files(file_list, split_name, images_root=None, labels_root=None):
    """Move images and their matching label files into a split sub-directory.

    For each filename in ``file_list`` the image is moved from
    ``<images_root>/<filename>`` to ``<images_root>/<split_name>/<filename>``.
    A label file with the same base name and one of the extensions
    ``.txt``, ``.xml`` or ``.json`` (first match wins, in that order) is
    moved from ``<labels_root>`` to ``<labels_root>/<split_name>``.

    Args:
        file_list: Image filenames (not paths) located directly in
            ``images_root``.
        split_name: Name of the destination sub-directory, e.g. ``"train"``.
        images_root: Directory holding the images. Defaults to the
            module-level ``images_dir``.
        labels_root: Directory holding the labels. Defaults to the
            module-level ``labels_dir``.

    Returns:
        A list of the image filenames for which no label file was found.
        Those images are still moved.

    Raises:
        FileNotFoundError: If an image listed in ``file_list`` does not exist
            (propagated from ``shutil.move``).
    """
    if images_root is None:
        images_root = images_dir
    if labels_root is None:
        labels_root = labels_dir

    image_split_dir = os.path.join(images_root, split_name)
    label_split_dir = os.path.join(labels_root, split_name)
    # Make the function usable on its own, not only after the caller has
    # pre-created the split folders.
    os.makedirs(image_split_dir, exist_ok=True)
    os.makedirs(label_split_dir, exist_ok=True)

    label_extensions = (".txt", ".xml", ".json")  # Common label file extensions
    unlabeled = []

    for filename in file_list:
        # Get the base filename without extension
        base_name = os.path.splitext(filename)[0]

        # Move the image first: if it is missing, the error is raised before
        # its label has been relocated, so no orphaned label is left behind.
        shutil.move(
            os.path.join(images_root, filename),
            os.path.join(image_split_dir, filename),
        )

        # Look for the corresponding label file (same base name, first
        # matching extension wins).
        label_filename = None
        for ext in label_extensions:
            candidate = base_name + ext
            if os.path.exists(os.path.join(labels_root, candidate)):
                label_filename = candidate
                break

        if label_filename is None:
            # Record instead of silently skipping so the caller can report it.
            unlabeled.append(filename)
            continue

        shutil.move(
            os.path.join(labels_root, label_filename),
            os.path.join(label_split_dir, label_filename),
        )

    return unlabeled

# Relocate each subset into its split folder, in train -> val -> test order
split_plan = (
    ("train", train_files),
    ("val", val_files),
    ("test", test_files),
)
for split_label, members in split_plan:
    move_files(members, split_label)

# Report the size of each subset next to its nominal (configured) percentage
summary_rows = (
    ("Train", train_files, train_ratio * 100),
    ("Validation", val_files, val_ratio * 100),
    ("Test", test_files, (1 - train_ratio - val_ratio) * 100),
)
print("Dataset split complete:")
for heading, members, percent in summary_rows:
    print(f"{heading}: {len(members)} files ({percent:.0f}%)")

Dataset split complete:
Train: 756 files (70%)
Validation: 215 files (20%)
Test: 109 files (10%)
