In [5]:
import os
import shutil
import random

def split_dataset_with_labels(
    images_dir,
    labels_dir,
    output_dir,
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,
    seed=None,
):
    """Randomly split images and their label files into train/val/test folders.

    Every image file in ``images_dir`` (``.jpg``, ``.jpeg`` or ``.png``, matched
    case-insensitively) is copied to ``output_dir/<split>/images``. For each
    image, the file in ``labels_dir`` with the same base name and a ``.txt``
    extension is copied to ``output_dir/<split>/labels`` when it exists; an
    image without such a label file is still copied, just without a label.
    The number of images found is printed before copying starts.

    Args:
        images_dir: Directory containing the source images.
        labels_dir: Directory containing the ``.txt`` label files.
        output_dir: Directory under which ``train``, ``val`` and ``test``
            sub-directories are created (existing ones are reused).
        train_ratio: Fraction of images assigned to ``train``.
        val_ratio: Fraction of images assigned to ``val``.
        test_ratio: Fraction of images assigned to ``test``. Only used for
            validation: ``test`` receives whatever remains after the train
            and val counts are truncated to whole images.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used, as before.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
    """
    import math  # local import so this block does not rely on a new file-level import

    ratios = (train_ratio, val_ratio, test_ratio)
    if any(ratio < 0 for ratio in ratios):
        raise ValueError("Ratios must be non-negative.")
    # Compare with a tolerance: e.g. 0.7 + 0.2 + 0.1 is 0.9999999999999999 in
    # binary floating point, so an exact `== 1` check rejects valid splits.
    if not math.isclose(sum(ratios), 1.0):
        raise ValueError("Ratios must sum to 1.")

    # Match extensions case-insensitively so '.jpeg', '.JPG', '.Png', ... are
    # all picked up, and skip anything that is not a regular file.
    image_extensions = ('.jpg', '.jpeg', '.png')
    # os.listdir returns entries in arbitrary order; sorting first makes the
    # shuffle below reproducible for a given seed.
    image_files = sorted(
        name
        for name in os.listdir(images_dir)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(images_dir, name))
    )
    if seed is None:
        random.shuffle(image_files)
    else:
        # A private generator keeps the caller's global random state untouched.
        random.Random(seed).shuffle(image_files)
    print(len(image_files))

    # Train and val sizes are truncated; test takes the remainder so that no
    # image is dropped.
    train_count = int(len(image_files) * train_ratio)
    val_count = int(len(image_files) * val_ratio)
    val_end = train_count + val_count

    splits = {
        'train': image_files[:train_count],
        'val': image_files[train_count:val_end],
        'test': image_files[val_end:],
    }

    for split_name, split_images in splits.items():
        split_images_dir = os.path.join(output_dir, split_name, 'images')
        split_labels_dir = os.path.join(output_dir, split_name, 'labels')
        os.makedirs(split_images_dir, exist_ok=True)
        os.makedirs(split_labels_dir, exist_ok=True)

        for image_file in split_images:
            shutil.copy(
                os.path.join(images_dir, image_file),
                os.path.join(split_images_dir, image_file),
            )

            # The label shares the image's base name, with a .txt extension.
            label_file = os.path.splitext(image_file)[0] + ".txt"
            src_label = os.path.join(labels_dir, label_file)
            if os.path.exists(src_label):
                shutil.copy(src_label, os.path.join(split_labels_dir, label_file))

# Source image and label folders, and the root folder that will receive the
# train/val/test sub-directories.
images_dir = "C:\\Users\\Lenovo\\Documents\\yolo\\dataset\\images"
labels_dir = "C:\\Users\\Lenovo\\Documents\\yolo\\dataset\\labels"
output_dir = "C:\\Users\\Lenovo\\Documents\\yolomodel\\dataset"

# Run the split with the default 70/15/15 ratios.
split_dataset_with_labels(
    images_dir=images_dir,
    labels_dir=labels_dir,
    output_dir=output_dir,
)


682
