In [4]:
import os
import shutil
import random

In [5]:
# Configuration for the dataset split (all paths are relative).
image_dir = "../prepared_dataset/score/images"      # source images
label_dir = "../prepared_dataset/score/labels"      # source annotation files
output_dir = "../prepared_dataset/score/trainning"  # destination root for the split dataset

# Fraction of the samples assigned to each subset: 70% train, 20% test, 10% val.
split_ratios = dict(train=0.7, test=0.2, val=0.1)

In [6]:
def restructure_dataset(image_dir, label_dir, output_dir, split_ratios, seed=None):
    """
    Restructure image and label directories for object detection training.

    Each image is paired with the label file that has the same base name
    (file name without its extension). The pairs are shuffled, split into
    train/test/val subsets and copied to ``<output_dir>/<split>/images`` and
    ``<output_dir>/<split>/labels``. The source directories are not modified.

    Parameters:
    - image_dir: Directory containing image files.
    - label_dir: Directory containing annotation label files.
    - output_dir: Directory to save the restructured dataset.
    - split_ratios: Dictionary with keys 'train', 'test', 'val' and their corresponding
      fractions. The fractions must be non-negative and sum to 1.0; 'val' receives
      whatever remains after the 'train' and 'test' samples have been taken.
    - seed: Optional seed for the shuffle. When given, the same inputs always produce
      the same split. When None (default), the global ``random`` state is used.

    Raises:
    - ValueError: If the ratios are negative or do not sum to 1.0, if the number of
      images and labels differ, if two files in one directory share a base name, or
      if an image/label has no counterpart with the same base name.
    """
    # Validate split_ratios. Compare with a tolerance: in plain floating-point
    # addition 0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999, not 1.0.
    ratio_values = list(split_ratios.values())
    if any(ratio < 0 for ratio in ratio_values):
        raise ValueError("Split ratios must be non-negative")
    if abs(sum(ratio_values) - 1.0) > 1e-9:
        raise ValueError("Split ratios must sum to 1.0")

    # Index the regular files of both directories by base name
    images_by_stem = _files_by_stem(image_dir)
    labels_by_stem = _files_by_stem(label_dir)

    if len(images_by_stem) != len(labels_by_stem):
        raise ValueError("Number of images and labels do not match")

    # Equal counts do not guarantee that the files correspond to each other,
    # so make sure every image has a label of the same base name and vice versa.
    unmatched = sorted(set(images_by_stem) ^ set(labels_by_stem))
    if unmatched:
        raise ValueError(
            "Images and labels do not pair up by base name; unmatched: "
            + ", ".join(unmatched[:10])
        )

    # Pair images with their corresponding labels (in sorted image-name order)
    data_pairs = [(img_file, labels_by_stem[stem]) for stem, img_file in images_by_stem.items()]

    # Shuffle data pairs; a dedicated generator keeps a seeded run independent
    # of (and harmless to) the global random state.
    if seed is None:
        random.shuffle(data_pairs)
    else:
        random.Random(seed).shuffle(data_pairs)

    # Split data into train, test, and val sets. The small epsilon stops float
    # error from dropping a sample (e.g. 100 * 0.29 == 28.999999999999996).
    total_count = len(data_pairs)
    train_count = int(total_count * split_ratios['train'] + 1e-9)
    test_count = int(total_count * split_ratios['test'] + 1e-9)

    splits = {
        'train': data_pairs[:train_count],
        'test': data_pairs[train_count:train_count + test_count],
        'val': data_pairs[train_count + test_count:]
    }

    # Create output directories and copy files to respective directories
    for split, data in splits.items():
        split_image_dir = os.path.join(output_dir, split, 'images')
        split_label_dir = os.path.join(output_dir, split, 'labels')
        os.makedirs(split_image_dir, exist_ok=True)
        os.makedirs(split_label_dir, exist_ok=True)
        for img_file, lbl_file in data:
            shutil.copy(os.path.join(image_dir, img_file), os.path.join(split_image_dir, img_file))
            shutil.copy(os.path.join(label_dir, lbl_file), os.path.join(split_label_dir, lbl_file))

    print("Dataset restructuring complete.")


def _files_by_stem(directory):
    """Map base name (file name without extension) to file name for each regular file in directory."""
    by_stem = {}
    for file_name in sorted(os.listdir(directory)):
        if not os.path.isfile(os.path.join(directory, file_name)):
            continue
        stem = os.path.splitext(file_name)[0]
        if stem in by_stem:
            # Two files with one base name would make the image/label pairing ambiguous.
            raise ValueError(
                "Duplicate base name in " + str(directory) + ": "
                + by_stem[stem] + ", " + file_name
            )
        by_stem[stem] = file_name
    return by_stem

## Execute

In [7]:
# Run the split with the configuration defined above; files are copied into output_dir.
restructure_dataset(
    image_dir=image_dir,
    label_dir=label_dir,
    output_dir=output_dir,
    split_ratios=split_ratios,
)

Dataset restructuring complete.
