In [22]:
import os
import shutil
import random

In [23]:
def create_dir_structure(output_path):
    """
    Build the train/val folder layout for images and labels.

    Creates ``images/train``, ``images/val``, ``labels/train`` and
    ``labels/val`` underneath ``output_path``. Missing parent directories
    are created as well, and directories that already exist are left alone.

    Args:
        output_path: Root directory under which the layout is created.
    """
    for subdir in ('images/train', 'images/val', 'labels/train', 'labels/val'):
        target = os.path.join(output_path, subdir)
        os.makedirs(target, exist_ok=True)

In [24]:
def get_file_pairs(base_path):
    """
    Gets the pairs of image and label files.

    An image is any entry of ``base_path`` whose name ends in ``.png``,
    ``.jpg`` or ``.jpeg`` (compared case-insensitively). Its label is the
    entry with the same stem and a ``.txt`` extension. Images without a
    matching label file are skipped.

    Args:
        base_path: Directory that holds images and label files side by side.

    Returns:
        A list of ``(image_file, label_file)`` tuples of bare file names
        (no directory part), sorted by image file name.
    """
    # Every extension carries its leading dot so that a name such as
    # 'notjpg' is not mistaken for an image.
    image_exts = ('.png', '.jpg', '.jpeg')

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # result stable from run to run.
    files = sorted(os.listdir(base_path))
    # A set gives constant-time membership checks in the loop below.
    label_files = {f for f in files if f.endswith('.txt')}

    image_label_pairs = []
    for img_file in files:
        if not img_file.lower().endswith(image_exts):
            continue
        label_file = img_file.rsplit('.', 1)[0] + '.txt'
        if label_file in label_files:
            image_label_pairs.append((img_file, label_file))

    return image_label_pairs

In [25]:
def split_data(image_label_pairs, train_ratio=0.8, seed=None):
    """
    Splits the data into training and validation sets.

    The pairs are shuffled and then cut at ``int(len(pairs) * train_ratio)``.
    The caller's list is not modified; the shuffle is done on a copy.

    Args:
        image_label_pairs: Sequence of ``(image_file, label_file)`` tuples.
        train_ratio: Fraction of the pairs assigned to the training set.
            Must lie in ``[0, 1]``.
        seed: Optional seed for a reproducible split. When ``None`` the
            module-level ``random`` generator is used, so a prior
            ``random.seed(...)`` call still controls the result.

    Returns:
        A ``(train_pairs, val_pairs)`` tuple of lists.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f'train_ratio must be between 0 and 1, got {train_ratio!r}')

    # Shuffle a copy so the caller's list keeps its original order.
    shuffled_pairs = list(image_label_pairs)
    if seed is None:
        random.shuffle(shuffled_pairs)
    else:
        # A private generator leaves the global random state untouched.
        random.Random(seed).shuffle(shuffled_pairs)

    split_index = int(len(shuffled_pairs) * train_ratio)
    train_pairs = shuffled_pairs[:split_index]
    val_pairs = shuffled_pairs[split_index:]

    return train_pairs, val_pairs

In [26]:
def copy_files(pairs, base_path, output_path, dataset_type):
    """
    Copy each image/label pair into its split-specific folder.

    For every pair the image goes to ``<output_path>/images/<dataset_type>``
    and the label to ``<output_path>/labels/<dataset_type>``; file names are
    kept unchanged. The destination folders are not created here.

    Args:
        pairs: Iterable of ``(image_file, label_file)`` file-name tuples.
        base_path: Directory the files are read from.
        output_path: Root of the output directory layout.
        dataset_type: Name of the split sub-folder (the notebook passes
            ``'train'`` and ``'val'``).
    """
    for image_name, label_name in pairs:
        # The image is copied first, then its label, for each pair.
        transfers = (
            (image_name, f'images/{dataset_type}'),
            (label_name, f'labels/{dataset_type}'),
        )
        for file_name, subdir in transfers:
            source = os.path.join(base_path, file_name)
            target = os.path.join(output_path, subdir, file_name)
            shutil.copy(source, target)

In [27]:
# Source folder with images and label files side by side, and the root of
# the train/val layout that is created below.
base_path = 'raw_dataset'
output_path = 'my_data'

# Fix the shuffle so that Restart & Run All always yields the same split.
# Copies accumulate in output_path, so an unseeded re-run could leave the
# same sample in both the train and the val folders.
SEED = 42
random.seed(SEED)

create_dir_structure(output_path)

# Sort before shuffling: directory listing order is arbitrary, and the
# seeded shuffle is only reproducible when it starts from a fixed order.
image_label_pairs = sorted(get_file_pairs(base_path))
train_pairs, val_pairs = split_data(image_label_pairs)

copy_files(train_pairs, base_path, output_path, 'train')
copy_files(val_pairs, base_path, output_path, 'val')