In [None]:
import os
import shutil
import random

In [None]:
def split_data_matching_names(data_dir, output_dir, train_ratio=0.7, test_ratio=0.15, val_ratio=0.15,
                              persons=None, seed=None):
    """Copy matching image/label pairs into per-person train/test/valid folders.

    For every person, files are read from ``<data_dir>/images/<person>`` and
    ``<data_dir>/labels/<person>``. An image is kept only when a label file with
    the same base name and a ``.json`` extension exists. The matched pairs are
    shuffled and copied to ``<output_dir>/<subset>/{images,labels}/<person>``.

    Args:
        data_dir: Root folder containing the ``images`` and ``labels`` trees.
        output_dir: Root folder that receives the ``train``/``test``/``valid`` trees.
        train_ratio: Fraction of each person's pairs copied to ``train``.
        test_ratio: Fraction of each person's pairs copied to ``test``.
        val_ratio: Expected fraction for ``valid``. The ``valid`` subset always
            receives whatever is left after train and test; this value is only
            used to warn when the three ratios do not add up to 1.
        persons: Iterable of person folder names. Defaults to the three names
            this notebook was written for.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            previous behaviour of using the global ``random`` state.

    Returns:
        None. Progress is reported with ``print``.
    """
    subsets = ('train', 'test', 'valid')
    if persons is None:
        persons = ['Devashree', 'Harshu', 'Meenal']

    # val_ratio never enters the index arithmetic below, so an inconsistent
    # set of ratios would otherwise go unnoticed.
    ratio_sum = train_ratio + test_ratio + val_ratio
    if abs(ratio_sum - 1.0) > 1e-9:
        print(f"Warning: ratios sum to {ratio_sum:.3f}, not 1; "
              "'valid' receives whatever remains after train and test.")

    # A private generator keeps a seeded run from disturbing global random state.
    rng = random.Random(seed) if seed is not None else random

    for name in persons:
        images_dir = os.path.join(data_dir, 'images', name)
        labels_dir = os.path.join(data_dir, 'labels', name)

        if not os.path.isdir(images_dir) or not os.path.isdir(labels_dir):
            print(f"Skipping {name}: Missing image or label folder")
            continue

        # Create subset folders for each person
        for subset in subsets:
            os.makedirs(os.path.join(output_dir, subset, 'images', name), exist_ok=True)
            os.makedirs(os.path.join(output_dir, subset, 'labels', name), exist_ok=True)

        # Set membership keeps the matching linear; only regular files count so
        # that a stray sub-directory cannot reach shutil.copy.
        label_names = {
            entry for entry in os.listdir(labels_dir)
            if os.path.isfile(os.path.join(labels_dir, entry))
        }

        # Match image-label pairs based on name. Sorting first makes a seeded
        # shuffle independent of the order os.listdir happens to return.
        matching_pairs = []
        for image_file in sorted(os.listdir(images_dir)):
            if not os.path.isfile(os.path.join(images_dir, image_file)):
                continue
            label_file = os.path.splitext(image_file)[0] + '.json'
            if label_file in label_names:
                matching_pairs.append((image_file, label_file))

        # Drop copies left behind by an earlier run before writing new ones.
        # Without this, re-running the cell reshuffles and the same image can
        # end up in several subsets at once (train/test leakage). Done up front
        # rather than per pair because two images may share one label file.
        for subset in subsets:
            for image_file, label_file in matching_pairs:
                for kind, file_name in (('images', image_file), ('labels', label_file)):
                    stale_path = os.path.join(output_dir, subset, kind, name, file_name)
                    if os.path.isfile(stale_path):
                        os.remove(stale_path)

        rng.shuffle(matching_pairs)

        total_pairs = len(matching_pairs)
        train_split = int(total_pairs * train_ratio)
        test_split = int(total_pairs * (train_ratio + test_ratio))

        for i, (image_file, label_file) in enumerate(matching_pairs):
            if i < train_split:
                subset = 'train'
            elif i < test_split:
                subset = 'test'
            else:
                subset = 'valid'

            shutil.copy(os.path.join(images_dir, image_file),
                        os.path.join(output_dir, subset, 'images', name, image_file))
            shutil.copy(os.path.join(labels_dir, label_file),
                        os.path.join(output_dir, subset, 'labels', name, label_file))

        print(f"{name}: {total_pairs} matched files split into train/test/valid.")

In [None]:
# Input root (read as <data_dir>/images/<person> and <data_dir>/labels/<person>)
# and the root folder that receives the train/test/valid copies.
data_dir, output_dir = 'Dataset', 'Split-Data'

In [None]:
# Run the split with the configured paths and default ratios.
# The function returns nothing and only prints a notice for a person whose
# folders are missing, so the message below is printed whenever no exception is raised.
split_data_matching_names(data_dir=data_dir, output_dir=output_dir)
print("Data split successfully (matching names)!")