In [1]:
import os
import random
import shutil


In [2]:

def create_directory_structure(base_dir, sub_dirs):
    """Make sure every requested sub-directory exists under ``base_dir``.

    Args:
        base_dir (str): Parent directory; created too if it is missing,
            because ``os.makedirs`` builds intermediate directories.
        sub_dirs (iterable of str): Names (or relative paths) of the
            directories to create below ``base_dir``.

    Directories that already exist are left untouched (``exist_ok=True``).
    """
    for name in sub_dirs:
        target = os.path.join(base_dir, name)
        os.makedirs(target, exist_ok=True)


In [5]:
def split_dataset(train_dir, test_dir, test_size=0.2, seed=None):
    """
    Move a random share of a training set into a test set.

    Files are picked at random from ``train_dir/images`` and moved to
    ``test_dir/images``. For each picked image, the label file with the same
    base name and a ``.txt`` extension is moved from ``train_dir/labels`` to
    ``test_dir/labels`` when it exists. Files are moved, not copied, so
    calling this again on the same directories moves a further share of
    whatever is still left in the train directory.

    Args:
        train_dir (str): Path to the train directory containing 'images' and 'labels' subdirectories.
        test_dir (str): Path to the test directory; its 'images' and 'labels' subdirectories
            are created if they do not exist.
        test_size (float): Proportion of the images to move, between 0 and 1 inclusive
            (default is 0.2). The number moved is ``int(n_images * test_size)``.
        seed (int, optional): When given, the selection is drawn from a private
            ``random.Random(seed)`` so the same directory contents always yield the
            same split. When None (default), the module-level ``random`` state is used.

    Raises:
        ValueError: If ``test_size`` is not within [0, 1].
        FileNotFoundError: If ``train_dir/images`` does not exist (raised by ``os.listdir``).
    """
    # A negative value would turn the slice below into "everything except the
    # last k files"; a value above 1 would report more files than exist.
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    train_images_dir = os.path.join(train_dir, "images")
    train_labels_dir = os.path.join(train_dir, "labels")
    test_images_dir = os.path.join(test_dir, "images")
    test_labels_dir = os.path.join(test_dir, "labels")

    # Only regular files are candidates (sub-directories are skipped). Sorting
    # removes the dependence on os.listdir's arbitrary ordering, so a given
    # random state always selects the same files.
    image_files = sorted(
        name
        for name in os.listdir(train_images_dir)
        if os.path.isfile(os.path.join(train_images_dir, name))
    )
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)

    num_test_samples = int(len(image_files) * test_size)
    test_image_files = image_files[:num_test_samples]

    os.makedirs(test_images_dir, exist_ok=True)
    os.makedirs(test_labels_dir, exist_ok=True)

    moved_images = 0
    missing_labels = 0
    for image_file in test_image_files:
        # Derive the label name from the base name so any image extension
        # (.png, .jpeg, .JPG, ...) maps to "<base>.txt", not only ".jpg".
        label_file = os.path.splitext(image_file)[0] + ".txt"
        src_image_path = os.path.join(train_images_dir, image_file)
        src_label_path = os.path.join(train_labels_dir, label_file)

        # The file was listed a moment ago; the check only protects against it
        # disappearing in the meantime.
        if os.path.exists(src_image_path):
            shutil.move(src_image_path, os.path.join(test_images_dir, image_file))
            moved_images += 1

        # shutil.move already removes the source, so no separate delete is needed.
        if os.path.exists(src_label_path):
            shutil.move(src_label_path, os.path.join(test_labels_dir, label_file))
        else:
            missing_labels += 1

    print(f"Moved and deleted {moved_images} images and labels from the train directory to the test directory.")
    if missing_labels:
        print(f"Note: {missing_labels} of the selected images had no matching label file.")


In [6]:

# Set the paths
# NOTE: these are machine-specific absolute paths; adjust them before running elsewhere.
train_directory = r"E:\My Research Project\CODE\DATA\COCO\train"
test_directory = r"E:\My Research Project\CODE\DATA\COCO\test"

# split_dataset MOVES files out of the train directory, so every additional run
# of this cell would shift another 20% of the remaining train images into test.
# Only perform the split when the test images directory is missing or empty.
existing_test_images_dir = os.path.join(test_directory, "images")
if os.path.isdir(existing_test_images_dir) and os.listdir(existing_test_images_dir):
    print(f"Test split already present in {test_directory}; skipping so the existing split is kept.")
else:
    # Fixed seed so the shuffle itself is repeatable across kernel restarts.
    random.seed(42)
    split_dataset(train_directory, test_directory, test_size=0.2)


Moved and deleted 18625 images and labels from the train directory to the test directory.
