In [5]:
import os
import shutil
import random

# Carve a held-out test set out of the TRAIN split by moving a random sample of
# image/label pairs from <dataset>/train into <dataset>/test.

# Set the paths to train, valid, and test
dataset_path = "./dataset"
train_path = os.path.join(dataset_path, "train")
valid_path = os.path.join(dataset_path, "valid")
test_path = os.path.join(dataset_path, "test")

# Tunable split settings
TEST_FRACTION = 0.10  # share of the labelled train images moved to test
SPLIT_SEED = 42  # fixed seed so the same files are picked on a fresh copy of the dataset
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

# Ensure test directories exist
os.makedirs(os.path.join(test_path, "images"), exist_ok=True)
os.makedirs(os.path.join(test_path, "labels"), exist_ok=True)

# Get all images and labels from TRAIN set (sorted so the pre-shuffle order is deterministic)
image_files = sorted(os.listdir(os.path.join(train_path, "images")))
label_files = sorted(os.listdir(os.path.join(train_path, "labels")))

# Keep only image / label files (extension check is case-insensitive)
image_files = [img for img in image_files if img.lower().endswith(IMAGE_EXTENSIONS)]
label_files = [lbl for lbl in label_files if lbl.lower().endswith('.txt')]

# Ensure images and labels match: pair them by file stem instead of rewriting the
# extension with str.replace, which is case-sensitive (breaks on "IMG.JPG") and would
# also rewrite ".jpg"/".png" occurring in the middle of a file name.
label_by_stem = {os.path.splitext(lbl)[0]: lbl for lbl in label_files}
unlabeled_images = [img for img in image_files if os.path.splitext(img)[0] not in label_by_stem]
image_files = [img for img in image_files if os.path.splitext(img)[0] in label_by_stem]
if unlabeled_images:
    # Leaving these in train avoids a FileNotFoundError half-way through the move loop.
    print(f"Skipping {len(unlabeled_images)} images without a matching label file.")

# Shuffle data with a private, seeded RNG (reproducible; global `random` state is untouched)
random.Random(SPLIT_SEED).shuffle(image_files)

# Split sizes
total_images = len(image_files)
test_size = int(total_images * TEST_FRACTION)

# Re-running this cell would otherwise move another TEST_FRACTION of what is left in train.
existing_test_images = [
    name for name in os.listdir(os.path.join(test_path, "images"))
    if name.lower().endswith(IMAGE_EXTENSIONS)
]
if existing_test_images:
    test_size = 0
    print(f"Test set already contains {len(existing_test_images)} images; not moving any more.")

# Select test images and their labels (label names are the real file names found on disk)
test_images = image_files[:test_size]
test_labels = [label_by_stem[os.path.splitext(img)[0]] for img in test_images]

# Move files to test set
for img, lbl in zip(test_images, test_labels):
    shutil.move(os.path.join(train_path, "images", img), os.path.join(test_path, "images", img))
    shutil.move(os.path.join(train_path, "labels", lbl), os.path.join(test_path, "labels", lbl))

print(f"Moved {test_size} images and labels to the test set!") 

Moved 56 images and labels to the test set!
