In [1]:
import os
import random
import shutil

In [2]:
# === CONFIG ===
# Source folders holding the images and their YOLO ".txt" label files.
image_dir = "images"
label_dir = "yolo annotations"
# Root folder under which the train/test split is written.
output_base = "dataset"
# Fraction of the images assigned to the training split; the rest go to test.
split_ratio = 0.8

In [3]:
# === SETUP ===
# Destination folders: <output_base>/{images,labels}/{train,test}.
train_img_dir = os.path.join(output_base, "images/train")
test_img_dir = os.path.join(output_base, "images/test")
train_lbl_dir = os.path.join(output_base, "labels/train")
test_lbl_dir = os.path.join(output_base, "labels/test")

# Create every destination up front; exist_ok makes the cell safe to re-run.
for split_dir in (train_img_dir, test_img_dir, train_lbl_dir, test_lbl_dir):
    os.makedirs(split_dir, exist_ok=True)

In [4]:
# === GET ALL IMAGES ===
# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden entries (e.g. ".ipynb_checkpoints", ".DS_Store"). Keep only
# regular, non-hidden files and sort them so the listing is the same every run.
images = sorted(
    entry
    for entry in os.listdir(image_dir)
    if not entry.startswith(".")
    and os.path.isfile(os.path.join(image_dir, entry))
)
len(images)

5192

In [5]:
# === SHUFFLE AND SPLIT ===
# Fixed seed so a fresh kernel run produces the same train/test partition.
split_seed = 42
# Shuffle a sorted copy with a private RNG: sorting removes any dependence on
# the directory listing order, and copying leaves `images` untouched so
# re-running this cell yields the same split instead of reshuffling.
shuffled_images = sorted(images)
random.Random(split_seed).shuffle(shuffled_images)
split_idx = int(len(shuffled_images) * split_ratio)
train_files = shuffled_images[:split_idx]
test_files = shuffled_images[split_idx:]
len(train_files), len(test_files)

(4153, 1039)

In [6]:
# === MOVE FILES ===
def move_files(image_list, img_dst, lbl_dst):
    for img_name in image_list:
        label_name = os.path.splitext(img_name)[0] + ".txt"

        src_img = os.path.join(image_dir, img_name)
        src_lbl = os.path.join(label_dir, label_name)

        dst_img = os.path.join(img_dst, img_name)
        dst_lbl = os.path.join(lbl_dst, label_name)

        if os.path.exists(src_lbl):
            shutil.move(src_img, dst_img)
            shutil.move(src_lbl, dst_lbl)
        else:
            print(f"⚠️ Warning: Label file not found for {img_name}")

In [7]:
# Move the training and test files into their split folders
move_files(train_files, train_img_dir, train_lbl_dir)
move_files(test_files, test_img_dir, test_lbl_dir)

# Report what is actually on disk: images without a label are skipped by
# move_files, so len(train_files) / len(test_files) can over-count.
n_train = len(os.listdir(train_img_dir))
n_test = len(os.listdir(test_img_dir))
print(f"✅ Dataset split complete: {n_train} train, {n_test} test")
print(f"Output structure is saved under: {output_base}/images/ and {output_base}/labels/")


✅ Dataset split complete: 4153 train, 1039 val
Output structure is saved under: dataset/images/ and dataset/labels/
