In [1]:
import os
import re
import shutil
import random
from PIL import Image

# Paths (UPDATE THESE)
image_dir = "../yolo_dataset/04_bounding_box_expert"  # Update to where your .tif images are
dataset_dir = "../yolo_dataset"
train_ratio = 0.8  # 80% training, 20% validation
random_seed = 42  # Fixed seed so re-running the script reproduces the same split

# Create YOLO directories (images/ and labels/, each with train/ and val/)
train_img_dir = os.path.join(dataset_dir, "images", "train")
val_img_dir = os.path.join(dataset_dir, "images", "val")
train_lbl_dir = os.path.join(dataset_dir, "labels", "train")
val_lbl_dir = os.path.join(dataset_dir, "labels", "val")

for d in (train_img_dir, val_img_dir, train_lbl_dir, val_lbl_dir):
    os.makedirs(d, exist_ok=True)

# Regex to extract bounding boxes from filenames.
# Matches a bracketed list of four non-negative integers separated by ", ",
# e.g. "[10, 20, 110, 220]".
pattern = re.compile(r"\[(\d+), (\d+), (\d+), (\d+)\]")

# Get all images. os.listdir() returns entries in arbitrary order, so sort
# first: together with the seeded shuffle below this makes the split
# deterministic. Without that, a second run would copy some images into the
# other split's directory while the first run's copies are still there,
# leaking the same image into both train and val.
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".tif"))

# Shuffle with a private, seeded generator so the global `random` state is
# left untouched.
random.Random(random_seed).shuffle(image_files)

# Split dataset: the first `train_ratio` share goes to train, the rest to val
train_count = int(len(image_files) * train_ratio)
train_files = image_files[:train_count]
val_files = image_files[train_count:]

# Function to process files
def _to_yolo_box(xmin, ymin, xmax, ymax, img_width, img_height):
    """Convert a pixel-space corner box to a normalized YOLO box.

    Swapped corners are reordered and the box is clamped to the image
    bounds, so every returned value lies in [0, 1].

    Returns:
        A tuple ``(x_center, y_center, width, height)``, or ``None`` when the
        clamped box has no area (which also covers a zero-sized image).
    """
    left, right = sorted((xmin, xmax))
    top, bottom = sorted((ymin, ymax))

    # Clamp to the image so the normalized values can never exceed 1.
    left, right = max(0, left), min(img_width, right)
    top, bottom = max(0, top), min(img_height, bottom)

    if right <= left or bottom <= top:
        return None

    return (
        ((left + right) / 2) / img_width,
        ((top + bottom) / 2) / img_height,
        (right - left) / img_width,
        (bottom - top) / img_height,
    )


def process_images(image_files, img_dest, lbl_dest):
    """Write a YOLO label file for each image and copy the image alongside it.

    The bounding box is parsed from the filename with the module-level
    ``pattern`` and is treated as ``xmin, ymin, xmax, ymax`` in pixels.
    Images are read from the module-level ``image_dir``. Each label file
    holds a single line ``0 x_center y_center width height`` (class id 0,
    values normalized by the image size).

    Files are skipped, with a printed message, when the filename contains no
    bounding box, the image cannot be opened, or the box is empty or lies
    outside the image.

    Args:
        image_files: Filenames (relative to ``image_dir``) to process.
        img_dest: Directory the images are copied into.
        lbl_dest: Directory the ``.txt`` label files are written into.
    """
    for filename in image_files:
        match = pattern.search(filename)
        if not match:
            print(f"Skipping {filename}, no bounding box found.")
            continue

        # Extract bounding box
        xmin, ymin, xmax, ymax = map(int, match.groups())

        # Load image to get dimensions. One unreadable file should not abort
        # the whole run and leave the dataset half-written.
        img_path = os.path.join(image_dir, filename)
        try:
            with Image.open(img_path) as img:
                img_width, img_height = img.size
        except OSError as exc:
            print(f"Skipping {filename}, cannot read image: {exc}")
            continue

        # Convert to YOLO format
        box = _to_yolo_box(xmin, ymin, xmax, ymax, img_width, img_height)
        if box is None:
            print(f"Skipping {filename}, bounding box is empty or outside the image.")
            continue
        x_center, y_center, width, height = box

        # Save label
        label_filename = os.path.splitext(filename)[0] + ".txt"
        label_path = os.path.join(lbl_dest, label_filename)
        with open(label_path, "w") as f:
            f.write(f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

        # Copy image (the source file is left in place)
        shutil.copy(img_path, os.path.join(img_dest, filename))

# Run the conversion for each split in turn: train first, then val.
for split_files, split_img_dir, split_lbl_dir in (
    (train_files, train_img_dir, train_lbl_dir),
    (val_files, val_img_dir, val_lbl_dir),
):
    process_images(split_files, split_img_dir, split_lbl_dir)

# Signal that both splits have been written.
print("Dataset preparation complete!")


Dataset preparation complete!
