In [2]:
import os
import shutil
from pathlib import Path

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Directory containing the raw datasets; the merged output lives in a
# sub-folder of the same directory.
RAW_ROOT = "/Users/sc/Desktop/GP/Arva-Autonomous-Robotic-System-for-Smart-Agriculture/Python/Merged Yolo Pipeline/agridatasets"
MERGED_ROOT = os.path.join(RAW_ROOT, "merged datasets")

# Dataset prefix -> folder name under RAW_ROOT.
DATASETS = dict(
    plant="plant_raw",
    pest="pest_raw",
    road="road_raw",
)

# Dataset prefix -> class ID written into the merged labels.
# None keeps the IDs found in the source labels (plant classes 0-10 are
# already correct); an integer overwrites every label of that dataset.
CLASS_MAP = dict(
    plant=None,
    pest=11,
    road=12,
)

# Split folders expected inside each raw dataset.
SPLITS = ["train", "valid", "test"]

# File extensions (lower-case) treated as images.
IMG_EXTS = [".jpg", ".jpeg", ".png"]

# Ensure the output tree exists: <MERGED_ROOT>/<split>/images and
# <MERGED_ROOT>/<split>/labels. Existing folders are left as they are.
for split in SPLITS:
    for subfolder in ("images", "labels"):
        target_dir = os.path.join(MERGED_ROOT, split, subfolder)
        os.makedirs(target_dir, exist_ok=True)

# Merge images and labels.
# Every image is copied to <MERGED_ROOT>/<split>/images as
# "<dataset>_<original file name>", and a label file sharing that name's stem
# is written to <MERGED_ROOT>/<split>/labels. Datasets with an integer entry
# in CLASS_MAP have the class ID of every label line overwritten by it.
for ds_name, ds_folder in DATASETS.items():
    ds_path = os.path.join(RAW_ROOT, ds_folder)
    # None means "keep the class IDs found in the source labels".
    target_cls = CLASS_MAP[ds_name]

    for split in SPLITS:
        img_dir = Path(ds_path) / split / "images"
        lbl_dir = Path(ds_path) / split / "labels"

        if not img_dir.exists():
            print(f"Warning: {ds_name} {split} images folder not found: {img_dir}")
            continue

        for img in img_dir.iterdir():
            if img.suffix.lower() not in IMG_EXTS:
                continue

            new_name = f"{ds_name}_{img.name}"
            shutil.copy(img, os.path.join(MERGED_ROOT, split, "images", new_name))

            # Swap only the final suffix for ".txt". A plain str.replace() on
            # the extension would also rewrite earlier occurrences of it
            # (e.g. "a.jpg.rf.1.jpg" -> "a.txt.rf.1.txt"), producing a label
            # whose name no longer matches its image.
            lbl_name = img.with_suffix(".txt").name
            src_lbl = lbl_dir / lbl_name
            dst_lbl = os.path.join(MERGED_ROOT, split, "labels", f"{ds_name}_{lbl_name}")

            # Collect the (possibly remapped) label lines; the list stays
            # empty when the image has no label file, which yields an empty
            # label file in the merged dataset.
            new_lines = []
            if src_lbl.exists():
                with open(src_lbl, "r") as f:
                    for line in f:
                        parts = line.split()
                        if not parts:
                            continue  # skip blank lines
                        if target_cls is not None:
                            parts[0] = str(target_cls)
                        new_lines.append(" ".join(parts))

            with open(dst_lbl, "w") as f:
                f.write("\n".join(new_lines))

print("✅ Merge and class remapping complete")

# Validation: check that the first token of every non-blank line in every
# merged label file is a class ID in the expected range.
VALID_IDS = set(range(13))  # 0-12
for split in SPLITS:
    lbl_dir = os.path.join(MERGED_ROOT, split, "labels")
    for file in os.listdir(lbl_dir):
        if not file.endswith(".txt"):
            continue
        with open(os.path.join(lbl_dir, file)) as f:
            for line in f:
                if not line.strip():
                    continue
                cls_id = int(line.split()[0])
                # Raise explicitly instead of using `assert`: assert
                # statements are removed when Python runs with -O, which
                # would silently disable this check. The exception type and
                # message are the same as before.
                if cls_id not in VALID_IDS:
                    raise AssertionError(f"Invalid class ID {cls_id} in file {file}")

print("✅ All class IDs are valid. Merged dataset is ready for YOLOv8.")


KeyboardInterrupt: 