## **Merging images and annotations among teammates**


In [1]:
from pathlib import Path
from PIL import Image
import random, re, json, shutil

# defining directories
ROOT = Path("dataset")
MERGED = Path("merged_dataset")
IMG_EXTS = {".png"}

# config for balancing not_rubbish images with rubbish images
ADD_NOT_RUBBISH_BALANCED = True
# not_rubbish:rubbish ratio
MAX_NOT_RUBBISH_PER_POSITIVE = 0.2 # 1 here means 1:1 ratio

# train/val split ratio
TRAIN_RATIO = 0.8

# random seed for deterministic splits
random.seed(42)

# creating merged directories
(MERGED / "images" / "train").mkdir(parents=True, exist_ok=True)
(MERGED / "images" / "val").mkdir(parents=True, exist_ok=True)
(MERGED / "labels" / "train").mkdir(parents=True, exist_ok=True)
(MERGED / "labels" / "val").mkdir(parents=True, exist_ok=True)

print("setup success")

setup success


In [2]:
# labelme to YOLO conversion helpers
def unique_name(teammate_name: str, orig_name: str) -> str:
    """
    create a unique, filesystem-friendly base name: teammate_origbasename (no extension)
    prevents file overwriting
    """
    base = Path(orig_name).stem
    # using regex sanitize teammate name and base to avoid spaces/slashes
    t = re.sub(r'[^a-zA-Z0-9_-]+', '_', teammate_name.strip())
    b = re.sub(r'[^a-zA-Z0-9_-]+', '_', base)
    return f"{t}_{b}"


def bbox_from_points(points):
    """
    LabelMe: shape.points can be 2 points (rect) or a polygon list
    return(xmin, ymin, xmax, ymax)
    """

    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    return min(xs), min(ys), max(xs), max(ys)


def to_yolo_line(cls_id, bbox, img_w, img_h):
    """
    YOLO labels must be normalized to [0, -1] and in center-width-height format.
    """
    xmin, ymin, xmax, ymax = bbox

    # "clamp" coords to image bounds (no negatives/out-of-range)
    xmin, ymin = max(0, xmin), max(0, ymin)
    xmax, ymax = min(img_w, xmax), min(img_h, ymax)
    
    # convert to YOLO (normalized)
    xc = ((xmin + xmax) / 2) / img_w
    yc = ((ymin + ymax) / 2) / img_h
    w = (xmax - xmin) / img_w
    h = (ymax - ymin) / img_h

    # return in a format the YOLO expects
    return f"{cls_id} {xc} {yc} {w} {h}"


def is_image_file(p: Path) -> bool:
    return p.suffix.lower() in IMG_EXTS

In [3]:
# finding teammates folders, collect files
teammates = [d for d in ROOT.iterdir() if d.is_dir()]
print("teammate folders:", [t.name for t in teammates])

teammate folders: ['Teammate1', 'RATANAKVISAL_HENG']


In [4]:
# Collect JSONs and build a per-teammate index by (imagePath or image stem)

json_entries = []  # list of (teammate_name, json_path, data)
classes_set = set() # dynamically collect the unique class names (e.g. mattress, cans, bottles, couch, toys).

for tdir in teammates:
    # for each teammate directoy
    # look for an 'annotations' subfolder where LabelMe JSONs live.
    ann_dir = tdir / "annotations"
    if not ann_dir.exists():
        # if a teammate hasn't provide annotations yet, skip
        continue

    # iterate over all .json files in that annotations folder
    for jp in ann_dir.glob("*.json"):
        try:
            # read and parse the JSON file into a Python dict
            data = json.loads(jp.read_text())
        except Exception as e:
            # if the JSON is malformed or unreadable, don't crash the pipeline
            # print a help message and skip the file
            print(f"Skipping bad JSON: {jp} ({e})")
            continue

        # store the trio (who/where/what) so later steps can:
        # - find the corresponding image in that teammate's 'rubbush/' folder
        # - convert shapes -> YOLO labels
        json_entries.append((tdir.name, jp, data))

        # collect class labels from the JSON
        # LabelMe stores shapes (boxes or polygons) under 'shapes'
        for sh in data.get("shapes", []):
            lab = sh.get("label")
            if lab:
                # add to a set to ensure each class appears only one overall.
                classes_set.add(lab)


# sort the unordered set so class indices are stsable and reproducible
CLASSES = sorted(classes_set)

# diagnostics
print(f"\nDiscovered {len(CLASSES)} classes:", CLASSES)
print(f"Total annotation files discovered: {len(json_entries)}")


Discovered 20 classes: ['aluminium_cans', 'blanket', 'car_bumper', 'cardboard', 'couch', 'furniture_scraps', 'garbage', 'glass_bottle', 'jug', 'litter', 'mattress', 'paper_cup', 'plastic_bottle', 'rubbish_bag', 'rug', 'stray_trolley', 'syringe', 'toilet_seat', 'torn_paper', 'trolley']
Total annotation files discovered: 63


In [5]:
# merge annotated images (rubbish with JSON), then add balanced not_rubbish

labels_written = 0
images_copied = 0

# book-keeping:
merged_list = []        # all merged image paths
pos_image_ids = set()   # stems of positive (rubbish) images after unique-naming
neg_image_ids = set()   # stems of negative (not_rubbish) images after unique-naming

def find_rubbish_image(teammate_dir: Path, image_filename: str):
    """
    given a teammate's root folder and an annotation's image filename (imagePath),
    try to find the *actual* image in teammate/rubbish/.
    1) first attempt exact match (same name + extension)
    2) if not found, fall back to matching by stem (ignore extension), so 'foo.jpg' can match 'foo.PNG'
    """
    rub_dir = teammate_dir / "rubbish"
    
    # exact filename match
    if not rub_dir.exists(): return None
    cand = rub_dir / image_filename
    if cand.exists(): return cand

    # fallback: match by stem (any extension)
    stem = Path(image_filename).stem
    for p in rub_dir.iterdir():
        if is_image_file(p) and p.stem == stem:
            return p
    return None


# merge ONLY annotated rubbish images ----
for teammate_name, jp, data in json_entries:
    # prefer labelmel's recorded image name; if missing try same name as JSON but '.jpg'
    image_fn = data.get("imagePath") or jp.with_suffix(".jpg").name  # heuristic fallback
    teammate_dir = ROOT / teammate_name

    # find the matching rubbish image file for this annotation JSON
    img_path = find_rubbish_image(teammate_dir, image_fn)
    if img_path is None:
        # if we can't find the image, skip without error
        print(f"No matching rubbish image for JSON: {jp.name} (expected {image_fn})")
        continue

    # open to get size for normalization when writing to YOLO labels
    try:
        with Image.open(img_path) as im:
            w, h = im.size
    except Exception as e:
        # don't let a corrupt image through
        print(f"Cannot open image {img_path}: {e}")
        continue

    # a unique base name to avoid collisions between teammates
    ub = unique_name(teammate_name, img_path.name)  # unique base
    out_img = (MERGED / "images" / f"{ub}{img_path.suffix.lower()}")

    # copy the image into the merged dataset if we haven't yet
    if not out_img.exists():
        shutil.copy2(img_path, out_img)
        images_copied += 1
        merged_list.append(out_img)
        pos_image_ids.add(out_img.stem) # remember this positive's stem

    # write YOLO label
    out_lbl = (MERGED / "labels" / f"{ub}.txt")
    with open(out_lbl, "a") as f:
        # each shape in LabelMe is a labled region
        for sh in data.get("shapes", []):
            lab = sh.get("label")
            pts = sh.get("points", [])
            # skip if label is empty, no points, or label not in our discovered class list
            if not lab or not pts or lab not in CLASSES:
                continue

            # map class name -> numeric class id via CLASSES ordering
            cls_id = CLASSES.index(lab)

            # convert polygon/rect points into a tight bounding box
            xmin, ymin, xmax, ymax = bbox_from_points(pts)

            # convert that box into YOLO normalized "class xc, yc, w, h" line
            line = to_yolo_line(cls_id, (xmin, ymin, xmax, ymax), w, h)
            if line:
                f.write(line + "\n")
                labels_written += 1


# diagnose
num_positives = len(pos_image_ids)
print(f"Positive (annotated rubbish) images merged: {num_positives}")
print(f"YOLO label lines written: {labels_written}")

# ddd balanced not_rubbish (empty labels) ----
if ADD_NOT_RUBBISH_BALANCED:
    # Gather all available not_rubbish images across teammates
    pool = []
    for tdir in [d for d in ROOT.iterdir() if d.is_dir()]:
        nr_dir = tdir / "not_rubbish"
        if not nr_dir.exists(): 
            continue
        for p in nr_dir.iterdir():
            if is_image_file(p):
                pool.append((tdir.name, p))
    print(f"Found {len(pool)} candidate not_rubbish images across teammates.")

    need = int(num_positives * MAX_NOT_RUBBISH_PER_POSITIVE)
    if need <= 0:
        print("No negatives requested (need <= 0).")
    else:
        # Sample without replacement (cap at available)
        random.shuffle(pool)
        take = min(need, len(pool))
        sampled = pool[:take]

        added_nr = 0
        for teammate_name, p in sampled:
            # build a unique name for the negative image too
            ub = unique_name(teammate_name, p.name)
            out_img = (MERGED / "images" / f"{ub}{p.suffix.lower()}")

            # if by change a positive already used this unique ame, skip
            if out_img.exists():   # avoid collision with a positive of same ub
                continue
            shutil.copy2(p, out_img)
            open(MERGED / "labels" / f"{ub}.txt", "w").close()  # empty label
            neg_image_ids.add(out_img.stem)
            merged_list.append(out_img)
            added_nr += 1

        print(f"Added not_rubbish (negatives): {added_nr} (requested {need})")


Positive (annotated rubbish) images merged: 63
YOLO label lines written: 182
Found 1314 candidate not_rubbish images across teammates.
Added not_rubbish (negatives): 12 (requested 12)


In [6]:
   # save classes and data.yaml (YOYO-ready)
   # classes.txt

(CLASS_TXT := MERGED / "classes.txt").write_text("\n".join(CLASSES))
print("wrote", CLASS_TXT)

# data.yaml for YOLO/Ultralytics
yaml_text = f"""# Auto-generated data.yaml
path: {MERGED.resolve()}
train: images/train
val: images/val

names:
"""

for i, name in enumerate(CLASSES):
    yaml_text += f" {i}: {name}\n"

(DATA_YAML := MERGED / "data.yaml").write_text(yaml_text)
print("wrote", DATA_YAML)

wrote merged_dataset/classes.txt
wrote merged_dataset/data.yaml


In [7]:
# split merged images into train/val and move pairs

all_imgs = sorted((MERGED / "images").glob("*.*"))
random.shuffle(all_imgs)

split_idx = int(TRAIN_RATIO * len(all_imgs))
train_imgs = all_imgs[:split_idx]
val_imgs = all_imgs[split_idx:]

def move_pair(img_path: Path, phase: str):
    dst_img = MERGED / "images" / phase / img_path.name
    lbl_src = MERGED / "labels" / (img_path.stem + ".txt")
    dst_lbl = MERGED / "labels" / phase / (img_path.stem + ".txt")
    shutil.move(str(img_path), str(dst_img))
    if lbl_src.exists():
        shutil.move(str(lbl_src), str(dst_lbl))


for p in train_imgs:
    move_pair(p, "train")
for p in val_imgs:
    move_pair(p, "val")

print(f"train images: {len(train_imgs)}")
print(f"val images: {len(val_imgs)}")

train images: 60
val images: 15


## **Model Development**

In [8]:
from ultralytics import YOLO

DATA_YAML = MERGED / "data.yaml"

assert MERGED.exists(), "merged_dataset not found"
assert DATA_YAML.exists(), "data.yaml missing"

print("using dataset:", MERGED.resolve())
print(DATA_YAML.read_text())

using dataset: /Users/notvisal/Desktop/AI_ENG/FinalProject/COS40007-Smart-City-Civil-and-Construction-Engineering/merged_dataset
# Auto-generated data.yaml
path: /Users/notvisal/Desktop/AI_ENG/FinalProject/COS40007-Smart-City-Civil-and-Construction-Engineering/merged_dataset
train: images/train
val: images/val

names:
 0: aluminium_cans
 1: blanket
 2: car_bumper
 3: cardboard
 4: couch
 5: furniture_scraps
 6: garbage
 7: glass_bottle
 8: jug
 9: litter
 10: mattress
 11: paper_cup
 12: plastic_bottle
 13: rubbish_bag
 14: rug
 15: stray_trolley
 16: syringe
 17: toilet_seat
 18: torn_paper
 19: trolley



In [9]:
# YOLOv8 model size selection
# start with 'n' (nano) for quick iteration; we can upgrade to 's' or 'm' for a bigger model size

base_weights = "yolov8n.pt" 
model = YOLO(base_weights)

# model summary
model

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_s

In [None]:
import numpy

# training starts here
# important:
#   - epoch: 50
#   - imgsz: 640
#   - batch: typically 8-32
#   = device: "0" for gpu, or "cpu" to force CPU

results = model.train(
    data=str(DATA_YAML),
    epochs=90,
    imgsz=960,
    batch=16,
    workers=4,
    patience=8, # early stopping
    optimizer="AdamW",
    lr0=0.002, # initial LR
    # mild colot augmentation
    hsv_h=0.015, # hue
    hsv_s=0.7, # saturation
    hsv_v=0.4, # vibrance
    mosaic=0.1, # keep small (street scenes can get weird with high mosaic)
    weight_decay=0.0005,
    project="runs_theme1",
    name="yolov8n_rubbish",
    device="cpu"
)

Ultralytics 8.3.217 🚀 Python-3.10.7 torch-2.2.2 CPU (Apple M1 Pro)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=merged_dataset/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=90, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=960, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.002, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=0.1, multi_scale=False, name=yolov8n_rubbish, nbs=64, nms=False, opset=None, optimize=False, optimizer=AdamW, overlap_mask=True, patience=8, perspective=0.0, plots=True, pose

In [None]:
# validate on the Val split and generate plots
val_results = model.val(
    data=str(DATA_YAML),
    imgsz=640,
    split="val",
    project="runs_theme1",
    name="yolov8n_rubbish_val",
    device="cpu"
)

print("validation metrics:")
print(val_results)

print("artifacts saved to:", val_results.save_dir)

Ultralytics 8.3.216 🚀 Python-3.12.3 torch-2.8.0 CPU (Apple M1 Pro)
Model summary (fused): 72 layers, 3,009,548 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1684.2±781.6 MB/s, size: 1685.2 KB)
[K[34m[1mval: [0mScanning /Users/notvisal/Desktop/AI_ENG/FinalProject/COS40007-Smart-City-Civil-and-Construction-Engineering/merged_dataset/labels/val.cache... 26 images, 12 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 26/26 81.0Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 0.5it/s 4.1s11.0s
                   all         26         21       0.34      0.325      0.359      0.143
        aluminium_cans          1          1       0.39       0.78      0.497      0.298
             cardboard          1          2     0.0801      0.321      0.112     0.0156
      furniture_scraps          6          6      0.184      0.154      0.171     0.0898
               garbage  

In [None]:
val_dir = MERGED / "images" / "val"
sample_imgs = list(val_dir.glob("*.*"))

pred = model.predict(
    source=[str(p) for p in sample_imgs],
    imgsz=640,
    conf=0.25,
    iou=0.5,
    save=True,
    project="runs_theme1",
    name="yolov8n_rubbish_preview",
    device="cpu"
)

print("preview images saved to:", pred[0].save_dir if pred else "no predictions")


0: 640x640 1 cardboard, 1 furniture_scraps, 145.7ms
1: 640x640 (no detections), 145.7ms
2: 640x640 (no detections), 145.7ms
3: 640x640 (no detections), 145.7ms
4: 640x640 (no detections), 145.7ms
5: 640x640 (no detections), 145.7ms
6: 640x640 (no detections), 145.7ms
7: 640x640 (no detections), 145.7ms
8: 640x640 (no detections), 145.7ms
9: 640x640 (no detections), 145.7ms
10: 640x640 2 cardboards, 1 mattress, 145.7ms
11: 640x640 (no detections), 145.7ms
12: 640x640 1 litter, 145.7ms
13: 640x640 (no detections), 145.7ms
14: 640x640 (no detections), 145.7ms
15: 640x640 (no detections), 145.7ms
16: 640x640 (no detections), 145.7ms
17: 640x640 (no detections), 145.7ms
18: 640x640 (no detections), 145.7ms
19: 640x640 (no detections), 145.7ms
20: 640x640 (no detections), 145.7ms
21: 640x640 (no detections), 145.7ms
22: 640x640 9 garbages, 1 rubbish_bag, 145.7ms
23: 640x640 (no detections), 145.7ms
24: 640x640 (no detections), 145.7ms
25: 640x640 (no detections), 145.7ms
Speed: 2.9ms prepro