## **Merging images and annotations among teammates**


In [1]:
IMG_EXTS = {".png", ".jpg", ".jpeg"}
ADD_NOT_RUBBISH_BALANCED = True      # include negatives
MAX_NOT_RUBBISH_PER_POSITIVE = 1.0   # start ~1:1 negatives
TRAIN_RATIO = 0.8


In [2]:
from pathlib import Path
from PIL import Image
import random, re, json, shutil
import xml.etree.ElementTree as ET

# defining directories
ROOT = Path("dataset")
MERGED = Path("merged_dataset")
IMG_EXTS = {".png"}

# config for balancing not_rubbish images with rubbish images
ADD_NOT_RUBBISH_BALANCED = True
# not_rubbish:rubbish ratio
MAX_NOT_RUBBISH_PER_POSITIVE = 0.1 # 1 here means 1:1 ratio

# train/val split ratio
TRAIN_RATIO = 0.8

# random seed for deterministic splits
random.seed(42)

# creating merged directories
(MERGED / "images" / "train").mkdir(parents=True, exist_ok=True)
(MERGED / "images" / "val").mkdir(parents=True, exist_ok=True)
(MERGED / "labels" / "train").mkdir(parents=True, exist_ok=True)
(MERGED / "labels" / "val").mkdir(parents=True, exist_ok=True)

print("setup success")

setup success


In [3]:
# labelme to YOLO conversion helpers
def unique_name(teammate_name: str, orig_name: str) -> str:
    """
    create a unique, filesystem-friendly base name: teammate_origbasename (no extension)
    prevents file overwriting
    """
    base = Path(orig_name).stem
    # using regex sanitize teammate name and base to avoid spaces/slashes
    t = re.sub(r'[^a-zA-Z0-9_-]+', '_', teammate_name.strip())
    b = re.sub(r'[^a-zA-Z0-9_-]+', '_', base)
    return f"{t}_{b}"


def bbox_from_points(points):
    """
    LabelMe: shape.points can be 2 points (rect) or a polygon list
    return(xmin, ymin, xmax, ymax)
    """

    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    return min(xs), min(ys), max(xs), max(ys)


def to_yolo_line(cls_id, bbox, img_w, img_h):
    """
    YOLO labels must be normalized to [0, -1] and in center-width-height format.
    """
    xmin, ymin, xmax, ymax = bbox

    # "clamp" coords to image bounds (no negatives/out-of-range)
    xmin, ymin = max(0, xmin), max(0, ymin)
    xmax, ymax = min(img_w, xmax), min(img_h, ymax)
    
    # convert to YOLO (normalized)
    xc = ((xmin + xmax) / 2) / img_w
    yc = ((ymin + ymax) / 2) / img_h
    w = (xmax - xmin) / img_w
    h = (ymax - ymin) / img_h

    # return in a format the YOLO expects
    return f"{cls_id} {xc} {yc} {w} {h}"


def is_image_file(p: Path) -> bool:
    return p.suffix.lower() in IMG_EXTS

In [4]:
# finding teammates folders, collect files
teammates = [d for d in ROOT.iterdir() if d.is_dir()]
print("teammate folders:", [t.name for t in teammates])

teammate folders: ['Ayush Bhandari', 'Ratanakvisal Heng']


#### adding support for both JSON and XML annotations


In [5]:
# parse json annotations function
def parse_labelme_json(jp):
    """Parse LabelMe JSON -> unified dict."""
    data = json.loads(jp.read_text())
    # ensure required keys exist
    data.setdefault("imagePath", None)
    data.setdefault("imageWidth", data.get("imageWidth"))
    data.setdefault("imageHeight", data.get("imageHeight"))
    # LabelMe already has shapes with 'points'
    # we only keep rectangle/polygon by extracting 2-point rectangles for downstream
    unified_shapes = []
    for sh in data.get("shapes", []):
        lab = sh.get("label")
        pts = sh.get("points", [])
        if not lab or not pts:
            continue

        # normalize to 2-point rectangle [[xmin, ymin], [xmax, ymax]]
        xs = [p[0] for p in pts]
        ys = [p[1] for p in pts]
        rect = [[float(min(xs)), float(min(ys))], [float(max(xs)), float(max(ys))]]
        unified_shapes.append({"label": lab, "points": rect})
    
    return {
        "imagePath": data.get("imagePath"),
        "imageWidth": data.get("imageWidth"),
        "imageHeight": data.get("imageHeight"),
        "shapes": unified_shapes,
    }


# parse XML function
def parse_voc_xml(xp):
    """Parse Pascal VOC XML -> unified dict."""
    root = ET.fromstring(xp.read_text())

    # filenmae
    fn_node = root.find("filename")
    image_path = fn_node.text.strip() if fn_node is not None else None

    # size
    sz = root.find("size")
    w = h = None
    if sz is not None: 
        try:
            w = int(sz.find("width").text)
            h = int(sz.find("height").text)
        except Exception:
            pass

    # objects -> shapes
    unified_shapes = []
    for obj in root.findall("object"):
        name_node = obj.find("name")
        lab = name_node.text.strip() if name_node is not None else None
        bb = obj.find("bndbox")
        if lab and bb is not None:
            try:
                xmin = float(bb.find("xmin").text)
                ymin = float(bb.find("ymin").text)
                xmax = float(bb.find("xmax").text)
                ymax = float(bb.find("ymax").text)
            except Exception:
                continue

            # normalize to the same format as LabelMe: two-point rect
            rect = [[xmin, ymin], [xmax, ymax]]
            unified_shapes.append({"label": lab, "points": rect})
    
    return {
        "imagePath": image_path, # may need stem matching fallback later
        "imageWidth": w, 
        "imageHeight": h,
        "shapes": unified_shapes,
    }

In [6]:
# Collect JSONs and build a per-teammate index by (imagePath or image stem)

json_entries = []  # list of (teammate_name, json_path, data)
classes_set = set() # dynamically collect the unique class names (e.g. mattress, cans, bottles, couch, toys).


for tdir in teammates:
    # for each teammate directoy
    # look for an 'annotations' subfolder where LabelMe JSONs live.
    ann_dir = tdir / "annotations"
    if not ann_dir.exists():
        # if a teammate hasn't provide annotations yet, skip
        continue

    # iterate over all .json and .xml files in that annotations folder
    for ap in list(ann_dir.glob("*.json")) + list(ann_dir.glob("*.xml")):
        try:
            # read and parse the JSON file into a Python dict
            if ap.suffix.lower() == ".json":
                data = parse_labelme_json(ap)
            else: # ".xml"
                data = parse_voc_xml(ap)
        except Exception as e:
            # if the JSON is malformed or unreadable, don't crash the pipeline
            # print a help message and skip the file
            print(f"Skipping bad annotation files: {ap} ({e})")
            continue

        # store the trio (who/where/what) so later steps can:
        # - find the corresponding image in that teammate's 'rubbush/' folder
        # - convert shapes -> YOLO labels
        json_entries.append((tdir.name, ap, data))

        # collect class labels from the JSON
        # LabelMe stores shapes (boxes or polygons) under 'shapes'
        for sh in data.get("shapes", []):
            lab = sh.get("label")
            if lab:
                # add to a set to ensure each class appears only one overall.
                classes_set.add(lab)


# sort the unordered set so class indices are stsable and reproducible
CLASSES = sorted(classes_set)

# diagnostics
print(f"\nDiscovered {len(CLASSES)} classes:", CLASSES)
print(f"Total annotation files discovered: {len(json_entries)}")


Discovered 20 classes: ['aluminium_cans', 'blanket', 'car_bumper', 'cardboard', 'couch', 'furniture_scraps', 'garbage', 'glass_bottle', 'jug', 'litter', 'mattress', 'paper_cup', 'plastic_bottle', 'rubbish_bag', 'rug', 'stray_trolley', 'syringe', 'toilet_seat', 'torn_paper', 'trolley']
Total annotation files discovered: 63


In [7]:
# canonical classes + alias map and a normalizer
CANONICAL = [
    "mattress",
    "electrical_goods",
    "couch",
    "trolley",
    "toy",
    "clothes",
    "cardboard",
    "rubbish_bag",
    "furniture",
    "litter",
    "carton",
    "aluminium_cans",
    "blanket",
    "bottle"
]

# handcrafted aliases (typos and common name mismatches)
ALIASES = {
    # spacing/hyphens/underscores variants
    "rubbish bag": "rubbish_bag",
    
    # common mislabels / synonyms
    "stray_trolley": "trolley",
    "scrap": "litter",
    "jug": "carton",
    "garbage": "rubbish_bag",
    "furniture_scraps": "furniture",
    "chair": "furniture",
    "glass_bottle": "bottle",
    "plastic_bottle": "bottle",
    "torn_paper": "litter"
}

# pre-compute a lowercase lookup for canonical names
CANON_LOWER = {c.lower(): c for c in CANONICAL}

def canonicalize(raw_label: str, fuzzy=True):
    """
    Convert raw teammate label -> canonical class name or None if unknown.
    Strat:
        1) normalize spaces/hyphens/underscores, lowercase
        2) apply ALIASES
        3) exact match to CANONICAL
    """

    if not raw_label:
        return None
    
    s = raw_label.strip().lower()
    s = re.sub(r"[\s\-]+", "_", s) # spaces/hyphens -> underscore
    s = re.sub(r"__+", "_", s) # collapse doubles

    # direct alias
    if s in ALIASES:
        s = ALIASES[s]

    # exact canonical match
    if s in CANON_LOWER:
        return CANON_LOWER[s]
    
    return None

In [8]:
from collections import Counter, defaultdict

normalized_counts = Counter()
unknown_examples = defaultdict(int)

total_shapes_before = 0
total_shapes_after = 0

for i, (tname, ap, data) in enumerate(json_entries):
    new_shapes = []
    for sh in data.get("shapes", []):
        total_shapes_before += 1
        raw = sh.get("label")
        pts = sh.get("points")
        if not raw or not pts:
            continue

        canon = canonicalize(raw)
        if canon is None:
            # keep a small log to fix upstream labels later
            key = f"{raw} -> ? (in {ap.name})"
            unknown_examples[key] += 1
            continue

        normalized_counts[canon] += 1
        new_shapes.append({"label": canon, "points": sh["points"]})
    
    data["shapes"] = new_shapes
    json_entries[i] = (tname, ap, data)

    total_shapes_after += len(new_shapes)

print("normalization complete")
print("shapes before:", total_shapes_before, "after:", total_shapes_after)
print("\nclass counts after normalization:")
for cls in CANONICAL:
    print(f"{cls:16s} {normalized_counts[cls]}")

if unknown_examples:
    print("\n unknown/mismatched labels (top 15):")
    for k, v in list(sorted(unknown_examples.items(), key=lambda x: -x[1])):
        print(f" {v:4d} {k}")

else:
    print("\n all good")

normalization complete
shapes before: 182 after: 177

class counts after normalization:
mattress         7
electrical_goods 0
couch            4
trolley          6
toy              0
clothes          0
cardboard        28
rubbish_bag      25
furniture        43
litter           41
carton           2
aluminium_cans   9
blanket          2
bottle           10

 unknown/mismatched labels (top 15):
    1 toilet_seat -> ? (in rubbish-2198.json)
    1 syringe -> ? (in rubbish-2227.json)
    1 car_bumper -> ? (in rubbish-2232.json)
    1 paper_cup -> ? (in rubbish-2232.json)
    1 rug -> ? (in rubbish-2235.json)


In [9]:
# merge annotated images (rubbish with JSON), then add balanced not_rubbish

labels_written = 0
images_copied = 0

# book-keeping:
merged_list = []        # all merged image paths
pos_image_ids = set()   # stems of positive (rubbish) images after unique-naming
neg_image_ids = set()   # stems of negative (not_rubbish) images after unique-naming

def find_rubbish_image(teammate_dir: Path, image_filename: str):
    """
    given a teammate's root folder and an annotation's image filename (imagePath),
    try to find the *actual* image in teammate/rubbish/.
    1) first attempt exact match (same name + extension)
    2) if not found, fall back to matching by stem (ignore extension), so 'foo.jpg' can match 'foo.PNG'
    """
    rub_dir = teammate_dir / "rubbish"
    
    # exact filename match
    if not rub_dir.exists(): return None
    cand = rub_dir / image_filename
    if cand.exists(): return cand

    # fallback: match by stem (any extension)
    stem = Path(image_filename).stem
    for p in rub_dir.iterdir():
        if is_image_file(p) and p.stem == stem:
            return p
    return None


# merge ONLY annotated rubbish images ----
for teammate_name, jp, data in json_entries:
    # prefer labelmel's recorded image name; if missing try same name as JSON but '.jpg'
    image_fn = data.get("imagePath") or jp.with_suffix(".jpg").name  # heuristic fallback
    teammate_dir = ROOT / teammate_name

    # find the matching rubbish image file for this annotation JSON
    img_path = find_rubbish_image(teammate_dir, image_fn)
    if img_path is None:
        # if we can't find the image, skip without error
        print(f"No matching rubbish image for JSON: {jp.name} (expected {image_fn})")
        continue

    # open to get size for normalization when writing to YOLO labels
    try:
        with Image.open(img_path) as im:
            w, h = im.size
    except Exception as e:
        # don't let a corrupt image through
        print(f"Cannot open image {img_path}: {e}")
        continue

    # a unique base name to avoid collisions between teammates
    ub = unique_name(teammate_name, img_path.name)  # unique base
    out_img = (MERGED / "images" / f"{ub}{img_path.suffix.lower()}")

    # copy the image into the merged dataset if we haven't yet
    if not out_img.exists():
        shutil.copy2(img_path, out_img)
        images_copied += 1
        merged_list.append(out_img)
        pos_image_ids.add(out_img.stem) # remember this positive's stem

    # write YOLO label
    out_lbl = (MERGED / "labels" / f"{ub}.txt")
    with open(out_lbl, "a") as f:
        # each shape in LabelMe is a labled region
        for sh in data.get("shapes", []):
            lab = sh.get("label")
            pts = sh.get("points", [])
            # skip if label is empty, no points, or label not in our discovered class list
            if not lab or not pts or lab not in CLASSES:
                continue

            # map class name -> numeric class id via CLASSES ordering
            cls_id = CLASSES.index(lab)

            # convert polygon/rect points into a tight bounding box
            xmin, ymin, xmax, ymax = bbox_from_points(pts)

            # convert that box into YOLO normalized "class xc, yc, w, h" line
            line = to_yolo_line(cls_id, (xmin, ymin, xmax, ymax), w, h)
            if line:
                f.write(line + "\n")
                labels_written += 1


# diagnose
num_positives = len(pos_image_ids)
print(f"Positive (annotated rubbish) images merged: {num_positives}")
print(f"YOLO label lines written: {labels_written}")

# ddd balanced not_rubbish (empty labels) ----
if ADD_NOT_RUBBISH_BALANCED:
    # Gather all available not_rubbish images across teammates
    pool = []
    for tdir in [d for d in ROOT.iterdir() if d.is_dir()]:
        nr_dir = tdir / "not_rubbish"
        if not nr_dir.exists(): 
            continue
        for p in nr_dir.iterdir():
            if is_image_file(p):
                pool.append((tdir.name, p))
    print(f"Found {len(pool)} candidate not_rubbish images across teammates.")

    need = int(num_positives * MAX_NOT_RUBBISH_PER_POSITIVE)
    if need <= 0:
        print("No negatives requested (need <= 0).")
    else:
        # Sample without replacement (cap at available)
        random.shuffle(pool)
        take = min(need, len(pool))
        sampled = pool[:take]

        added_nr = 0
        for teammate_name, p in sampled:
            # build a unique name for the negative image too
            ub = unique_name(teammate_name, p.name)
            out_img = (MERGED / "images" / f"{ub}{p.suffix.lower()}")

            # if by change a positive already used this unique ame, skip
            if out_img.exists():   # avoid collision with a positive of same ub
                continue
            shutil.copy2(p, out_img)
            open(MERGED / "labels" / f"{ub}.txt", "w").close()  # empty label
            neg_image_ids.add(out_img.stem)
            merged_list.append(out_img)
            added_nr += 1

        print(f"Added not_rubbish (negatives): {added_nr} (requested {need})")


Positive (annotated rubbish) images merged: 63
YOLO label lines written: 122
Found 1392 candidate not_rubbish images across teammates.
Added not_rubbish (negatives): 6 (requested 6)


In [10]:
   # save classes and data.yaml (YOYO-ready)
   # classes.txt

(CLASS_TXT := MERGED / "classes.txt").write_text("\n".join(CLASSES))
print("wrote", CLASS_TXT)

# data.yaml for YOLO/Ultralytics
yaml_text = f"""# Auto-generated data.yaml
path: {MERGED.resolve()}
train: images/train
val: images/val

names:
"""

for i, name in enumerate(CLASSES):
    yaml_text += f" {i}: {name}\n"

(DATA_YAML := MERGED / "data.yaml").write_text(yaml_text)
print("wrote", DATA_YAML)

wrote merged_dataset\classes.txt
wrote merged_dataset\data.yaml


In [15]:
# split merged images into train/val and move pairs

all_imgs = sorted((MERGED / "images").glob("*.*"))
random.shuffle(all_imgs)

split_idx = int(TRAIN_RATIO * len(all_imgs))
train_imgs = all_imgs[:split_idx]
val_imgs = all_imgs[split_idx:]

def move_pair(img_path: Path, phase: str):
    dst_img = MERGED / "images" / phase / img_path.name
    lbl_src = MERGED / "labels" / (img_path.stem + ".txt")
    dst_lbl = MERGED / "labels" / phase / (img_path.stem + ".txt")
    shutil.move(str(img_path), str(dst_img))
    if lbl_src.exists():
        shutil.move(str(lbl_src), str(dst_lbl))


for p in train_imgs:
    move_pair(p, "train")
for p in val_imgs:
    move_pair(p, "val")

print(f"train images: {len(train_imgs)}")
print(f"val images: {len(val_imgs)}")

train images: 55
val images: 14


## **Data Augmentation & Data Balancing goes here**


In [11]:
from collections import Counter
from pathlib import Path

# Load names dynamically from the merged dataset (fallback to CANONICAL if missing)
names_path = MERGED / "classes.txt"
if names_path.exists():
    NAMES = [ln.strip() for ln in names_path.read_text().splitlines() if ln.strip()]
else:
    NAMES = CANONICAL[:]  # fallback
id2name = {i: n for i, n in enumerate(NAMES)}

train_img_dir = MERGED / "images" / "train"
train_lbl_dir = MERGED / "labels" / "train"

def read_labels(p: Path):
    boxes, clss = [], []
    if not p.exists(): return boxes, clss
    with open(p) as f:
        for ln in f:
            a = ln.strip().split()
            if len(a) != 5: continue
            cid = int(a[0]); x,y,w,h = map(float, a[1:])
            boxes.append([x,y,w,h]); clss.append(cid)
    return boxes, clss

counts = Counter()
imgs = 0
for imgp in sorted(train_img_dir.glob("*.*")):
    lblp = train_lbl_dir / f"{imgp.stem}.txt"
    _, clss = read_labels(lblp)
    for cid in clss:
        if cid not in id2name:  # guard against out-of-range IDs
            print(f"[WARN] {lblp.name}: class id {cid} not in 0..{len(NAMES)-1}")
            continue
        counts[id2name[cid]] += 1
    imgs += 1

print(f"TRAIN images: {imgs}")
print("TRAIN box counts per class:")
for c in NAMES:
    print(f"{c:18s} {counts[c]}")


TRAIN images: 55
TRAIN box counts per class:
aluminium_cans     4
blanket            0
car_bumper         0
cardboard          21
couch              4
furniture_scraps   0
garbage            0
glass_bottle       0
jug                0
litter             27
mattress           5
paper_cup          0
plastic_bottle     0
rubbish_bag        23
rug                0
stray_trolley      0
syringe            0
toilet_seat        0
torn_paper         0
trolley            4


In [None]:
# Automatic Data Augmentation & Balancing (TRAIN only, uses NAMES from classes.txt) 
# pip install albumentations==1.4.8 opencv-python
from pathlib import Path
from collections import Counter, defaultdict
import albumentations as A
import cv2, random, math

random.seed(42)

# Load NAMES from merged_dataset/classes.txt (same as report cell)
names_path = MERGED / "classes.txt"
if names_path.exists():
    NAMES = [ln.strip() for ln in names_path.read_text().splitlines() if ln.strip()]
else:
    NAMES = CANONICAL[:]
name2id = {c:i for i,c in enumerate(NAMES)}
id2name = {i:c for c,i in name2id.items()}

train_img_dir = MERGED / "images" / "train"
train_lbl_dir = MERGED / "labels" / "train"

AUTO_ALPHA = 0.60      # each class should reach at least 60% of current max
AUTO_MIN_FLOOR = 40    # never less than 40 boxes
MAX_AUG_PER_IMAGE = 3  # cap reuse of same seed

def load_yolo(p: Path):
    boxes, clss = [], []
    if not p.exists(): return boxes, clss
    with open(p) as f:
        for ln in f:
            a = ln.strip().split()
            if len(a) != 5: continue
            cid = int(a[0]); x,y,w,h = map(float, a[1:])
            boxes.append([x,y,w,h]); clss.append(cid)
    return boxes, clss

def save_yolo(p: Path, boxes, clss):
    with open(p, "w") as f:
        for (x,y,w,h), cid in zip(boxes, clss):
            f.write(f"{cid} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

# 1) current counts
box_counts = Counter()
img_list = sorted(train_img_dir.glob("*.*"))
for imgp in img_list:
    lblp = train_lbl_dir / f"{imgp.stem}.txt"
    _, clss = load_yolo(lblp)
    for cid in clss:
        if cid in id2name:
            box_counts[id2name[cid]] += 1
        else:
            print(f"[WARN] {lblp.name}: class id {cid} not in 0..{len(NAMES)-1}")

current_max = max((box_counts[c] for c in NAMES), default=0)

# 2) targets
targets = {}
for cls in NAMES:
    target = max(int(math.ceil(current_max * AUTO_ALPHA)), AUTO_MIN_FLOOR)
    targets[cls] = max(box_counts[cls], target)
auto_targets = {c:t for c,t in targets.items() if box_counts[c] < t}

print("AUTO policy:")
print(f"  current_max={current_max}, AUTO_ALPHA={AUTO_ALPHA}, AUTO_MIN_FLOOR={AUTO_MIN_FLOOR}")
print("\nCurrent -> Target boxes (TRAIN):")
for c in NAMES:
    mark = " *" if c in auto_targets else ""
    print(f"{c:18s} {box_counts[c]:4d} -> {targets[c]:4d}{mark}")
if not auto_targets:
    print("\nAll classes already meet targets. No augmentation needed.")

# 3) augmentation pipeline
aug = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.OneOf([A.RandomBrightnessContrast(0.2,0.2,p=1.0), A.CLAHE(p=1.0)], p=0.7),
    A.GaussNoise(var_limit=(5.0,30.0), p=0.3),
    A.MotionBlur(blur_limit=3, p=0.2),
    A.Affine(scale=(0.9,1.1), translate_percent=(0.0,0.05), rotate=(-3,3), shear=(-3,3), p=0.5),
    A.RandomResizedCrop(height=960, width=960, scale=(0.85,1.0), ratio=(0.9,1.1), p=0.5),
], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'], min_area=4, min_visibility=0.20))

# 4) seed images by class
imgs_by_class = defaultdict(list)
for imgp in img_list:
    lblp = train_lbl_dir / f"{imgp.stem}.txt"
    boxes, clss = load_yolo(lblp)
    if not boxes: 
        continue
    present = {id2name[cid] for cid in clss if cid in id2name}
    for c in present:
        imgs_by_class[c].append((imgp, lblp))

# 5) augment
augmented_files = 0
seed_use = Counter()

for cls, tgt in auto_targets.items():
    need = tgt - box_counts[cls]
    cands = imgs_by_class.get(cls, [])
    if not cands:
        print(f"[WARN] No TRAIN images contain '{cls}'.")
        continue

    i = 0
    stagnation = 0
    made = True
    while need > 0 and stagnation < 2:
        imgp, lblp = cands[i % len(cands)]
        if seed_use[imgp] >= MAX_AUG_PER_IMAGE:
            i += 1
            if i % len(cands) == 0 and need > 0: stagnation += 1
            continue

        img = cv2.imread(str(imgp))
        if img is None:
            i += 1
            if i % len(cands) == 0 and need > 0: stagnation += 1
            continue

        boxes, clss = load_yolo(lblp)
        t = aug(image=img, bboxes=boxes, class_labels=clss)
        bbs, cls_aug = t["bboxes"], t["class_labels"]
        if not bbs:
            i += 1
            if i % len(cands) == 0 and need > 0: stagnation += 1
            continue

        # count only valid target-class boxes
        inc = sum(1 for cid in cls_aug if cid in id2name and id2name[cid] == cls)
        if inc == 0:
            i += 1
            if i % len(cands) == 0: stagnation += 1
            continue

        new_stem = f"{imgp.stem}_aug_{cls}_{need}"
        out_img = train_img_dir / f"{new_stem}{imgp.suffix}"
        out_lbl = train_lbl_dir / f"{new_stem}.txt"
        cv2.imwrite(str(out_img), t["image"])
        save_yolo(out_lbl, bbs, cls_aug)
        augmented_files += 1
        seed_use[imgp] += 1

        box_counts[cls] += inc
        need = tgt - box_counts[cls]
        i += 1
        made = True
        if i % len(cands) == 0 and need > 0:
            stagnation += 0 if made else 1
            made = False

    if need > 0:
        print(f"[INFO] Stopped early for '{cls}': remaining need={need}")

print(f"\nAugmented files created: {augmented_files}")
print("Final TRAIN box counts:")
for c in NAMES:
    print(f"{c:18s} {box_counts[c]}")


AUTO policy:
  current_max=27, AUTO_ALPHA=0.6, AUTO_MIN_FLOOR=40

Current -> Target boxes (TRAIN):
aluminium_cans        4 ->   40 *
blanket               0 ->   40 *
car_bumper            0 ->   40 *
cardboard            21 ->   40 *
couch                 4 ->   40 *
furniture_scraps      0 ->   40 *
garbage               0 ->   40 *
glass_bottle          0 ->   40 *
jug                   0 ->   40 *
litter               27 ->   40 *
mattress              5 ->   40 *
paper_cup             0 ->   40 *
plastic_bottle        0 ->   40 *
rubbish_bag          23 ->   40 *
rug                   0 ->   40 *
stray_trolley         0 ->   40 *
syringe               0 ->   40 *
toilet_seat           0 ->   40 *
torn_paper            0 ->   40 *
trolley               4 ->   40 *
[INFO] Stopped early for 'aluminium_cans': remaining need=24
[WARN] No TRAIN images contain 'blanket'.
[WARN] No TRAIN images contain 'car_bumper'.
[INFO] Stopped early for 'couch': remaining need=24
[WARN] No TRAIN image

In [14]:
from collections import Counter
from pathlib import Path  

lbl_dir = Path("merged_dataset/labels/train")
counts = Counter()

for f in lbl_dir.glob("*.txt"):
    for line in f.read_text().splitlines():
        parts = line.strip().split()
        if len(parts) == 5:
            counts[int(parts[0])] += 1

print("Total labels:", sum(counts.values()))
print("Per-class counts:", counts)


Total labels: 253
Per-class counts: Counter({9: 69, 3: 66, 13: 52, 10: 20, 0: 16, 4: 16, 19: 14})


In [20]:
import pandas as pd
from collections import Counter
from pathlib import Path

# Define canonical class list
class_names = [
    "aluminium_cans", "blanket", "car_bumper", "cardboard", "couch",
    "furniture_scraps", "garbage", "glass_bottle", "jug", "litter",
    "mattress", "paper_cup", "plastic_bottle", "rubbish_bag", "rug",
    "stray_trolley", "syringe", "toilet_seat", "torn_paper", "trolley"
]

# Folder containing YOLO labels
lbl_dir = Path("merged_dataset/labels/train")
counts = Counter()

# Count all class IDs
for f in lbl_dir.glob("*.txt"):
    for line in f.read_text().splitlines():
        parts = line.strip().split()
        if len(parts) == 5:
            counts[int(parts[0])] += 1

# Build readable summary
total_labels = sum(counts.values())
print(f"Total labels: {total_labels}\n")
print("Class Distribution:\n--------------------")

data = []
for i, name in enumerate(class_names):
    count = counts.get(i, 0)
    data.append({"Class ID": i, "Class Name": name, "Count": count})
    print(f"{i:>2}. {name:<18} -> {count:>3}")

# Check missing classes
missing = [name for i, name in enumerate(class_names) if i not in counts]
if missing:
    print("\nClasses not found in dataset:")
    print(", ".join(missing))
else:
    print("\nAll classes found!")

# Export to CSV
df = pd.DataFrame(data)
output_path = Path("class_distribution.csv")
df.to_csv(output_path, index=False)
print(f"\nCSV saved to: {output_path.resolve()}")


Total labels: 253

Class Distribution:
--------------------
 0. aluminium_cans     ->  16
 1. blanket            ->   0
 2. car_bumper         ->   0
 3. cardboard          ->  66
 4. couch              ->  16
 5. furniture_scraps   ->   0
 6. garbage            ->   0
 7. glass_bottle       ->   0
 8. jug                ->   0
 9. litter             ->  69
10. mattress           ->  20
11. paper_cup          ->   0
12. plastic_bottle     ->   0
13. rubbish_bag        ->  52
14. rug                ->   0
15. stray_trolley      ->   0
16. syringe            ->   0
17. toilet_seat        ->   0
18. torn_paper         ->   0
19. trolley            ->  14

Classes not found in dataset:
blanket, car_bumper, furniture_scraps, garbage, glass_bottle, jug, paper_cup, plastic_bottle, rug, stray_trolley, syringe, toilet_seat, torn_paper

CSV saved to: C:\Users\ayush\Desktop\Project_root\class_distribution.csv


## **Model Development**

In [22]:
from ultralytics import YOLO

DATA_YAML = MERGED / "data.yaml"

assert MERGED.exists(), "merged_dataset not found"
assert DATA_YAML.exists(), "data.yaml missing"

print("using dataset:", MERGED.resolve())
print(DATA_YAML.read_text())

using dataset: C:\Users\ayush\Desktop\Project_root\merged_dataset
# Auto-generated data.yaml
path: C:\Users\ayush\Desktop\Project_root\merged_dataset
train: images/train
val: images/val

names:
 0: aluminium_cans
 1: blanket
 2: car_bumper
 3: cardboard
 4: couch
 5: furniture_scraps
 6: garbage
 7: glass_bottle
 8: jug
 9: litter
 10: mattress
 11: paper_cup
 12: plastic_bottle
 13: rubbish_bag
 14: rug
 15: stray_trolley
 16: syringe
 17: toilet_seat
 18: torn_paper
 19: trolley



In [None]:
# YOLOv8 model size selection
# start with 'n' (nano) for quick iteration; we can upgrade to 's' or 'm' for a bigger model size

base_weights = "yolov8s.pt" 
model = YOLO(base_weights)

# model summary
model

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:09<00:00, 2.35MB/s]


YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(96, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_s

: 

In [None]:
import numpy

# training starts here
# important:
#   - epoch: 50
#   - imgsz: 640
#   - batch: typically 8-32
#   = device: "0" for gpu, or "cpu" to force CPU

results = model.train(
    data=str(DATA_YAML),
    epochs=90,
    imgsz=960,
    batch=8,
    workers=4,
    patience=8, # early stopping
    optimizer="AdamW",
    lr0=0.002, # initial LR
    # mild colot augmentation
    hsv_h=0.015, # hue
    hsv_s=0.7, # saturation
    hsv_v=0.4, # vibrance
    mosaic=0.1, # keep small (street scenes can get weird with high mosaic)
    weight_decay=0.0005,
    project="runs_theme2",
    name="yolov8s_rubbish",
    device="cpu"
)

New https://pypi.org/project/ultralytics/8.3.221 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.0  Python-3.10.11 torch-2.2.2+cpu CPU (12th Gen Intel Core(TM) i7-1255U)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=merged_dataset\data.yaml, epochs=90, time=None, patience=8, batch=8, imgsz=960, save=True, save_period=-1, cache=False, device=cpu, workers=4, project=runs_theme2, name=yolov8s_rubbish, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embe

100%|██████████| 755k/755k [00:00<00:00, 5.70MB/s]

Overriding model.yaml nc=80 with nc=20

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               





  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256, 512, 3, 2]              
  8                  -1  1   1838080  ultralytics.nn.modules.block.C2f             [512, 512, 1, True]           
  9                  -1  1    656896  ultralytics.nn.modules.block.SPPF            [512, 512, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  1    591360  ultralytics.nn.modules.block.C2f             [768,

[34m[1mtrain: [0mScanning C:\Users\ayush\Desktop\Project_root\merged_dataset\labels\train... 117 images, 20 backgrounds, 0 corrupt: 100%|██████████| 117/117 [00:01<00:00, 87.46it/s]

[34m[1mtrain: [0mNew cache created: C:\Users\ayush\Desktop\Project_root\merged_dataset\labels\train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning C:\Users\ayush\Desktop\Project_root\merged_dataset\labels\val... 14 images, 4 backgrounds, 0 corrupt: 100%|██████████| 14/14 [00:00<00:00, 120.87it/s]

[34m[1mval: [0mNew cache created: C:\Users\ayush\Desktop\Project_root\merged_dataset\labels\val.cache





Plotting labels to runs_theme2\yolov8s_rubbish\labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.937) with parameter groups 63 weight(decay=0.0), 70 weight(decay=0.0005), 69 bias(decay=0.0)
Image sizes 960 train, 960 val
Using 0 dataloader workers
Logging results to [1mruns_theme2\yolov8s_rubbish[0m
Starting training for 90 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/90         0G      1.231      6.166      1.278         14        960:   7%|▋         | 1/15 [00:30<07:00, 30.07s/it]

In [None]:
# validate on the Val split and generate plots
val_results = model.val(
    data=str(DATA_YAML),
    imgsz=960,
    split="val",
    project="runs_theme1",
    name="yolov8s_rubbish_val",
    device="cpu"
)

print("validation metrics:")
print(val_results)

print("artifacts saved to:", val_results.save_dir)

Ultralytics 8.3.217 🚀 Python-3.10.7 torch-2.2.2 CPU (Apple M1 Pro)
Model summary (fused): 72 layers, 11,136,420 parameters, 0 gradients, 28.5 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 4858.6±2214.0 MB/s, size: 1977.1 KB)
[K[34m[1mval: [0mScanning /Users/notvisal/Desktop/AI_ENG/FinalProject/COS40007-Smart-City-Civil-and-Construction-Engineering/merged_dataset/labels/val.cache... 53 images, 26 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 53/53 121.5Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 4/4 0.1it/s 39.7s19.7s
                   all         53         65      0.313     0.0227     0.0357     0.0206
        aluminium_cans          3          3          0          0          0          0
            car_bumper          1          1          0          0          0          0
             cardboard          3          4          0          0    0.00309   0.000746
                car

In [None]:
val_dir = MERGED / "images" / "val"
sample_imgs = list(val_dir.glob("*.*"))

pred = model.predict(
    source=[str(p) for p in sample_imgs],
    imgsz=960,
    conf=0.25,
    iou=0.5,
    save=True,
    project="runs_theme1",
    name="yolov8s_rubbish_preview",
    device="cpu"
)

print("preview images saved to:", pred[0].save_dir if pred else "no predictions")


0: 960x960 3 cardboards, 11 cartons, 22 furnitures, 4 rubbish bags, 626.3ms
1: 960x960 1 cardboard, 2 cartons, 2 furnitures, 4 rubbish bags, 626.3ms
2: 960x960 10 cartons, 9 furnitures, 3 rubbish bags, 626.3ms
3: 960x960 5 cartons, 11 furnitures, 1 rubbish bag, 626.3ms
4: 960x960 1 cardboard, 5 cartons, 12 furnitures, 12 furniture_scrapss, 1 rubbish bag, 626.3ms
5: 960x960 4 cartons, 24 furnitures, 14 rubbish bags, 626.3ms
6: 960x960 6 cartons, 21 furnitures, 8 furniture_scrapss, 3 rubbish bags, 626.3ms
7: 960x960 11 cartons, 10 furnitures, 1 furniture_scraps, 626.3ms
8: 960x960 1 cardboard, 11 cartons, 28 furnitures, 12 furniture_scrapss, 1 rubbish bag, 626.3ms
9: 960x960 1 cardboard, 7 cartons, 6 furnitures, 1 rubbish bag, 626.3ms
10: 960x960 1 carton, 1 mattress, 626.3ms
11: 960x960 7 cartons, 3 furnitures, 626.3ms
12: 960x960 1 cardboard, 9 cartons, 11 furnitures, 1 rubbish bag, 626.3ms
13: 960x960 1 furniture, 3 rubbish bags, 626.3ms
14: 960x960 2 cartons, 1 furniture, 626.3ms
15