<a href="https://colab.research.google.com/github/Ibrahim-Ayaz/Agriculture-Computer-Vision-Object-Detection-Dataset/blob/main/agriculture_computer_vision_object_detection_end_to_end_projectipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Agriculture End-to-End Computer Vision Object Detection Project

This project engineers a full-stack, production-oriented computer vision pipeline for agricultural object detection, designed to operate on real-world field imagery under practical accuracy–performance constraints. The system integrates dataset ingestion, deterministic train/validation/test partitioning, large-scale model training, and batch inference into a reproducible workflow. A high-capacity transformer-based detector (RT-DETR-L, PyTorch) is optimised and validated using industry-standard detection metrics, including mAP, per-class precision and recall, IoU-based localisation analysis, and detection-aware confusion matrices. Beyond aggregate scores, the pipeline incorporates class-level error analysis and qualitative inspection to identify failure modes, assess model robustness, and validate suitability for deployment in applied agricultural monitoring and decision-support systems.

* For PyTorch, check out the following link: https://docs.pytorch.org/docs/stable/index.html

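* For more about the model, check out the Ultralytics RT-DETR documentation: https://docs.ultralytics.com/models/rtdetr/ (the `torchvision` model zoo, which does not include RT-DETR, is documented at https://docs.pytorch.org/vision/0.9/models.html)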

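* For the detection metrics (mAP, precision, recall, IoU), see the Ultralytics performance-metrics guide: https://docs.ultralytics.com/guides/yolo-performance-metrics/ (note that https://docs.pytorch.org/docs/stable/elastic/metrics.html covers TorchElastic runtime metrics, not detection metrics)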

## Confirming access to a GPU

GPU availability is a prerequisite for running large-scale transformer detectors and for achieving feasible performance under real-world engineering constraints.

In [None]:
# Confirm GPU access
!nvidia-smi

## Importing required dependencies for our project

In [None]:
# Import required dependencies
import os, shutil, glob, random
from pathlib import Path
import yaml
import matplotlib.pyplot as plt
from PIL import Image

# Seed Python's built-in `random` module so that the random.sample() picks
# made later in this notebook repeat across runs (for an identical file listing)
random.seed(42)

In [None]:
# Mount Google Drive inside the Colab filesystem at /content/drive so the
# dataset stored on Drive can be read with ordinary file paths
from google.colab import drive
drive.mount('/content/drive')

## Analysing each of our dir splits

Validates train/validation/test directory splits for structural integrity, label alignment, and class distribution consistency.

In [None]:
# Locate the dataset folder on the mounted Drive and list its top-level entries
DRIVE_ROOT = Path("/content/drive/MyDrive/DataSet")  # <-- change if needed

# Fail fast with a readable message when the dataset folder is missing
assert DRIVE_ROOT.exists(), f"Path not found: {DRIVE_ROOT}"
print("Drive dataset root:", DRIVE_ROOT)
print("Folders:", [entry.name for entry in DRIVE_ROOT.iterdir()])

# The pre-split dataset is expected in a `split_dataset` sub-folder
SPLIT_DRIVE = DRIVE_ROOT.joinpath("split_dataset")
assert SPLIT_DRIVE.exists(), f"split_dataset not found at {SPLIT_DRIVE}"
print("split_dataset contents:", [entry.name for entry in SPLIT_DRIVE.iterdir()])

In [None]:
# Mirror the Drive copy of split_dataset onto the Colab VM's local disk
LOCAL_ROOT = Path("/content/yolo_data")

# Remove any earlier copy first: copytree() requires a non-existing target,
# and this also guarantees no stale files survive from a previous run
if LOCAL_ROOT.exists():
    shutil.rmtree(LOCAL_ROOT)

print("Copying split_dataset to local disk...")
shutil.copytree(src=SPLIT_DRIVE, dst=LOCAL_ROOT)
print("Done. Local contents:", [entry.name for entry in LOCAL_ROOT.iterdir()])

In [None]:
# Find all the splits in each respective folder, and check its respective contents
def find_split_dirs(root: Path):
    """Locate the train / val / test sub-directories directly under ``root``.

    Directory names are compared case-insensitively against a few common
    aliases. Only immediate child directories are considered; nested layouts
    such as ``images/train`` are not detected here.

    Returns:
        A ``(train, val, test)`` tuple of paths; an entry is ``None`` when no
        directory with a matching name exists.
    """
    # Index the immediate sub-directories by lower-cased name
    by_name = {}
    for entry in root.iterdir():
        if entry.is_dir():
            by_name[entry.name.lower()] = entry

    def first_match(*aliases):
        # Return the directory for the first alias that is present, else None
        for alias in aliases:
            hit = by_name.get(alias)
            if hit:
                return hit
        return None

    return (
        first_match("train", "training"),
        first_match("val", "valid", "validation"),
        first_match("test", "testing"),
    )

def find_images_labels_dir(split_dir: Path):
    """Find the image and label folders inside one split directory.

    Candidate folder names are probed in a fixed order and the first existing
    directory wins. For images, a VOC-style ``JPEGImages`` entry is used as a
    last resort.

    Returns:
        An ``(images, labels)`` tuple of paths; an entry is ``None`` when no
        candidate was found.
    """
    def first_dir(names):
        # First candidate under split_dir that is an existing directory
        for name in names:
            candidate = split_dir / name
            if candidate.is_dir():
                return candidate
        return None

    images = first_dir(("images", "jpegimages", "jpg", "imgs"))
    if images is None:
        # VOC style inside split (matters on case-sensitive filesystems)
        voc_dir = split_dir / "JPEGImages"
        if voc_dir.exists():
            images = voc_dir

    labels = first_dir(("labels", "label", "yolo_labels", "yololabels"))
    return images, labels

# Resolve the split folders of the local dataset copy
train_dir, val_dir, test_dir = find_split_dirs(LOCAL_ROOT)

print("Detected split dirs:")
for _tag, _split_path in (("train:", train_dir), ("val  :", val_dir), ("test :", test_dir)):
    print(_tag, _split_path)

# Train and val are mandatory; the test split is optional
assert train_dir is not None and val_dir is not None, "Could not find train/val folders inside split_dataset."

# Resolve the images/labels folders inside each split
train_images, train_labels = find_images_labels_dir(train_dir)
val_images, val_labels     = find_images_labels_dir(val_dir)
test_images, test_labels   = find_images_labels_dir(test_dir) if test_dir else (None, None)

# Report what was found; the test split is listed only when it exists
_found = [("train", train_images, train_labels), ("val", val_images, val_labels)]
if test_dir:
    _found.append(("test", test_images, test_labels))

for _split, _images_dir, _labels_dir in _found:
    print(f"\nInside {_split}:")
    print(" images:", _images_dir)
    print(" labels:", _labels_dir)

# Both folders must exist for the mandatory splits
assert train_images and train_labels, "Train images/labels folders not found."
assert val_images and val_labels,     "Val images/labels folders not found."

## Counting the total number of samples + labels in each of our splits

In [None]:
def count_images(p: Path):
    """Return how many ``*.jpg``, ``*.jpeg`` and ``*.png`` entries sit directly in ``p``."""
    patterns = ("*.jpg", "*.jpeg", "*.png")
    return sum(len(list(p.glob(pattern))) for pattern in patterns)

def count_labels(p: Path):
    """Return how many ``*.txt`` label files sit directly in ``p``."""
    return sum(1 for _ in p.glob("*.txt"))

# Report image/label counts per split (train and val were asserted to exist earlier)
print("Train images:", count_images(train_images), "labels:", count_labels(train_labels))
print("Val images  :", count_images(val_images),   "labels:", count_labels(val_labels))
if test_dir:
    # The test folder may exist without a detectable images/ or labels/
    # sub-folder; in that case the directories are None and cannot be globbed
    if test_images and test_labels:
        print("Test images :", count_images(test_images), "labels:", count_labels(test_labels))
    else:
        print("Test split found, but its images/labels folders were not detected:", test_dir)

## Getting our list of target labels (class names)

In [None]:
# Setup class names and contents for the YAML file
CLASS_NAMES = ["bud", "flower", "early fruit", "mid-growth", "mature"]

# Dataset description: split paths are stored relative to "path"
DATA_YAML = {
    "path": str(LOCAL_ROOT),
    "train": str(train_images.relative_to(LOCAL_ROOT)),
    "val": str(val_images.relative_to(LOCAL_ROOT)),
    "names": dict(enumerate(CLASS_NAMES))
}

# Record whether a usable test split exists. Later cells read HAS_TEST to
# decide whether to evaluate on the test split; without this definition the
# flag would never be set and the test split would be silently skipped.
HAS_TEST = bool(test_dir and test_images)

# Include test if exists
if HAS_TEST:
    DATA_YAML["test"] = str(test_images.relative_to(LOCAL_ROOT))

yaml_path = LOCAL_ROOT / "data.yaml"
with open(yaml_path, "w") as f:
    # sort_keys=False keeps the key order defined above
    yaml.safe_dump(DATA_YAML, f, sort_keys = False)

print("Wrote:", yaml_path)
# read_text() opens and closes the file itself (no dangling file handle)
print(yaml_path.read_text())

## Creating a preprocessing function to turn our YOLO bbox format into XYXY

 A robust preprocessing function is created to convert YOLO-normalised bounding box annotations into absolute XYXY pixel coordinates, ensuring numerical consistency and geometric correctness across the pipeline. Standardising bounding box representations is critical for accurate IoU calculation, confusion matrix construction, and precise ground-truth versus prediction comparisons during model evaluation.

In [None]:
# Create a preprocessing function to convert YOLO bboxes to XYXY format
def yolo_to_xyxy(xc, yc, w, h, img_w, img_h):
    """Convert a normalised YOLO box (centre x/y, width, height) to pixel corners.

    Returns:
        ``(x1, y1, x2, y2)`` scaled by ``img_w`` / ``img_h``; values are not
        clamped to the image bounds.
    """
    half_w, half_h = w / 2, h / 2
    left = (xc - half_w) * img_w
    top = (yc - half_h) * img_h
    right = (xc + half_w) * img_w
    bottom = (yc + half_h) * img_h
    return left, top, right, bottom

def show_sample(images_dir: Path, labels_dir: Path, n=3):
    """Plot up to ``n`` randomly chosen images with their YOLO ground-truth boxes.

    Args:
        images_dir: Directory searched (non-recursively) for jpg/jpeg/png files.
        labels_dir: Directory holding one ``<image stem>.txt`` YOLO label file
            per image.
        n: Maximum number of images to display.

    Raises:
        AssertionError: If ``images_dir`` contains no matching image.
    """
    img_files = []
    for ext in ["*.jpg", "*.jpeg", "*.png"]:
        img_files += list(images_dir.glob(ext))
    assert len(img_files) > 0, "No images found."

    # glob() order depends on the filesystem; sorting first makes the seeded
    # random.sample() pick the same images on every run
    img_files = sorted(img_files)
    picks = random.sample(img_files, k = min(n, len(img_files)))

    for img_path in picks:
        lbl_path = labels_dir / f"{img_path.stem}.txt"
        img = Image.open(img_path).convert("RGB")
        W, H = img.size

        plt.figure()
        plt.imshow(img)
        ax = plt.gca()

        if lbl_path.exists():
            with open(lbl_path, "r") as f:
                for line in f:
                    parts = line.strip().split()
                    # Skip blank/malformed rows: expect "cls xc yc w h"
                    if len(parts) != 5:
                        continue
                    # int(float(...)) also accepts ids written as "0.0",
                    # matching how the evaluation cells parse labels
                    cls = int(float(parts[0]))
                    xc, yc, bw, bh = map(float, parts[1:])
                    x1, y1, x2, y2 = yolo_to_xyxy(xc, yc, bw, bh, W, H)
                    ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill = False, linewidth = 2))
                    # An id outside CLASS_NAMES would raise IndexError (or,
                    # if negative, silently pick the wrong name)
                    if 0 <= cls < len(CLASS_NAMES):
                        name = CLASS_NAMES[cls]
                    else:
                        name = f"class {cls}"
                    ax.text(x1, y1, name, bbox = dict(facecolor = "white", alpha = 0.7))
        else:
            ax.set_title("NO LABEL FILE FOUND")

        plt.axis("off")
        plt.show()

show_sample(train_images, train_labels, n = 3)

## Loading in our model

We will load the RT-DETR-L model for our use case, see the following for more about the model: https://docs.ultralytics.com/models/rtdetr/

In [None]:
# Install + load RT-DETR-L

!pip -q install -U ultralytics

from ultralytics import YOLO
from pathlib import Path
import os

# Fallbacks if earlier variables aren't defined
if "yaml_path" not in globals():
    yaml_path = Path("/content/yolo_data/data.yaml")
if "HAS_TEST" not in globals():
    HAS_TEST = False

assert Path(yaml_path).exists(), f"data.yaml not found at: {yaml_path}"

model = YOLO("rtdetr-l.pt")  # pretrained RT-DETR Large
print("Loaded: RT-DETR-L") # Corrected line
print("Using data:", yaml_path)

## Training our pretrained RT-DETR-L model

We fine-tune a pretrained RT-DETR-L transformer-based detector using task-specific data, optimising both classification and localisation performance. Transfer learning is employed to reduce training time, improve convergence stability, and maximize performance under practical compute constraints.


In [None]:
# Fine-tune the pretrained model on the dataset described by data.yaml

_train_kwargs = dict(
    data = str(yaml_path),
    epochs = 120,     # Bump to 150-200 if you want maximum mAP
    imgsz = 960,      # 640 faster; 960 often boosts mAP for small objects
    batch = 8,        # If OOM -> 4, 2, or 1
    device = 0,
    workers = 2,
    # Run outputs are written under <project>/<name>
    project = "/content/runs",
    name = "rtdetr_l_training",
)

results = model.train(**_train_kwargs)

## Evaluating our model on the validation and test sets

Evaluating the model is just as important as training it!

In [None]:
# Run validation on the val split, then on the test split when one is available

_data_cfg = str(yaml_path)

val_metrics = model.val(data = _data_cfg)
print("Val metrics object:", val_metrics)

# The test split can only be evaluated when data.yaml contains a "test:" entry
if HAS_TEST:
    test_metrics = model.val(data = _data_cfg, split = "test")
    print("Test metrics object:", test_metrics)

In [None]:
# Predict samples + save artifacts back to Drive

import random
import shutil

# Resolve the training run directory.
# Training was launched with project="/content/runs" and
# name="rtdetr_l_training", so outputs live in /content/runs/rtdetr_l_training
# (there is no "detect/" level when an explicit project is given).
# NOTE(review): prefers the trainer's own save_dir when available, because
# Ultralytics may append a numeric suffix to the name on re-runs — confirm.
_trainer = getattr(model, "trainer", None)
_trainer_dir = getattr(_trainer, "save_dir", None)
RUN_DIR = Path(_trainer_dir) if _trainer_dir else Path("/content/runs/rtdetr_l_training")
WEIGHTS_DIR = RUN_DIR / "weights"
best = WEIGHTS_DIR / "best.pt"
last = WEIGHTS_DIR / "last.pt"

print("Run dir:", RUN_DIR)
print("Best exists:", best.exists(), "|", best)
print("Last exists:", last.exists(), "|", last)

# Pick some sample images for inference: test split if available, else val
sample_dir = None
if globals().get("HAS_TEST", False) and globals().get("test_images") is not None:
    sample_dir = Path(test_images)
elif globals().get("val_images") is not None:
    sample_dir = Path(val_images)
else:
    # The run directory only holds training plots, not dataset images, so
    # inference is skipped rather than run on unrelated files
    print("Note: val_images/test_images are not defined; skipping sample inference.")

if sample_dir and sample_dir.exists():
    imgs = []
    for ext in ("*.jpg", "*.jpeg", "*.png"):
        imgs += list(sample_dir.glob(ext))
    imgs = sorted(imgs)
    if imgs:
        picks = random.sample(imgs, k = min(10, len(imgs)))
        pred_results = model.predict(picks, imgsz = 960, conf = 0.25, save = True)
        # Report the directory Ultralytics actually used instead of guessing
        pred_dir = getattr(pred_results[0], "save_dir", None) if pred_results else None
        print("Predictions saved under:", pred_dir if pred_dir else "(see the Ultralytics log above)")
    else:
        print("No images found for inference in:", sample_dir)

# Save to Drive
DRIVE_SAVE = Path("/content/drive/MyDrive/rtdetr_l_outputs")
DRIVE_SAVE.mkdir(parents = True, exist_ok = True)

# Save weights + data.yaml
if best.exists():
    shutil.copy2(best, DRIVE_SAVE / "best.pt")
else:
    print("Warning: best.pt not found in", WEIGHTS_DIR)
if last.exists():
    shutil.copy2(last, DRIVE_SAVE / "last.pt")
shutil.copy2(Path(yaml_path), DRIVE_SAVE / "data.yaml")

# Save key training artifacts if present.
# NOTE(review): newer Ultralytics releases appear to prefix curve plots with
# "Box" — both spellings are tried; missing files are simply skipped.
for f in ["results.csv", "results.png", "confusion_matrix.png",
          "F1_curve.png", "PR_curve.png", "BoxF1_curve.png", "BoxPR_curve.png"]:
    p = RUN_DIR / f
    if p.exists():
        shutil.copy2(p, DRIVE_SAVE / p.name)

print("Saved to Drive:", DRIVE_SAVE)
print("Files:", [p.name for p in DRIVE_SAVE.iterdir()])

## Inspecting ground-truth bboxes vs our model's predictions

Visually compare ground-truth and predicted bounding boxes to assess localisation accuracy and identify qualitative failure modes.

In [None]:
# Compare Ground Truth vs RT-DETR-L Predictions (side-by-side)

import random
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from pathlib import Path

# Setup hyperparameters
NUM_SAMPLES = 5     # Number of images to compare side-by-side
CONF_THR = 0.25     # Prediction confidence threshold

# Pick dataset split: test when it is flagged AND its folders were resolved,
# otherwise fall back to val. The `is not None` checks prevent Path(None)
# from raising TypeError when the test folders were not detected.
if (
    "HAS_TEST" in globals() and HAS_TEST
    and "test_images" in globals() and test_images is not None
    and "test_labels" in globals() and test_labels is not None
):
    IMG_DIR = Path(test_images)
    LAB_DIR = Path(test_labels)
else:
    IMG_DIR = Path(val_images)
    LAB_DIR = Path(val_labels)

assert IMG_DIR.exists() and LAB_DIR.exists(), "Image/label dirs not found"

def yolo_to_xyxy(xc, yc, w, h, W, H):
    """Scale a normalised YOLO centre/size box to ``(x1, y1, x2, y2)`` pixel corners."""
    dx, dy = w / 2, h / 2
    return (
        (xc - dx) * W,
        (yc - dy) * H,
        (xc + dx) * W,
        (yc + dy) * H,
    )

def draw_gt(ax, img_path, label_path):
    """Draw an image and its YOLO ground-truth boxes on a matplotlib axis.

    Args:
        ax: Matplotlib axis to draw on.
        img_path: Path of the image to display.
        label_path: Path of the YOLO label file (``cls xc yc w h`` per line,
            normalised coordinates). A missing file yields the bare image.
    """
    img = Image.open(img_path).convert("RGB")
    W, H = img.size
    ax.imshow(img)
    ax.set_title("Ground Truth")
    ax.axis("off")

    if not label_path.exists():
        return

    with open(label_path) as f:
        for line in f:
            parts = line.split()
            # Blank or malformed rows would otherwise raise ValueError when
            # unpacked into five values; skip them instead
            if len(parts) != 5:
                continue
            cls = int(float(parts[0]))
            xc, yc, bw, bh = map(float, parts[1:])
            x1, y1, x2, y2 = yolo_to_xyxy(xc, yc, bw, bh, W, H)
            ax.add_patch(
                plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                              fill = False, linewidth = 2)
            )
            # Guard against ids outside CLASS_NAMES (IndexError, or a wrong
            # name for negative ids)
            if 0 <= cls < len(CLASS_NAMES):
                name = CLASS_NAMES[cls]
            else:
                name = f"class {cls}"
            ax.text(
                x1, y1, name,
                bbox = dict(facecolor = "white", alpha = 0.7),
                fontsize = 9
            )

def draw_pred(ax, img_path):
    """Draw an image and the model's predicted boxes on a matplotlib axis.

    Runs ``model.predict`` on ``img_path`` with the global ``CONF_THR`` and
    annotates each detection with its class name and confidence score.
    """
    ax.imshow(Image.open(img_path).convert("RGB"))
    ax.set_title("RT-DETR-L Prediction")
    ax.axis("off")

    detections = model.predict(img_path, conf = CONF_THR, imgsz = 960, verbose = False)[0].boxes
    if detections is None:
        return

    # Move results to host memory as NumPy arrays before plotting
    corners = detections.xyxy.cpu().numpy()
    confidences = detections.conf.cpu().numpy()
    class_ids = detections.cls.cpu().numpy().astype(int)

    for corner, score, class_id in zip(corners, confidences, class_ids):
        x1, y1, x2, y2 = corner
        outline = plt.Rectangle((x1, y1), x2-x1, y2-y1, fill=False, linewidth=2)
        ax.add_patch(outline)
        ax.text(
            x1, y1,
            f"{CLASS_NAMES[class_id]} {score:.2f}",
            bbox = dict(facecolor = "yellow", alpha = 0.7),
            fontsize = 9
        )

# Gather candidate images from the chosen split (non-recursive)
img_files = []
for pattern in ("*.jpg","*.jpeg","*.png"):
    img_files.extend(IMG_DIR.glob(pattern))

assert img_files, "No images found"
samples = random.sample(img_files, k = min(NUM_SAMPLES, len(img_files)))

# One figure per sample: ground truth on the left, prediction on the right
for img_path in samples:
    lbl_path = LAB_DIR / f"{img_path.stem}.txt"

    fig, axes = plt.subplots(1, 2, figsize = (14, 6))
    left_ax, right_ax = axes[0], axes[1]
    draw_gt(left_ax, img_path, lbl_path)
    draw_pred(right_ax, img_path)
    plt.tight_layout()
    plt.show()

## Visualising other metrics as well

This section visualises extended evaluation metrics — including per-class precision and recall, IoU distributions, and confusion-based error patterns — to enable detailed diagnostic analysis of model behaviour. By exposing class-level performance trends and failure characteristics, these visualisations support informed engineering decisions around model tuning, robustness, and deployment readiness.


In [None]:
# Per-class Precision / Recall for RT-DETR-L vs YOLO Ground Truth (IoU matching)

import numpy as np
from pathlib import Path
from PIL import Image
import random
import pandas as pd
import matplotlib.pyplot as plt

# Evaluation settings
CONF_THR = 0.25     # Prediction confidence threshold
IOU_THR  = 0.50     # IoU threshold for a "match"
MAX_IMAGES = None   # Set to an int (e.g., 500) to evaluate faster; None = all images
IMG_SIZE = 960      # Should match training/eval size (or 640 if you trained with 640)

# Evaluate on the test split when it is flagged and resolved, otherwise on val
_use_test = (
    "HAS_TEST" in globals() and HAS_TEST
    and "test_images" in globals() and test_images is not None
)
if not _use_test:
    IMG_DIR = Path(val_images)
    LAB_DIR = Path(val_labels)
    SPLIT_NAME = "val"
else:
    IMG_DIR = Path(test_images)
    LAB_DIR = Path(test_labels)
    SPLIT_NAME = "test"

assert IMG_DIR.exists() and LAB_DIR.exists(), "Image/label dirs not found."
assert "CLASS_NAMES" in globals(), "CLASS_NAMES not defined."
num_classes = len(CLASS_NAMES)

def yolo_txt_to_xyxy(label_path: Path, W: int, H: int):
    """Parse a YOLO label file into pixel-space boxes.

    Each valid line (``cls xc yc w h``, normalised) becomes a
    ``(cls, float32 array [x1, y1, x2, y2])`` tuple. The top-left corner is
    clamped at 0 and the bottom-right corner at ``W-1`` / ``H-1``; boxes that
    end up with no positive width or height are dropped. Lines without exactly
    five fields are ignored, and a missing file yields an empty list.
    """
    if not label_path.exists():
        return []

    boxes = []
    with open(label_path, "r") as handle:
        for raw in handle:
            fields = raw.strip().split()
            if len(fields) != 5:
                continue
            class_id = int(float(fields[0]))
            xc, yc, bw, bh = (float(v) for v in fields[1:])
            # Normalised centre/size -> pixel corners, clamped to the image
            left = max(0.0, (xc - bw/2) * W)
            top = max(0.0, (yc - bh/2) * H)
            right = min(float(W-1), (xc + bw/2) * W)
            bottom = min(float(H-1), (yc + bh/2) * H)
            if right > left and bottom > top:
                corners = np.array([left, top, right, bottom], dtype = np.float32)
                boxes.append((class_id, corners))
    return boxes

def iou_xyxy(a, b):
    """Intersection-over-union of two ``[x1, y1, x2, y2]`` boxes (0.0 when the union is empty)."""
    def _area(box):
        # Width and height are floored at 0 so inverted boxes count as empty
        return max(0.0, box[2]-box[0]) * max(0.0, box[3]-box[1])

    overlap_w = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    overlap_h = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = overlap_w * overlap_h

    union = _area(a) + _area(b) - inter
    if union <= 0:
        return 0.0
    return float(inter / union)

def match_class_greedy(gt_boxes, pred_boxes, iou_thr):
    """Greedily match one class's predictions to its ground-truth boxes.

    Predictions are visited in the order given (the caller is expected to
    pass them sorted by descending confidence). Each prediction claims the
    not-yet-matched ground-truth box with the highest IoU; it is a true
    positive when that IoU reaches ``iou_thr`` and a false positive otherwise.

    Args:
        gt_boxes: list of ground-truth boxes
        pred_boxes: list of predicted boxes
        iou_thr: minimum IoU for a match

    Returns:
        ``(TP, FP, FN)`` counts.
    """
    n_gt, n_pred = len(gt_boxes), len(pred_boxes)
    if n_gt == 0 or n_pred == 0:
        # Nothing can match: every prediction is a FP, every GT box a FN
        return 0, n_pred, n_gt

    claimed = [False] * n_gt
    hits = 0

    for pred in pred_boxes:
        top_iou, top_idx = 0.0, -1
        for idx, truth in enumerate(gt_boxes):
            if claimed[idx]:
                continue
            overlap = iou_xyxy(pred, truth)
            if overlap > top_iou:
                top_iou, top_idx = overlap, idx
        if top_idx >= 0 and top_iou >= iou_thr:
            claimed[top_idx] = True
            hits += 1

    # Each prediction is either a TP or a FP; each unclaimed GT box is a FN
    return hits, n_pred - hits, n_gt - hits

# Collect necessary files (sorted so MAX_IMAGES always selects the same subset)
img_files = []
for ext in ("*.jpg", "*.jpeg", "*.png"):
    img_files += list(IMG_DIR.glob(ext))
img_files = sorted(img_files)

if MAX_IMAGES is not None:
    img_files = img_files[:MAX_IMAGES]

assert len(img_files) > 0, f"No images found in {IMG_DIR}"

print(f"Evaluating split: {SPLIT_NAME} | Images: {len(img_files)} | CONF_THR={CONF_THR} | IOU_THR={IOU_THR}")

# Counters per class
TP = np.zeros(num_classes, dtype = np.int64)
FP = np.zeros(num_classes, dtype = np.int64)
FN = np.zeros(num_classes, dtype = np.int64)

for img_path in img_files:
    # Only the pixel dimensions are needed here (to de-normalise the labels),
    # so read them from the header without decoding the whole image, and let
    # the context manager close the file right away
    with Image.open(img_path) as img:
        W, H = img.size
    lbl_path = LAB_DIR / f"{img_path.stem}.txt"

    # Ground truth per class (ids outside the known class range are ignored)
    gt_all = yolo_txt_to_xyxy(lbl_path, W, H)
    gt_by_class = [[] for _ in range(num_classes)]
    for cls, box in gt_all:
        if 0 <= cls < num_classes:
            gt_by_class[cls].append(box)

    # Predictions (Ultralytics)
    r = model.predict(str(img_path), conf = CONF_THR, imgsz = IMG_SIZE, verbose = False)[0]
    pred_by_class = [[] for _ in range(num_classes)]
    if r.boxes is not None and len(r.boxes) > 0:
        boxes = r.boxes.xyxy.cpu().numpy()
        confs = r.boxes.conf.cpu().numpy()
        clss  = r.boxes.cls.cpu().numpy().astype(int)

        # Sort preds by confidence descending (important for greedy matching)
        order = np.argsort(-confs)
        boxes, confs, clss = boxes[order], confs[order], clss[order]

        for b, c in zip(boxes, clss):
            if 0 <= c < num_classes:
                pred_by_class[c].append(b.astype(np.float32))

    # Match per class and accumulate the counts
    for c in range(num_classes):
        tp, fp, fn = match_class_greedy(gt_by_class[c], pred_by_class[c], IOU_THR)
        TP[c] += tp
        FP[c] += fp
        FN[c] += fn

# Calculate respective metrics (denominators floored to avoid division by zero)
precision = TP / np.maximum(TP + FP, 1)
recall    = TP / np.maximum(TP + FN, 1)
f1        = 2 * precision * recall / np.maximum(precision + recall, 1e-12)

df = pd.DataFrame({
    "class": CLASS_NAMES,
    "TP": TP,
    "FP": FP,
    "FN": FN,
    "precision": precision,
    "recall": recall,
    "f1": f1
})

# Macro averages (unweighted mean over classes)
macro = df[["precision", "recall", "f1"]].mean()
print("\nPer-class metrics:")
display(df)

print("\nMacro avg:")
display(macro)

# Plot precision and recall per class as grouped bars
plt.figure(figsize=(10,5))
x = np.arange(num_classes)
plt.bar(x - 0.2, df["precision"].values, width = 0.4, label = "Precision")
plt.bar(x + 0.2, df["recall"].values, width = 0.4, label = "Recall")
plt.xticks(x, CLASS_NAMES, rotation = 30, ha = "right")
plt.ylim(0, 1.0)
plt.title(f"Per-class Precision/Recall ({SPLIT_NAME}) @ IoU={IOU_THR}, conf={CONF_THR}")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Enhanced Detection Analysis: Normalised Confusion Matrix + Per-class IoU + Error Breakdown
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image

# Evaluation settings
CONF_THR = 0.25
IOU_THR  = 0.50
IMG_SIZE = 960        # Use same as training/eval (640 or 960)
MAX_IMAGES = None     # Set e.g. 500 for faster debug; None = all

# Evaluate on the test split when it is flagged and resolved, otherwise on val
_use_test = (
    "HAS_TEST" in globals() and HAS_TEST
    and "test_images" in globals() and test_images is not None
)
if not _use_test:
    IMG_DIR = Path(val_images)
    LAB_DIR = Path(val_labels)
    SPLIT = "val"
else:
    IMG_DIR = Path(test_images)
    LAB_DIR = Path(test_labels)
    SPLIT = "test"

assert IMG_DIR.exists() and LAB_DIR.exists(), "Image/label dirs not found."
assert "CLASS_NAMES" in globals(), "CLASS_NAMES not defined."
assert "model" in globals(), "model not loaded."

num_classes = len(CLASS_NAMES)
# The extra row/column after the real classes stands for "background"
BG = num_classes  # background index
labels_cm = [*CLASS_NAMES, "background"]

def yolo_to_xyxy(xc, yc, w, h, W, H):
    """Convert a normalised YOLO box to clamped pixel corners.

    All four coordinates are clamped to ``[0, W-1]`` / ``[0, H-1]``.

    Returns:
        A float32 array ``[x1, y1, x2, y2]``, or ``None`` when the clamped box
        has no positive width or height.
    """
    def _clip(value, upper):
        # Keep a coordinate inside [0, upper]
        return max(0.0, min(value, upper))

    max_x, max_y = W-1.0, H-1.0
    x1 = _clip((xc - w/2) * W, max_x)
    y1 = _clip((yc - h/2) * H, max_y)
    x2 = _clip((xc + w/2) * W, max_x)
    y2 = _clip((yc + h/2) * H, max_y)

    if x2 <= x1 or y2 <= y1:
        return None
    return np.array([x1, y1, x2, y2], dtype = np.float32)

def iou_xyxy(a, b):
    """Return the IoU of two ``[x1, y1, x2, y2]`` boxes, or 0.0 if their union is empty."""
    # Corners of the overlap rectangle
    left, top = max(a[0], b[0]), max(a[1], b[1])
    right, bottom = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, right-left) * max(0.0, bottom-top)

    # Box areas, with negative extents treated as zero
    area_a, area_b = (
        max(0.0, box[2]-box[0]) * max(0.0, box[3]-box[1]) for box in (a, b)
    )
    union = area_a + area_b - inter
    if union <= 0:
        return 0.0
    return float(inter / union)

# Confusion matrix accumulator: rows = ground truth, columns = prediction,
# with the last row/column (index BG) standing for "background"
cm = np.zeros((num_classes + 1, num_classes + 1), dtype=np.int64)

TP = np.zeros(num_classes, dtype = np.int64)
FP = np.zeros(num_classes, dtype = np.int64)
FN = np.zeros(num_classes, dtype = np.int64)

# IoU stats for true positives per class
iou_sum = np.zeros(num_classes, dtype = np.float64)
iou_cnt = np.zeros(num_classes, dtype = np.int64)

# Get the list of images (sorted so MAX_IMAGES always selects the same subset)
img_files = []
for ext in ("*.jpg","*.jpeg","*.png"):
    img_files += list(IMG_DIR.glob(ext))
img_files = sorted(img_files)

if MAX_IMAGES is not None:
    img_files = img_files[:MAX_IMAGES]

assert len(img_files) > 0, f"No images found in {IMG_DIR}"

print(f"Running enhanced analysis on {len(img_files)} images ({SPLIT}) | conf={CONF_THR} | IoU={IOU_THR}")

for img_path in img_files:
    # Only the pixel dimensions are needed here (to de-normalise the labels),
    # so read them from the header without decoding the whole image, and let
    # the context manager close the file right away
    with Image.open(img_path) as img:
        W, H = img.size
    lbl_path = LAB_DIR / f"{img_path.stem}.txt"

    # Load GT as (class id, clamped pixel box); skip malformed rows,
    # degenerate boxes and unknown class ids
    gt = []
    if lbl_path.exists():
        with open(lbl_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                cls = int(float(parts[0]))
                xc, yc, bw, bh = map(float, parts[1:])
                box = yolo_to_xyxy(xc, yc, bw, bh, W, H)
                if box is None:
                    continue
                if 0 <= cls < num_classes:
                    gt.append((cls, box))

    gt_used = [False] * len(gt)

    # Calculate model preds
    r = model.predict(str(img_path), conf = CONF_THR, imgsz = IMG_SIZE, verbose = False)[0]
    preds = []
    if r.boxes is not None and len(r.boxes) > 0:
        p_boxes = r.boxes.xyxy.cpu().numpy()
        p_confs = r.boxes.conf.cpu().numpy()
        p_cls   = r.boxes.cls.cpu().numpy().astype(int)

        order = np.argsort(-p_confs)  # Sort descending by confidence for greedy matching
        for i in order:
            c = int(p_cls[i])
            if 0 <= c < num_classes:
                preds.append((c, p_boxes[i].astype(np.float32), float(p_confs[i])))

    # Greedy match each pred to best unused GT (any class)
    for p_c, p_b, _ in preds:
        best_iou = 0.0
        best_j = -1
        for j, (g_c, g_b) in enumerate(gt):
            if gt_used[j]:
                continue
            overlap = iou_xyxy(p_b, g_b)
            if overlap > best_iou:
                best_iou = overlap
                best_j = j

        if best_iou >= IOU_THR and best_j >= 0:
            g_c = gt[best_j][0]
            # Confusion matrix entry: GT -> Pred
            cm[g_c, p_c] += 1

            # Update TP/FP/FN + IoU stats
            if p_c == g_c:
                TP[g_c] += 1
                iou_sum[g_c] += best_iou
                iou_cnt[g_c] += 1
            else:
                # Misclassification: counts as FP for predicted class, FN for GT class
                FP[p_c] += 1
                FN[g_c] += 1

            gt_used[best_j] = True
        else:
            # Unmatched prediction -> false positive (background is GT)
            cm[BG, p_c] += 1
            FP[p_c] += 1

    # Unmatched GT -> false negative (pred background)
    for used, (g_c, _) in zip(gt_used, gt):
        if not used:
            cm[g_c, BG] += 1
            FN[g_c] += 1

# Normalise our confusion matrix to the 0-1 range
# Row-normalised by GT (each row sums to 1 if row has samples)
row_sums = cm.sum(axis = 1, keepdims = True).astype(np.float64)
cm_norm = np.divide(cm, np.maximum(row_sums, 1.0))

# Calculate the average IoU per class (over true positives only)
iou_avg = np.divide(iou_sum, np.maximum(iou_cnt, 1))

# Create error breakdown table (denominators floored to avoid division by zero)
precision = TP / np.maximum(TP + FP, 1)
recall    = TP / np.maximum(TP + FN, 1)
f1        = 2 * precision * recall / np.maximum(precision + recall, 1e-12)

# Top confusion for each GT class (excluding diagonal & background column)
top_confusions = []
for g in range(num_classes):
    row = cm[g, :num_classes].copy()
    row[g] = 0  # exclude correct
    if row.sum() == 0:
        top_confusions.append("-")
    else:
        p = int(np.argmax(row))
        top_confusions.append(f"{CLASS_NAMES[g]} → {CLASS_NAMES[p]} ({row[p]})")

df = pd.DataFrame({
    "class": CLASS_NAMES,
    "TP": TP,
    "FP": FP,
    "FN": FN,
    "precision": precision,
    "recall": recall,
    "f1": f1,
    "avg_iou_TP": iou_avg,
    "top_confusion": top_confusions
})

print("\n Class-wise error breakdown")
display(df)

print("\nMacro averages:")
display(df[["precision","recall","f1","avg_iou_TP"]].mean())

# Plot individual displays
# 1) Raw confusion matrix
plt.figure(figsize = (9, 7))
plt.imshow(cm, aspect = "auto")
plt.title(f"Confusion Matrix (counts) | {SPLIT} | IoU={IOU_THR} conf={CONF_THR}")
plt.xticks(range(num_classes + 1), labels_cm, rotation = 30, ha = "right")
plt.yticks(range(num_classes + 1), labels_cm)
plt.colorbar()
plt.tight_layout()
plt.show()

# 2) Normalized confusion matrix (row-normalized by GT)
plt.figure(figsize = (9, 7))
plt.imshow(cm_norm, aspect = "auto", vmin = 0, vmax = 1)
plt.title(f"Confusion Matrix (row-normalized by GT) | {SPLIT} | IoU={IOU_THR} conf={CONF_THR}")
plt.xticks(range(num_classes+1), labels_cm, rotation = 30, ha = "right")
plt.yticks(range(num_classes+1), labels_cm)
plt.colorbar()
plt.tight_layout()
plt.show()

# 3) Per-class avg IoU bar chart
plt.figure(figsize = (9, 4))
plt.bar(range(num_classes), iou_avg)
plt.xticks(range(num_classes), CLASS_NAMES, rotation = 30, ha = "right")
plt.ylim(0, 1.0)
plt.title(f"Per-class Average IoU on True Positives | {SPLIT}")
plt.tight_layout()
plt.show()

In [None]:
# Ultralytics automatically saves loss curves here:

from PIL import Image
import matplotlib.pyplot as plt
from pathlib import Path

# Training outputs live under <project>/<name> as passed to model.train()
RUN_DIR = Path("/content/runs/rtdetr_l_training")

# results.png is the summary plot written into the run directory
img = Image.open(RUN_DIR.joinpath("results.png"))

plt.figure(figsize = (8,6))
plt.imshow(img)
plt.title("Training & Validation Loss Curves")
plt.axis("off")
plt.show()