In [1]:
from roboflow import Roboflow
import supervision as sv
import cv2
import numpy as np
import os




In [2]:
def read_yolo_labels(label_path, img_shape):
    """Read YOLO labels (x_center, y_center, width, height) normalized -> absolute xyxy."""
    H, W = img_shape[:2]
    boxes = []
    classes = []
    with open(label_path, "r") as f:
        for line in f:
            print("Label line: ", line)
            parts = list(map(float, line.strip().split()))
            cls, x, y, w, h = parts[:5]  # ignore polygons if exist
            x1 = (x - w / 2) * W
            y1 = (y - h / 2) * H
            x2 = (x + w / 2) * W
            y2 = (y + h / 2) * H
            boxes.append([x1, y1, x2, y2])
            classes.append(int(cls))
    return np.array(boxes), np.array(classes)

In [3]:
def roboflow_to_xyxy(preds):
    """Convert Roboflow predictions (x, y, width, height) to xyxy absolute boxes."""
    boxes, classes, confs = [], [], []
    for p in preds:
        x1 = p["x"] - p["width"] / 2
        y1 = p["y"] - p["height"] / 2
        x2 = p["x"] + p["width"] / 2
        y2 = p["y"] + p["height"] / 2
        boxes.append([x1, y1, x2, y2])
        classes.append(p["class_id"])
        confs.append(p["confidence"])
    return np.array(boxes), np.array(classes), np.array(confs)


In [4]:
def roboflow_pred_to_yolo(pred, img_width, img_height):
    x_center = pred["x"] / img_width
    y_center = pred["y"] / img_height
    width = pred["width"] / img_width
    height = pred["height"] / img_height
    return f"{pred['class_id']} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"


In [5]:
from supervision.detection.utils.iou_and_nms import box_iou_batch

def compute_miou(pred_boxes, gt_boxes):
    """Compute mean IoU between predicted and ground-truth boxes."""
    if len(pred_boxes) == 0 or len(gt_boxes) == 0:
        return 0.0
    ious = box_iou_batch(pred_boxes, gt_boxes)
    return np.mean(ious.max(axis=1))

In [6]:
from supervision.metrics.mean_average_precision import MeanAveragePrecision

def calculate_map(detections_pred, detections_gt):
    """Calculate mAP (mean average precision) for object detection."""
    # Create the metric
    map_metric = MeanAveragePrecision()

    # Add your predictions and ground-truth Detections
    map_metric.update(predictions=detections_pred, targets=detections_gt)

    # Compute results
    return map_metric.compute()

In [7]:
from supervision.metrics.detection import ConfusionMatrix

def calculate_precision_recall(predictions_pred, predictions_gt, class_names, iou_threshold=0.5, conf_threshold=0.25):
    """
    Compute Precision and Recall using Supervision v0.26.1 ConfusionMatrix.
    """
    cm = ConfusionMatrix.from_detections(
        predictions=[predictions_pred],
        targets=[predictions_gt],
        classes=class_names,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
    )
    M = cm.matrix
    # matrix shape = (n_classes+1, n_classes+1)
    # last row/col = background/missed detections

    num_classes = len(class_names)
    tp = np.diag(M[:num_classes, :num_classes])           # true positives per class
    fp = M[:num_classes, num_classes]                     # false positives per class
    fn = M[num_classes, :num_classes]                     # false negatives per class

    precision = tp / (tp + fp + 1e-16)
    recall = tp / (tp + fn + 1e-16)
    f1 = 2 * precision * recall / (precision + recall + 1e-16)

    # Replace NaN (if no samples) with 0
    precision = np.nan_to_num(precision)
    recall = np.nan_to_num(recall)
    f1 = np.nan_to_num(f1)

    mean_precision = precision.mean()
    mean_recall = recall.mean()
    mean_f1 = f1.mean()

    print(f"Confusion Matrix:\n{M}")
    print(f"Per-class precision: {precision}")
    print(f"Per-class recall:    {recall}")
    print(f"Per-class F1:        {f1}")
    print(f"Mean Precision: {mean_precision:.4f}, Mean Recall: {mean_recall:.4f}, Mean F1: {mean_f1:.4f}")

    return mean_precision, mean_recall, mean_f1
    return cm


In [8]:
# --- CONFIGURATION ---
API_KEY = "ooPxxzLT1xgs8CWEiMjY"
PROJECT = "container-detection-1v0zy"
VERSION = 7
CONFIDENCE = 40
OVERLAP = 30


BASE_DIR = "/home/emma/facultad/pps/datasets/containers/raw/container-detection.v7i.yolov11/test"
IMAGES_DIR = os.path.join(BASE_DIR, "images")
LABELS_DIR = os.path.join(BASE_DIR, "labels")
OUTPUT_DIR = f"/home/emma/facultad/pps/validacion/containers/{PROJECT}"

os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, "images"), exist_ok=True)


In [None]:


def predict_model():
    # --- INIT MODEL ---
    rf = Roboflow(api_key=API_KEY)
    project = rf.workspace().project(PROJECT)
    model = project.version(VERSION).model
    image_files = [f for f in os.listdir(IMAGES_DIR) if f.endswith((".jpg", ".png", ".jpeg"))]
    total_ious = []

    for img_name in image_files:
        image_path = os.path.join(IMAGES_DIR, img_name)
        label_path = os.path.join(LABELS_DIR, img_name.replace(".jpg", ".txt").replace(".png", ".txt"))

        image = cv2.imread(image_path)
        if image is None:
            print(f"⚠️ Skipping unreadable image: {img_name}")
            continue

        if not os.path.exists(label_path):
            print(f"⚠️ Missing label for {img_name}")
            continue

        # Predict
        result = model.predict(image_path, confidence=CONFIDENCE, overlap=OVERLAP).json()

        # Save the prediction result to OUTPUT_DIR/predictions
        predictions_dir = os.path.join(OUTPUT_DIR, "predictions")
        os.makedirs(predictions_dir, exist_ok=True)
        pred_filename = f"{os.path.splitext(img_name)[0]}.json"
        pred_out_path = os.path.join(predictions_dir, pred_filename)
        with open(pred_out_path, "w") as f:
            import json
            json.dump(result, f, indent=2)

        preds = result.get("predictions", [])

        if not preds:
            print(f"⚠️ No detections for {img_name}")
            continue

        gt_boxes, gt_classes = read_yolo_labels(label_path, image.shape)
        pred_boxes, pred_classes, pred_conf = roboflow_to_xyxy(preds)
        miou = compute_miou(pred_boxes, gt_boxes)
        total_ious.append(miou)

        # Annotate
        gt_det = sv.Detections(xyxy=gt_boxes, class_id=gt_classes)
        pred_det = sv.Detections(xyxy=pred_boxes, confidence=pred_conf, class_id=pred_classes)
        gt_annotator = sv.BoxAnnotator(color=sv.Color.GREEN) ## GREEN
        pred_annotator = sv.BoxAnnotator(color=sv.Color.RED) ## RED
        label_annotator = sv.LabelAnnotator(text_color=sv.Color.WHITE, text_position=sv.Position.TOP_LEFT)

        annotated = gt_annotator.annotate(scene=image.copy(), detections=gt_det)
        annotated = pred_annotator.annotate(scene=annotated, detections=pred_det)
        labels = [f"{p['class']} ({p['confidence']:.2f})" for p in preds]
        annotated = label_annotator.annotate(scene=annotated, detections=pred_det, labels=labels)

        out_path = os.path.join(OUTPUT_DIR,"images", f"{os.path.splitext(img_name)[0]}_annotated.jpg")
        cv2.imwrite(out_path, annotated)

    # --- SUMMARY ---
    if total_ious:
        print(f"\n✅ Processed {len(total_ious)} images.")
        print(f"📊 Mean IoU across dataset: {np.mean(total_ious):.4f}")
    else:
        print("No valid images processed.")

In [10]:
predict_model()

loading Roboflow workspace...
loading Roboflow project...
Label line:  0 0.321051459375 0.3447248078125 0.1945213359375 0.8638888921874999 0.754343590625 0.8964017765625 0.678025121875 0.1429368890625 0.321051459375 0.3447248078125
Label line:  0 0.6802884609375 0.4987980765625 0.677884615625 0.4362980765625 0.6358173078125 0.346153846875 0.390625 0.4447115390625 0.368990384375 0.4615384609375 0.25 0.50360576875 0.2427884609375 0.5540865390625 0.252403846875 0.5733173078125 0.252403846875 0.6165865390625 0.24038461562500002 0.71514423125 0.2512019234375 0.7331730765625 0.2824519234375 0.75 0.368990384375 0.7548076921875 0.3641826921875 0.76201923125 0.45552884687499995 0.7764423078125 0.4675480765625 0.7644230765625 0.4987980765625 0.7644230765625 0.6394230765625 0.7800480765625 0.6550480765625 0.7644230765625 0.669471153125 0.76923076875 0.6802884609375 0.7584134609375 0.6802884609375 0.4987980765625
Label line:  0 0.9326923078125 0.700721153125 0.8954326921875 0.26201923125 0.7872596

In [15]:
import os
import json
import cv2
import numpy as np
from tqdm import tqdm
import supervision as sv

# ---------------------------------------------------------------------
# --- EXISTING HELPER FUNCTIONS (keep exactly as you provided) ---
# ---------------------------------------------------------------------

# read_yolo_labels, roboflow_to_xyxy, roboflow_pred_to_yolo,
# compute_miou, calculate_map, calculate_precision_recall
# (we assume these are already imported exactly as you pasted them)
# ---------------------------------------------------------------------

def evaluate_predictions(predictions_dir, labels_dir, class_names):
    """
    Evaluate all Roboflow prediction JSONs against YOLO-format ground-truth labels.
    """

    pred_files = [
        f for f in os.listdir(predictions_dir)
        if f.endswith(".json")
    ]

    print(f"📂 Found {len(pred_files)} prediction files in: {predictions_dir}")
    if not pred_files:
        print("❌ No predictions found.")
        return None

    all_mious = []
    image_metrics = []

    all_pred_detections = []
    all_gt_detections = []

    processed = 0
    skipped = 0

    for pred_file in tqdm(pred_files, desc="Evaluating images"):
        base_name = os.path.splitext(pred_file)[0]
        pred_path = os.path.join(predictions_dir, pred_file)
        label_path = os.path.join(labels_dir, f"{base_name}.txt")

        if not os.path.exists(label_path):
            print(f"⚠️ Missing label for {base_name}")
            skipped += 1
            continue

        # --- Load prediction JSON ---
        with open(pred_path, "r") as f:
            pred_data = json.load(f)

        preds = pred_data.get("predictions", [])
        if not preds:
            print(f"⚠️ No predictions for {base_name}")
            skipped += 1
            continue

        # --- Infer image shape ---
        # If Roboflow JSON includes width/height, use them directly
        img_width = int(preds[0]["image"]["width"]) if "image" in preds[0] else int(preds[0]["width"])
        img_height = int(preds[0]["image"]["height"]) if "image" in preds[0] else int(preds[0]["height"])
        img_shape = (img_height, img_width, 3)

        # --- Parse labels & predictions ---
        gt_boxes, gt_classes = read_yolo_labels(label_path, img_shape)
        pred_boxes, pred_classes, pred_conf = roboflow_to_xyxy(preds)

        # --- Build supervision Detections ---
        gt_det = sv.Detections(xyxy=gt_boxes, class_id=gt_classes)
        pred_det = sv.Detections(xyxy=pred_boxes, confidence=pred_conf, class_id=pred_classes)

        # --- Per-image IoU ---
        miou = compute_miou(pred_boxes, gt_boxes)
        all_mious.append(miou)

        # --- Per-image metrics ---
        if len(pred_boxes) > 0 and len(gt_boxes) > 0:
            precision, recall, f1 = calculate_precision_recall(pred_det, gt_det, class_names)
        else:
            precision, recall, f1 = 0.0, 0.0, 0.0

        image_metrics.append({
            "image": base_name,
            "miou": miou,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "num_pred": len(pred_boxes),
            "num_gt": len(gt_boxes),
        })

        # Store detections for global metrics
        all_pred_detections.append(pred_det)
        all_gt_detections.append(gt_det)

        processed += 1

    print(f"\n📊 Processed: {processed}, Skipped: {skipped}")

    if not all_pred_detections or not all_gt_detections:
        print("❌ No valid detections for evaluation.")
        return None

    # ---------------------------------------------------------------------
    # --- Merge detections for global metrics ---
    # ---------------------------------------------------------------------
    def merge_detections(dets_list):
        """Concatenate multiple sv.Detections into one."""
        if not dets_list:
            return sv.Detections.empty()
        xyxy = np.concatenate([d.xyxy for d in dets_list if len(d.xyxy) > 0], axis=0)
        class_id = np.concatenate([d.class_id for d in dets_list if d.class_id is not None], axis=0)
        confidence = np.concatenate(
            [d.confidence for d in dets_list if d.confidence is not None], axis=0
        ) if any(d.confidence is not None for d in dets_list) else None
        return sv.Detections(xyxy=xyxy, class_id=class_id, confidence=confidence)

    merged_pred = merge_detections(all_pred_detections)
    merged_gt = merge_detections(all_gt_detections)

    # ---------------------------------------------------------------------
    # --- Global metrics ---
    # ---------------------------------------------------------------------
    map_result = calculate_map(merged_pred, merged_gt)
    overall_precision, overall_recall, overall_f1 = calculate_precision_recall(
        merged_pred, merged_gt, class_names
    )
    mean_iou = float(np.mean(all_mious)) if all_mious else 0.0

    # ---------------------------------------------------------------------
    # --- Final results dictionary ---
    # ---------------------------------------------------------------------
    results = {
        "overall_metrics": {
            "mAP@50": getattr(map_result, "map50", np.nan),
            "mAP@75": getattr(map_result, "map75", np.nan),
            "mAP@50-95": getattr(map_result, "map", np.nan),
            "precision": overall_precision,
            "recall": overall_recall,
            "f1": overall_f1,
            "mean_iou": mean_iou,
            "total_images": processed,
            "skipped_images": skipped,
        },
        "per_image_metrics": image_metrics,
        "class_names": class_names,
    }

    print("\n✅ Evaluation complete.")
    print(f"📈 Mean IoU: {mean_iou:.4f}")
    print(f"📈 Precision: {overall_precision:.4f}, Recall: {overall_recall:.4f}, F1: {overall_f1:.4f}")
    print(f"📈 mAP@50: {map_result.map50:.4f}, mAP@75: {map_result.map75:.4f}")

    return results

In [18]:
PREDICTIONS_DIR = os.path.join(OUTPUT_DIR, "predictions")
CLASS_NAMES = ["Healthy Container"]
results = evaluate_predictions(PREDICTIONS_DIR, LABELS_DIR, CLASS_NAMES)
if results:
    import pandas as pd
    df = pd.DataFrame(results["per_image_metrics"])
    df.to_csv(os.path.join(OUTPUT_DIR, "evaluation_report.csv"), index=False)
    print("💾 Saved detailed metrics to evaluation_report.csv")

📂 Found 47 prediction files in: /home/emma/facultad/pps/validacion/containers/container-detection-1v0zy/predictions


Evaluating images: 100%|██████████| 47/47 [00:00<00:00, 331.98it/s]

Label line:  0 0.8353674921875 0.3965602140625 0.5302980625 0.0596299625 0.0552811671875 0.0207533890625 0.045628668750000004 0.9356481499999999 0.8799440296875 0.9686011421874999 0.9342713921874999 0.8834429765625 0.8353674921875 0.3965602140625
Confusion Matrix:
[[0. 1.]
 [1. 0.]]
Per-class precision: [0.]
Per-class recall:    [0.]
Per-class F1:        [0.]
Mean Precision: 0.0000, Mean Recall: 0.0000, Mean F1: 0.0000
Label line:  0 0.861778846875 0.939903846875 0.846153846875 0.924278846875 0.7139423078125 0.051682692187500004 0.5204326921875 0.0985576921875 0.4699519234375 0.10336538437500001 0.4675480765625 0.09375 0.4459134609375 0.09375 0.421875 0.12019230781250001 0.3112980765625 0.146634615625 0.21995192343749997 0.153846153125 0.193509615625 0.177884615625 0.0012019234375 0.14423076875000002 0 0.52764423125 0.05528846093749999 0.9987980765625 0.1574519234375 1 0.205528846875 0.9855769234375 0.2259615390625 0.9891826921875 0.1802884609375 0.9987980765625 0.5625 0.9987980765625 




Confusion Matrix:
[[ 3. 45.]
 [50.  0.]]
Per-class precision: [0.0625]
Per-class recall:    [0.05660377]
Per-class F1:        [0.05940594]
Mean Precision: 0.0625, Mean Recall: 0.0566, Mean F1: 0.0594

✅ Evaluation complete.
📈 Mean IoU: 0.0802
📈 Precision: 0.0625, Recall: 0.0566, F1: 0.0594
📈 mAP@50: 0.0046, mAP@75: 0.0000
💾 Saved detailed metrics to evaluation_report.csv
