In [22]:
import os, json
from inference_sdk import InferenceHTTPClient
import cv2
import supervision as sv
import numpy as np
import pandas as pd
import dotenv
import yaml

def load_config(config_path="config.yaml"):
    """
    Load configuration from a YAML file and resolve the API key placeholder.

    Variables from a local ``.env`` file are loaded into the process
    environment first, so the placeholder can be satisfied from there.

    Args:
        config_path (str): Path to the YAML configuration file

    Returns:
        dict: The parsed configuration mapping. If the ``api_key`` entry
        contains the literal ``${API_KEY}``, it is replaced with the value of
        the ``API_KEY`` environment variable (``None`` when that variable is
        not set).

    Raises:
        ValueError: If the file is empty or its top level is not a mapping.
    """
    dotenv.load_dotenv()

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(config_path, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)

    # safe_load returns None for an empty document and a list/scalar for
    # non-mapping documents; fail here with a readable message instead of an
    # AttributeError on the .get() call below.
    if not isinstance(config, dict):
        raise ValueError(
            f"Configuration file {config_path!r} must contain a YAML mapping, "
            f"got {type(config).__name__}"
        )

    # Resolve environment variables
    if '${API_KEY}' in str(config.get('api_key', '')):
        config['api_key'] = os.getenv("API_KEY")

    return config

# Load configuration once; the names below are module-level globals that the
# later cells read directly.
config = load_config()
API_KEY = config['api_key']  # None when the ${API_KEY} placeholder is used but the env var is unset
MODEL = config['model']  # passed as model_id to client.infer
TEST_DIR = config['test_dir']  # later cells look for "images" and "labels" subdirectories here
PREDICTIONS_DIR = ".predictions_json"  # inference results are written here, one JSON file per image

# HTTP client used for every inference request in this notebook.
client = InferenceHTTPClient(api_url="https://detect.roboflow.com", api_key=API_KEY)

# Create the output directory up front; exist_ok makes re-running this cell safe.
os.makedirs(PREDICTIONS_DIR, exist_ok=True)

def remove_json_extension(filename):
    """
    Strip a trailing ``.json`` from a filename, if present.

    Args:
        filename (str): Name to process

    Returns:
        str: ``filename`` without its final ``.json``; returned unchanged
        when it does not end with that suffix (the check is case-sensitive)

    Example:
        file.json -> file
    """
    suffix = '.json'
    has_suffix = filename.endswith(suffix)
    # Slice by explicit end index rather than a negative offset.
    return filename[:len(filename) - len(suffix)] if has_suffix else filename


def remove_image_extension(filename):
    """
    Strip one trailing image extension (.jpg, .png or .jpeg) from a filename.

    Args:
        filename (str): Name to process

    Returns:
        str: ``filename`` without its image extension; returned unchanged
        when it ends with none of the known extensions (case-sensitive)

    Example:
        image.jpg -> image
        photo.png -> photo
    """
    # First extension (in this fixed order) that the name ends with, if any.
    matched = next(
        (ext for ext in ('.jpg', '.png', '.jpeg') if filename.endswith(ext)),
        None,
    )
    if matched is None:
        return filename
    return filename[:len(filename) - len(matched)]

# def box_iou_matrix(a, b):
#         """
#         Returns matrix of IoUs between two sets of boxes (NxM).
#         """
#         ious = np.zeros((len(a), len(b)))
#         for i, boxA in enumerate(a):
#             xA1, yA1, xA2, yA2 = boxA
#             for j, boxB in enumerate(b):
#                 xB1, yB1, xB2, yB2 = boxB
#                 inter_x1 = max(xA1, xB1)
#                 inter_y1 = max(yA1, yB1)
#                 inter_x2 = min(xA2, xB2)
#                 inter_y2 = min(yA2, yB2)
#                 inter_w = max(0, inter_x2 - inter_x1)
#                 inter_h = max(0, inter_y2 - inter_y1)
#                 inter_area = inter_w * inter_h
#                 areaA = (xA2 - xA1) * (yA2 - yA1)
#                 areaB = (xB2 - xB1) * (yB2 - yB1)
#                 union = areaA + areaB - inter_area
#                 ious[i, j] = inter_area / union if union > 0 else 0
#         return ious


# def box_iou_matrix(boxA, boxB):
#     print(boxA, boxB)
#     # determine the (x, y)-coordinates of the intersection rectangle
#     xA = max(boxA[0], boxB[0])
#     yA = max(boxA[1], boxB[1])
#     xB = min(boxA[2], boxB[2])
#     yB = min(boxA[3], boxB[3])

#     # compute the area of intersection rectangle
#     interArea = abs(max((xB - xA, 0)) * max((yB - yA), 0))
#     if interArea == 0:
#         return 0
#     # compute the area of both the prediction and ground-truth
#     # rectangles
#     boxAArea = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
#     boxBArea = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))

#     # compute the intersection over union by taking the intersection
#     # area and dividing it by the sum of prediction + ground-truth
#     # areas - the intersection area
#     iou = interArea / float(boxAArea + boxBArea - interArea)

#     # return the intersection over union value
#     return iou

def box_iou_matrix(bb1, bb2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.

    Despite the name, this returns a single scalar for one pair of boxes;
    callers build the full matrix by invoking it per pair.

    Parameters
    ----------
    bb1 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner
    bb2 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner

    Returns
    -------
    float
        in [0, 1]. Boxes that do not overlap, and pairs whose union has no
        area (both boxes degenerate), yield 0.0.
    """
    # Reorder coordinates so that x1 <= x2 and y1 <= y2 for each box.
    def validate_and_fix_bbox(bb):
        return {
            'x1': min(bb['x1'], bb['x2']),
            'y1': min(bb['y1'], bb['y2']),
            'x2': max(bb['x1'], bb['x2']),
            'y2': max(bb['y1'], bb['y2']),
        }

    bb1 = validate_and_fix_bbox(bb1)
    bb2 = validate_and_fix_bbox(bb2)

    # determine the coordinates of the intersection rectangle
    x_left = max(bb1['x1'], bb2['x1'])
    y_top = max(bb1['y1'], bb2['y1'])
    x_right = min(bb1['x2'], bb2['x2'])
    y_bottom = min(bb1['y2'], bb2['y2'])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both AABBs
    bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
    bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])

    # Union = sum of both areas minus the intersection area.
    union_area = bb1_area + bb2_area - intersection_area

    # Two zero-area boxes that touch pass the overlap test above but have an
    # empty union; dividing would raise ZeroDivisionError.
    if union_area <= 0:
        return 0.0

    iou = intersection_area / float(union_area)

    # Clamp instead of asserting: floating-point rounding may nudge the ratio
    # marginally outside [0, 1], and asserts are stripped under `python -O`.
    return min(1.0, max(0.0, iou))

In [23]:

# Directory holding the test images; this global is also read by the
# evaluation cell further down.
images_dir = os.path.join(TEST_DIR, "images")

# Send every image to the hosted model and store the raw response as
# "<image stem>.json" in PREDICTIONS_DIR. There is no cache check, so each
# run of this cell issues one request per image.
for filename in os.listdir(images_dir):
    # Case-sensitive match: files such as "photo.JPG" are skipped.
    if not filename.endswith((".jpg", ".png", ".jpeg")):
        continue
    img_path = os.path.join(images_dir, filename)
    result = client.infer(img_path, model_id=MODEL)
    # Images that share a stem (e.g. a.jpg and a.png) write to the same JSON
    # file, so the later one overwrites the earlier one.
    with open(os.path.join(PREDICTIONS_DIR, remove_image_extension(filename) + ".json"), "w") as f:
        json.dump(result, f)

In [24]:
def read_yolo_labels(label_path, img_shape):
    """
    Read a YOLO label file and return one pixel-space box per annotation.

    Two line layouts are recognised (all coordinates normalised to [0, 1]):

    * detection:    ``class x_center y_center width height``
    * segmentation: ``class x1 y1 x2 y2 ... xn yn`` (polygon, n >= 3)

    Segmentation lines carry no explicit box, so the box is derived from the
    polygon's extents. Pose labels (``class x y w h`` followed by keypoints)
    cannot be told apart from polygons by token count and are not supported.

    Args:
        label_path (str): Path to the ``.txt`` label file
        img_shape (tuple): Image shape whose first two entries are
            (height, width), e.g. ``img.shape`` from OpenCV

    Returns:
        list[dict]: One dict per valid line with keys ``class`` (int) and
        ``x``, ``y``, ``width``, ``height`` (centre and size in pixels).
        Polygon annotations additionally carry ``points``, a list of
        ``(x, y)`` pixel tuples. Lines with an unsupported number of fields
        are skipped.

    Raises:
        ValueError: If a field on an otherwise well-formed line is not numeric.
    """
    h, w = img_shape[:2]
    boxes = []
    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue  # skip blank or malformed line

            cls = int(float(parts[0]))
            values = list(map(float, parts[1:]))

            if len(values) == 4:
                # Detection layout: centre + size.
                x, y, bw, bh = values
                box = {
                    "class": cls,
                    "x": x * w,
                    "y": y * h,
                    "width": bw * w,
                    "height": bh * h
                }
            elif len(values) >= 6 and len(values) % 2 == 0:
                # Segmentation layout: every value belongs to the polygon, so
                # the first four numbers must not be read as a box.
                points = [(values[i] * w, values[i + 1] * h) for i in range(0, len(values), 2)]
                xs = [px for px, _ in points]
                ys = [py for _, py in points]
                x_min, x_max = min(xs), max(xs)
                y_min, y_max = min(ys), max(ys)
                box = {
                    "class": cls,
                    "x": (x_min + x_max) / 2,
                    "y": (y_min + y_max) / 2,
                    "width": x_max - x_min,
                    "height": y_max - y_min,
                    "points": points
                }
            else:
                # Odd or too-short coordinate list: neither layout applies.
                continue

            boxes.append(box)
    return boxes



In [25]:
import numpy as np
import supervision as sv

def compute_metrics(pred_boxes, gt_boxes, img_shape, iou_threshold=0.5):
    """
    Compare predicted boxes against ground-truth boxes for a single image.

    Both inputs are lists of dicts with centre-format keys ``x``, ``y``,
    ``width`` and ``height`` in pixels. Class labels are not compared; only
    box geometry is evaluated.

    Predictions are matched to ground truths one-to-one: candidate pairs with
    IoU above ``iou_threshold`` are taken greedily from highest IoU to lowest,
    and each prediction and each ground truth is used at most once. This keeps
    the number of true positives within both list lengths, so precision and
    recall stay within [0, 1].

    Args:
        pred_boxes (list[dict]): Predicted boxes
        gt_boxes (list[dict]): Ground-truth boxes
        img_shape (tuple): Image shape; accepted for interface compatibility
            and not used in the computation
        iou_threshold (float): Minimum IoU (exclusive) for a pair to count as
            a match

    Returns:
        tuple: ``(precision, recall, mean_iou)`` where ``mean_iou`` is the
        average, over predictions, of each prediction's best IoU with any
        ground truth. It is 0.0 when either list is empty.
    """
    def to_corners(box):
        # Centre/size -> corner representation expected by box_iou_matrix.
        half_w = box["width"] / 2
        half_h = box["height"] / 2
        return {
            "x1": box["x"] - half_w,
            "y1": box["y"] - half_h,
            "x2": box["x"] + half_w,
            "y2": box["y"] + half_h,
        }

    pred_dicts = [to_corners(box) for box in pred_boxes]
    gt_dicts = [to_corners(box) for box in gt_boxes]

    # Pairwise IoU, rows = predictions, columns = ground truths.
    iou_matrix = np.zeros((len(pred_dicts), len(gt_dicts)))
    for i, pred_box in enumerate(pred_dicts):
        for j, gt_box in enumerate(gt_dicts):
            iou_matrix[i, j] = box_iou_matrix(pred_box, gt_box)

    # Greedy one-to-one assignment. Counting every pair above the threshold
    # would let one prediction match several ground truths (and vice versa).
    candidates = sorted(
        (
            (iou_matrix[i, j], i, j)
            for i in range(len(pred_dicts))
            for j in range(len(gt_dicts))
            if iou_matrix[i, j] > iou_threshold
        ),
        key=lambda item: item[0],
        reverse=True,
    )
    used_preds = set()
    used_gts = set()
    for _, i, j in candidates:
        if i in used_preds or j in used_gts:
            continue
        used_preds.add(i)
        used_gts.add(j)

    tp = len(used_preds)
    fp = len(pred_dicts) - tp
    fn = len(gt_dicts) - tp

    # Metrics (the epsilon avoids division by zero when a list is empty)
    precision = tp / (tp + fp + 1e-6)
    recall = tp / (tp + fn + 1e-6)

    # .size is zero when either dimension is empty; max(axis=1) on an (N, 0)
    # matrix would raise ValueError.
    mean_iou = float(iou_matrix.max(axis=1).mean()) if iou_matrix.size > 0 else 0.0

    return precision, recall, mean_iou


In [26]:
results = []

# Evaluate every stored prediction file against its ground-truth labels.
# Sorted so the row order of the results table is reproducible across runs.
for filename in sorted(os.listdir(PREDICTIONS_DIR)):
    if not filename.endswith(".json"):
        continue  # ignore stray files in the predictions directory
    json_path = os.path.join(PREDICTIONS_DIR, filename)
    img_name = remove_json_extension(filename)
    label_path = os.path.join(TEST_DIR, "labels", img_name + ".txt")

    # Predictions are produced for .jpg, .png and .jpeg images, so the
    # original extension has to be rediscovered rather than assumed.
    candidate_paths = [
        os.path.join(images_dir, img_name + ext) for ext in (".jpg", ".png", ".jpeg")
    ]
    img_path = next((path for path in candidate_paths if os.path.exists(path)), None)
    if img_path is None:
        print(f"Skipping {filename}: no matching image found in {images_dir}")
        continue

    with open(json_path) as f:
        preds = json.load(f)

    # cv2.imread returns None (no exception) when the file cannot be decoded.
    img = cv2.imread(img_path)
    if img is None:
        print(f"Skipping {filename}: could not read image {img_path}")
        continue

    gt_boxes = read_yolo_labels(label_path, img.shape)
    pred_boxes = preds["predictions"]

    precision, recall, mean_iou = compute_metrics(pred_boxes, gt_boxes, img.shape)
    results.append({"image": img_name, "precision": precision, "recall": recall, "mean_iou": mean_iou })

df = pd.DataFrame(results)
print(df.describe())

       precision     recall   mean_iou
count  13.000000  13.000000  13.000000
mean    0.823114   0.724345   0.720405
std     0.213328   0.338651   0.210105
min     0.500000   0.125000   0.385133
25%     0.600000   0.500000   0.580431
50%     0.999999   0.999999   0.767307
75%     0.999999   0.999999   0.919325
max     1.000000   1.000000   0.954000
