In [1]:
import os, json
from inference_sdk import InferenceHTTPClient
import cv2
import supervision as sv
import numpy as np
import pandas as pd
import dotenv
import yaml
from datetime import datetime

def load_config(config_path="config.yaml"):
    """
    Load configuration from YAML file and resolve environment variables.
    
    Args:
        config_path (str): Path to the YAML configuration file
        
    Returns:
        dict: Configuration dictionary with api_key, models, and test_dir
    """
    dotenv.load_dotenv()
    
    with open(config_path, 'r') as file:
        config = yaml.safe_load(file)
    
    # Resolve environment variables
    if '${API_KEY}' in str(config.get('api_key', '')):
        config['api_key'] = os.getenv("API_KEY")
    
    return config

# Load configuration
config = load_config()
API_KEY = config['api_key']
MODELS = config['models']
BASE_TEST_DIR = config['test_dir']

client = InferenceHTTPClient(api_url="https://detect.roboflow.com", api_key=API_KEY)

def remove_json_extension(filename):
    """
    Remove the .json extension from a filename.
    
    Args:
        filename (str): The filename to process
        
    Returns:
        str: The filename with .json extension removed
        
    Example:
        file.json -> file
    """
    if filename.endswith('.json'):
        return filename[:-5]  # Remove '.json'
    return filename


def remove_image_extension(filename):
    """
    Remove image extensions (.jpg, .png, .jpeg) from a filename.
    
    Args:
        filename (str): The filename to process
        
    Returns:
        str: The filename with image extension removed
        
    Example:
        image.jpg -> image
        photo.png -> photo
    """
    image_extensions = ['.jpg', '.png', '.jpeg']
    for ext in image_extensions:
        if filename.endswith(ext):
            return filename[:-len(ext)]
    return filename

def box_iou_matrix(bb1, bb2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.

    Parameters
    ----------
    bb1 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner
    bb2 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner

    Returns
    -------
    float
        in [0, 1]
    """
    # Validate and fix bounding box coordinates if needed
    def validate_and_fix_bbox(bb):
        x1, x2 = bb['x1'], bb['x2']
        y1, y2 = bb['y1'], bb['y2']
        
        # Ensure x1 < x2 and y1 < y2
        if x1 > x2:
            x1, x2 = x2, x1
        if y1 > y2:
            y1, y2 = y2, y1
            
        return {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2}
    
    bb1 = validate_and_fix_bbox(bb1)
    bb2 = validate_and_fix_bbox(bb2)

    # determine the coordinates of the intersection rectangle
    x_left = max(bb1['x1'], bb2['x1'])
    y_top = max(bb1['y1'], bb2['y1'])
    x_right = min(bb1['x2'], bb2['x2'])
    y_bottom = min(bb1['y2'], bb2['y2'])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both AABBs
    bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
    bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou



In [2]:

def generate_predictions_for_model(model_name, test_dir):
    """
    Generate predictions for a specific model and test directory.
    
    Args:
        model_name (str): Name of the model to use for inference
        test_dir (str): Path to the test directory containing images
        
    Returns:
        str: Path to the predictions directory for this model
    """
    images_dir = os.path.join(test_dir, "images")
    predictions_dir = f".predictions_{model_name.replace('/', '_')}"
    
    os.makedirs(predictions_dir, exist_ok=True)
    
    for filename in os.listdir(images_dir):
        if not filename.endswith((".jpg", ".png", ".jpeg")):
            continue
        img_path = os.path.join(images_dir, filename)
        result = client.infer(img_path, model_id=model_name)
        with open(os.path.join(predictions_dir, remove_image_extension(filename) + ".json"), "w") as f:
            json.dump(result, f)
    
    return predictions_dir

In [3]:
def read_yolo_labels(label_path, img_shape):
    """
    Reads YOLOv11 (detection or segmentation) labels and returns box info.
    """
    h, w = img_shape[:2]
    boxes = []
    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue  # skip malformed line
            cls, x, y, bw, bh = map(float, parts[:5])

            box = {
                "class": int(cls),
                "x": x * w,
                "y": y * h,
                "width": bw * w,
                "height": bh * h
            }

            # If segmentation points exist, add them
            if len(parts) > 5:
                coords = list(map(float, parts[5:]))
                # convert normalized pairs into pixel coords
                points = [(coords[i] * w, coords[i+1] * h) for i in range(0, len(coords), 2)]
                box["points"] = points

            boxes.append(box)
    return boxes



In [4]:
import numpy as np
import supervision as sv

def compute_metrics(pred_boxes, gt_boxes, img_shape, iou_threshold=0.5):
    """
    Compare Roboflow predictions (list of dicts) vs YOLOv11 ground truth.
    Returns (precision, recall, mean_iou)
    """
    h, w = img_shape[:2]

    # Convert predictions to dictionary format for IoU calculation
    pred_dicts = []
    for pred in pred_boxes:
        x1 = pred["x"] - pred["width"] / 2
        y1 = pred["y"] - pred["height"] / 2
        x2 = pred["x"] + pred["width"] / 2
        y2 = pred["y"] + pred["height"] / 2
        pred_dicts.append({"x1": x1, "y1": y1, "x2": x2, "y2": y2})

    # Convert ground truth boxes to dictionary format
    gt_dicts = []
    for box in gt_boxes:
        x1 = box["x"] - box["width"] / 2
        y1 = box["y"] - box["height"] / 2
        x2 = box["x"] + box["width"] / 2
        y2 = box["y"] + box["height"] / 2
        gt_dicts.append({"x1": x1, "y1": y1, "x2": x2, "y2": y2})

    # Compute IoU matrix using the new function
    iou_matrix = np.zeros((len(pred_dicts), len(gt_dicts)))
    for i, pred_box in enumerate(pred_dicts):
        for j, gt_box in enumerate(gt_dicts):
            iou_matrix[i, j] = box_iou_matrix(pred_box, gt_box)

    # Compute matches based on IoU threshold
    matches = (iou_matrix > iou_threshold).sum()
    tp = matches
    fp = len(pred_dicts) - tp
    fn = len(gt_dicts) - tp

    # Metrics
    precision = tp / (tp + fp + 1e-6)
    recall = tp / (tp + fn + 1e-6)

    mean_iou = 0
    if(iou_matrix.size > 0 and len(iou_matrix) > 0):
        mean_iou = iou_matrix.max(axis=1).mean()


    return precision, recall, mean_iou


In [5]:
def compute_metrics_for_model(model_name, predictions_dir, test_dir):
    """
    Compute metrics for a specific model.
    
    Args:
        model_name (str): Name of the model
        predictions_dir (str): Path to the predictions directory
        test_dir (str): Path to the test directory
        
    Returns:
        pd.DataFrame: DataFrame with results for this model
    """
    results = []
    images_dir = os.path.join(test_dir, "images")
    
    for filename in os.listdir(predictions_dir):
        json_path = os.path.join(predictions_dir, filename)
        img_name = remove_json_extension(filename)
        label_path = os.path.join(test_dir, "labels", img_name + ".txt")
        img_path = os.path.join(images_dir, img_name + ".jpg")

        preds = json.load(open(json_path))
        img = cv2.imread(img_path)
        print(img_path, img)
        gt_boxes = read_yolo_labels(label_path, img.shape)
        pred_boxes = preds["predictions"]
        
        precision, recall, mean_iou = compute_metrics(pred_boxes, gt_boxes, img.shape)
        results.append({
            "model": model_name,
            "image": img_name, 
            "precision": precision, 
            "recall": recall, 
            "mean_iou": mean_iou 
        })
    
    return pd.DataFrame(results)

# Process all models using common test directory
all_results = []
model_summaries = {}

for model_config in MODELS:
    model_name = model_config['name']
    # Use the common test directory for all models
    test_dir = BASE_TEST_DIR
    
    print(f"Processing model: {model_name}")
    print(f"Test directory: {test_dir}")
    
    # Generate predictions
    predictions_dir = generate_predictions_for_model(model_name, test_dir)
    
    # Compute metrics
    model_results = compute_metrics_for_model(model_name, predictions_dir, test_dir)
    all_results.append(model_results)
    
    # Store summary statistics
    model_summaries[model_name] = {
        'count': len(model_results),
        'mean_precision': model_results['precision'].mean(),
        'mean_recall': model_results['recall'].mean(),
        'mean_iou': model_results['mean_iou'].mean(),
        'std_precision': model_results['precision'].std(),
        'std_recall': model_results['recall'].std(),
        'std_iou': model_results['mean_iou'].std()
    }
    
    print(f"Model {model_name} - Mean Precision: {model_summaries[model_name]['mean_precision']:.3f}, Mean Recall: {model_summaries[model_name]['mean_recall']:.3f}, Mean IoU: {model_summaries[model_name]['mean_iou']:.3f}")
    print()

# Combine all results
combined_df = pd.concat(all_results, ignore_index=True)
print("Combined Results Summary:")
print(combined_df.groupby('model')[['precision', 'recall', 'mean_iou']].describe())

Processing model: container-detection-1v0zy/7
Test directory: /home/emma/facultad/pps/datasets/containers/raw/container-detection.v7i.yolov11/test
/home/emma/facultad/pps/datasets/containers/raw/container-detection.v7i.yolov11/test/images/CSNU1552180-2-_jpg.rf.6895f427655a3472deea3fb811cac76d.jpg [[[124  80  19]
  [120  76  15]
  [116  72  11]
  ...
  [ 17  14   9]
  [ 17  14   9]
  [ 17  14   9]]

 [[123  79  18]
  [119  75  14]
  [115  71  10]
  ...
  [ 17  14   9]
  [ 17  14   9]
  [ 17  14   9]]

 [[122  78  17]
  [118  74  13]
  [115  71  10]
  ...
  [ 17  14   9]
  [ 17  14   9]
  [ 17  14   9]]

 ...

 [[196 161  88]
  [196 161  88]
  [194 159  86]
  ...
  [ 11   9   8]
  [ 11   9   8]
  [ 11   9   8]]

 [[196 161  88]
  [195 160  87]
  [192 157  84]
  ...
  [ 11   9   8]
  [ 11   9   8]
  [ 11   9   8]]

 [[196 161  88]
  [195 160  87]
  [191 156  83]
  ...
  [ 11   9   8]
  [ 11   9   8]
  [ 11   9   8]]]
/home/emma/facultad/pps/datasets/containers/raw/container-detection.v7i.

In [6]:
def generate_markdown_report(model_summaries, combined_df, base_test_dir):
    """
    Generate a markdown report with results for each model.
    
    Args:
        model_summaries (dict): Summary statistics for each model
        combined_df (pd.DataFrame): Combined results dataframe
        base_test_dir (str): Base test directory path
        
    Returns:
        str: Markdown report content
    """
    # Count total images
    images_dir = os.path.join(base_test_dir, "images")
    total_images = len([f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
    
    # Get dataset link (assuming it's in the path structure)
    dataset_path = base_test_dir
    dataset_link = f"Dataset: {dataset_path}"
    
    report = f"""# Model Validation Report

**Generated on:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Dataset Information
- **Path:** {dataset_link}
- **Total Images:** {total_images}
- **Note:** All models are tested against the same common dataset

## Model Results

"""
    
    for model_name, summary in model_summaries.items():
        report += f"""### Model: {model_name}

| Metric | Mean | Std Dev | Min | Max |
|--------|------|---------|-----|-----|
| Precision | {summary['mean_precision']:.3f} | {summary['std_precision']:.3f} | {combined_df[combined_df['model'] == model_name]['precision'].min():.3f} | {combined_df[combined_df['model'] == model_name]['precision'].max():.3f} |
| Recall | {summary['mean_recall']:.3f} | {summary['std_recall']:.3f} | {combined_df[combined_df['model'] == model_name]['recall'].min():.3f} | {combined_df[combined_df['model'] == model_name]['recall'].max():.3f} |
| Mean IoU | {summary['mean_iou']:.3f} | {summary['std_iou']:.3f} | {combined_df[combined_df['model'] == model_name]['mean_iou'].min():.3f} | {combined_df[combined_df['model'] == model_name]['mean_iou'].max():.3f} |

**Sample Count:** {summary['count']}

"""
    
    # Add comparison table
    report += """## Model Comparison

| Model | Mean Precision | Mean Recall | Mean IoU | Sample Count |
|-------|----------------|-------------|----------|--------------|
"""
    
    for model_name, summary in model_summaries.items():
        report += f"| {model_name} | {summary['mean_precision']:.3f} | {summary['mean_recall']:.3f} | {summary['mean_iou']:.3f} | {summary['count']} |\n"
    
    return report

# Generate and save the report
report_content = generate_markdown_report(model_summaries, combined_df, BASE_TEST_DIR)

# Save report to file
report_filename = f"model_validation_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
with open(report_filename, 'w') as f:
    f.write(report_content)

print(f"Report saved to: {report_filename}")
print("\n" + "="*50)
print("MARKDOWN REPORT:")
print("="*50)
print(report_content)


Report saved to: model_validation_report_20251025_221824.md

MARKDOWN REPORT:
# Model Validation Report

**Generated on:** 2025-10-25 22:18:24

## Dataset Information
- **Path:** Dataset: /home/emma/facultad/pps/datasets/containers/raw/container-detection.v7i.yolov11/test
- **Total Images:** 47
- **Note:** All models are tested against the same common dataset

## Model Results

### Model: container-detection-1v0zy/7

| Metric | Mean | Std Dev | Min | Max |
|--------|------|---------|-----|-----|
| Precision | 0.000 | 0.000 | 0.000 | 0.000 |
| Recall | 0.000 | 0.000 | 0.000 | 0.000 |
| Mean IoU | 0.127 | 0.102 | 0.000 | 0.354 |

**Sample Count:** 47

## Model Comparison

| Model | Mean Precision | Mean Recall | Mean IoU | Sample Count |
|-------|----------------|-------------|----------|--------------|
| container-detection-1v0zy/7 | 0.000 | 0.000 | 0.127 | 47 |

