In [None]:
!pip install roboflow opencv-python-headless matplotlib requests tqdm scikit-learn pandas numpy pillow

In [None]:
import os
import cv2
import glob
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
from tqdm import tqdm
import requests
import io
from typing import Dict, List, Tuple, Optional
from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, f1_score,
    average_precision_score, precision_recall_curve
)
from roboflow import Roboflow

In [None]:
# Configuration
# NOTE(review): Roboflow API keys are committed in this cell. Supply the key through the
# ROBOFLOW_API_KEY environment variable and rotate the literal values kept below.
CONFIG = {
    # The environment variable takes precedence; the literal fallback keeps existing runs working.
    'roboflow_api_key': os.environ.get('ROBOFLOW_API_KEY', "K0xg5GEEinqPgaqjKKzz"),
    'workspace': "matyworkspace",
    'project': "damagedhealthytrafficsigns",
    'version': 9,
    # Alternative dataset, kept for reference:
    #'roboflow_api_key': "LcrgBgwaWa5TXeG1Vtxa",
    #'workspace': "matyworkspace",
    #'project': "just-traffic-signs-45axx",
    #'version': 3,
    # Endpoint that images are POSTed to; override with the PREDICTION_URL environment variable.
    'prediction_url': os.environ.get('PREDICTION_URL', 'http://localhost:3000/predict'),
    # Indexed by the class id stored in the label files.
    'class_names': ["damaged", "healthy"],
    # Number of size groups the signs are split into.
    'size_groups': 4,
    # Minimum IoU for a prediction to be matched to a ground-truth box.
    'iou_threshold': 0.5,
    # Predictions whose healthy and damaged scores are both below this are labelled "background".
    'confidence_threshold': 0.4,
    # Used as the prefix of the analysis output directory name.
    'analysis_property': 'size'
}

In [None]:
# Download the configured dataset version from Roboflow in YOLOv8 export format
rf = Roboflow(api_key=CONFIG['roboflow_api_key'])
workspace = rf.workspace(CONFIG['workspace'])
project = workspace.project(CONFIG['project'])
version = project.version(CONFIG['version'])
dataset = version.download("yolov8")

# The test split keeps images and their label files in sibling folders
test_split_dir = os.path.join(dataset.location, "test")
image_dir = os.path.join(test_split_dir, "images")
label_dir = os.path.join(test_split_dir, "labels")
jpg_pattern = os.path.join(image_dir, "*.jpg")
original_image_paths = glob.glob(jpg_pattern)

print(f"Loaded {len(original_image_paths)} images from dataset")

In [None]:
def calculate_sign_size_percentage(bbox: Dict, image_width: int, image_height: int) -> float:
    """
    Return how much of the image a bounding box covers, as a percentage of the image area.

    Args:
        bbox: Mapping holding the corner coordinates under "x1", "y1", "x2" and "y2"
        image_width: Image width in pixels
        image_height: Image height in pixels

    Returns:
        Box area divided by image area, multiplied by 100
    """
    box_area = (bbox["x2"] - bbox["x1"]) * (bbox["y2"] - bbox["y1"])
    full_area = image_width * image_height
    return (box_area / full_area) * 100

In [None]:
def analyze_dataset_sizes(image_paths: List[str], label_dir: str) -> List[Dict]:
    """
    Measure every ground-truth traffic sign as a percentage of its image area.

    Only the label files are used; no predictions are requested.

    Args:
        image_paths: Paths of the images to analyze
        label_dir: Directory holding one label file per image, named after the
            image with a ``.txt`` extension. Each non-blank line must contain
            exactly five numbers: class id, centre x, centre y, width, height,
            the last four being fractions of the image size.

    Returns:
        One dictionary per labelled sign with the keys "image", "class",
        "size_percentage", "image_width" and "image_height". Images that
        cannot be read, or that have no label file, contribute no entries.

    Raises:
        ValueError: If a non-blank label line does not hold exactly five numbers
    """
    size_data = []

    print(f"Analyzing sizes for {len(image_paths)} images...")

    for image_path in tqdm(image_paths, desc="Analyzing image sizes"):
        # cv2.imread signals failure by returning None rather than raising
        image = cv2.imread(image_path)
        if image is None:
            print(f"Warning: could not read image {image_path}, skipping")
            continue
        h_img, w_img = image.shape[:2]

        # Swap only the extension so a ".jpg" elsewhere in the file name is left intact
        stem = os.path.splitext(os.path.basename(image_path))[0]
        label_path = os.path.join(label_dir, stem + ".txt")

        if not os.path.exists(label_path):
            continue

        with open(label_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines (e.g. a trailing newline)

                class_id, cx, cy, w, h = map(float, fields)

                # Convert the normalized centre/size box into pixel corner coordinates
                x_center = cx * w_img
                y_center = cy * h_img
                width = w * w_img
                height = h * h_img
                bbox = {
                    "x1": int(x_center - width / 2),
                    "y1": int(y_center - height / 2),
                    "x2": int(x_center + width / 2),
                    "y2": int(y_center + height / 2),
                }

                size_data.append({
                    "image": image_path,
                    "class": CONFIG['class_names'][int(class_id)],
                    "size_percentage": calculate_sign_size_percentage(bbox, w_img, h_img),
                    "image_width": w_img,
                    "image_height": h_img
                })

    return size_data

def plot_dataset_size_histogram_with_groups(size_data: List[Dict], num_groups: int = 4, 
                                           save_path: Optional[str] = None, figsize=(8, 4)):
    """
    Draw one box plot per size group for the dataset's sign sizes and print summary statistics.

    Despite the function name, the figure is a box plot rather than a histogram.
    Group boundaries are taken from the sorted sizes at multiples of
    ``len(sizes) // num_groups``. The first group includes its lower boundary;
    every later group excludes it so that no sign is counted twice.

    Args:
        size_data: Dictionaries with a "size_percentage" entry
        num_groups: Number of size groups to form
        save_path: When given, the figure is also written to this path
        figsize: Figure size passed to ``plt.subplots``

    Returns:
        None. Prints a message and returns early when ``size_data`` is empty.
    """
    if not size_data:
        print("No size data available to plot")
        return

    # Extract size percentages and derive the group boundaries
    sizes = [d['size_percentage'] for d in size_data]
    sizes_sorted = sorted(sizes)

    group_size = len(sizes_sorted) // num_groups
    boundaries = (
        [sizes_sorted[0]]
        + [sizes_sorted[i * group_size] for i in range(1, num_groups)]
        + [sizes_sorted[-1]]
    )

    # Assign the sizes to groups once; counts and box-plot data both derive from this
    group_data_for_box = []
    group_labels = []
    for i in range(num_groups):
        min_size = boundaries[i]
        max_size = boundaries[i + 1]

        if i == 0:  # First group, include minimum
            group_sizes = [s for s in sizes if min_size <= s <= max_size]
        else:  # Other groups, exclude minimum to avoid overlap
            group_sizes = [s for s in sizes if min_size < s <= max_size]

        group_data_for_box.append(group_sizes)
        group_labels.append(f"Group {i+1}\n{min_size:.2f}% - {max_size:.2f}%")

    group_counts = [len(group_sizes) for group_sizes in group_data_for_box]

    # Create the box plot
    fig, ax2 = plt.subplots(1, 1, figsize=figsize)
    colors = ['red', 'green', 'orange', 'purple', 'brown']

    box_plot = ax2.boxplot(group_data_for_box, patch_artist=True)
    # Label the ticks on the axis instead of via boxplot(labels=...), which is
    # deprecated since Matplotlib 3.9; this form works on old and new releases.
    ax2.set_xticklabels(group_labels)

    # Color the boxes
    for i, patch in enumerate(box_plot['boxes']):
        patch.set_facecolor(colors[i % len(colors)])
        patch.set_alpha(0.7)

    ax2.set_xlabel('Size Groups', fontweight='bold')
    ax2.set_ylabel('Sign Size (% of image area)', fontweight='bold')
    ax2.set_title('Size Distribution by Groups', fontweight='bold')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved dataset size histogram to {save_path}")

    plt.show()

    # Print statistics
    print("\nDataset Size Statistics:")
    print("=" * 60)
    print(f"Total signs: {len(sizes)}")
    print(f"Min size: {min(sizes):.2f}%")
    print(f"Max size: {max(sizes):.2f}%") 
    print(f"Mean size: {np.mean(sizes):.2f}%")
    print(f"Median size: {np.median(sizes):.2f}%")
    print(f"Std deviation: {np.std(sizes):.2f}%")

    for i in range(num_groups):
        print(f"Group {i+1}: {boundaries[i]:.2f}% - {boundaries[i+1]:.2f}% ({group_counts[i]} signs)")

# Measure every ground-truth sign in the test split
print("Analyzing dataset sign sizes...")
dataset_size_data = analyze_dataset_sizes(image_paths=original_image_paths, label_dir=label_dir)

# Plot the size distribution and store the figure inside the dataset folder
dataset_plots_dir = os.path.join(dataset.location, "dataset_analysis")
os.makedirs(dataset_plots_dir, exist_ok=True)
dataset_histogram_path = os.path.join(dataset_plots_dir, 'dataset_size_histogram.png')

plot_dataset_size_histogram_with_groups(
    dataset_size_data,
    num_groups=CONFIG['size_groups'],
    save_path=dataset_histogram_path,
)


In [None]:
def send_image_for_prediction(image_path: str) -> Dict:
    """
    Upload an image to the prediction API and return the decoded JSON response.

    The image is POSTed as a multipart form field named "image" to
    ``CONFIG['prediction_url']``.

    Args:
        image_path: Path of the image file to upload

    Returns:
        The response body as decoded by ``response.json()``.
        NOTE(review): annotated as Dict, but the caller iterates over the
        result as a sequence of prediction dicts; confirm against the API.

    Raises:
        requests.HTTPError: If the server answers with an error status
        requests.RequestException: On connection failures or when the 30 s timeout expires
    """
    # Read the bytes first so the file handle is released before the network call
    with open(image_path, "rb") as f:
        image_data = f.read()

    files = {
        'image': ('image.jpg', io.BytesIO(image_data), 'image/jpeg')
    }
    response = requests.post(
        CONFIG['prediction_url'],
        files=files,
        timeout=30
    )
    # Fail loudly on HTTP errors instead of treating an error payload as predictions
    response.raise_for_status()
    return response.json()

def compute_iou(boxA: Dict, boxB: Dict) -> float:
    """
    Compute the Intersection over Union (IoU) of two axis-aligned bounding boxes.

    Args:
        boxA: Mapping with the corner coordinates "x1", "y1", "x2" and "y2"
        boxB: Mapping with the same keys as ``boxA``

    Returns:
        Intersection area divided by union area. Returns 0.0 when the union
        area is not positive (for example two zero-area boxes), where the
        ratio is undefined.
    """
    # Corners of the intersection rectangle
    xA = max(boxA["x1"], boxB["x1"])
    yA = max(boxA["y1"], boxB["y1"])
    xB = min(boxA["x2"], boxB["x2"])
    yB = min(boxA["y2"], boxB["y2"])

    # Clamp at zero so non-overlapping boxes give an empty intersection
    interArea = max(0, xB - xA) * max(0, yB - yA)
    boxAArea = (boxA["x2"] - boxA["x1"]) * (boxA["y2"] - boxA["y1"])
    boxBArea = (boxB["x2"] - boxB["x1"]) * (boxB["y2"] - boxB["y1"])

    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        # Degenerate boxes: avoid dividing by zero
        return 0.0

    return interArea / float(unionArea)

In [None]:
def process_predictions_with_sizes(image_paths: List[str]) -> List[Dict]:
    """
    Request predictions for each image, match them to ground truth and record sign sizes.

    Ground-truth boxes are read from the module-level ``label_dir``. Each
    prediction is matched to the ground-truth box with the highest IoU that
    reaches ``CONFIG['iou_threshold']``; predictions without such a match are
    not recorded. Ground-truth boxes that no prediction matched are recorded
    with ``"predicted": "none"``.

    A prediction is labelled "background" when both its healthy score
    (``confidence * cls_score``) and its damaged score
    (``confidence * (1 - cls_score)``) are below
    ``CONFIG['confidence_threshold']``; otherwise it takes the label with the
    larger score.

    Args:
        image_paths: Paths of the images to process

    Returns:
        One dictionary per matched prediction and per unmatched ground-truth
        box, with the keys "image", "actual_bbox", "predicted_bbox", "actual",
        "predicted", "class_score", "confidence", "iou", "size_percentage",
        "image_width" and "image_height". The size is always that of the
        ground-truth box.

    Raises:
        ValueError: If a non-blank label line does not hold exactly five numbers
    """
    results = []

    print(f"Processing predictions for {len(image_paths)} images...")

    for original_path in tqdm(image_paths, desc="Processing images"):
        # cv2.imread signals failure by returning None rather than raising
        image = cv2.imread(original_path)
        if image is None:
            print(f"Warning: could not read image {original_path}, skipping")
            continue
        h_img, w_img = image.shape[:2]

        # Swap only the extension so a ".jpg" elsewhere in the file name is left intact
        stem = os.path.splitext(os.path.basename(original_path))[0]
        label_path = os.path.join(label_dir, stem + ".txt")

        gt_bboxes = []
        if os.path.exists(label_path):
            with open(label_path, "r") as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue  # tolerate blank lines (e.g. a trailing newline)

                    class_id, cx, cy, w, h = map(float, fields)

                    # Convert the normalized centre/size box into pixel corner coordinates
                    x_center = cx * w_img
                    y_center = cy * h_img
                    width = w * w_img
                    height = h * h_img
                    gt_bboxes.append({
                        "class": CONFIG['class_names'][int(class_id)],
                        "x1": int(x_center - width / 2),
                        "y1": int(y_center - height / 2),
                        "x2": int(x_center + width / 2),
                        "y2": int(y_center + height / 2)
                    })

        try:
            predictions = send_image_for_prediction(original_path)
            # Lift the nested 'box' entries to the top level next to the score fields
            pred_bboxes = [{**p['box'], **p} for p in predictions]
        except Exception as e:
            # Best effort: with no predictions, every ground-truth box of this
            # image is recorded as a missed detection further down.
            print(f"Error getting predictions for {original_path}: {e}")
            pred_bboxes = []

        gt_bboxes_matched = [False] * len(gt_bboxes)

        for pred in pred_bboxes:
            class_score = pred['cls_score']
            healthy_score = pred['confidence'] * class_score
            damaged_score = pred['confidence'] * (1 - class_score)

            if healthy_score < CONFIG['confidence_threshold'] and damaged_score < CONFIG['confidence_threshold']:
                predicted_label = "background"
            elif healthy_score > damaged_score:
                predicted_label = "healthy"
            else:
                predicted_label = "damaged"

            # Find the best-overlapping ground-truth box at or above the IoU threshold
            best_iou = 0
            best_gt_idx = None
            for i, gt in enumerate(gt_bboxes):
                iou = compute_iou(pred, gt)
                if iou >= CONFIG['iou_threshold'] and iou > best_iou:
                    best_iou = iou
                    best_gt_idx = i

            if best_gt_idx is None:
                # No ground-truth match: this prediction is not recorded
                continue

            # NOTE(review): several predictions may match the same ground-truth
            # box, and each of them adds its own row for that box.
            gt_bboxes_matched[best_gt_idx] = True
            matched_gt = gt_bboxes[best_gt_idx]

            results.append({
                "image": original_path,
                "actual_bbox": matched_gt,
                "predicted_bbox": {
                    "x1": int(pred["x1"]),
                    "y1": int(pred["y1"]),
                    "x2": int(pred["x2"]),
                    "y2": int(pred["y2"])
                },
                "actual": matched_gt["class"],
                "predicted": predicted_label,
                "class_score": class_score,
                "confidence": pred["confidence"],
                "iou": best_iou,
                "size_percentage": calculate_sign_size_percentage(matched_gt, w_img, h_img),
                "image_width": w_img,
                "image_height": h_img
            })

        # Add unmatched ground truth bboxes as false negatives (missed detections)
        for i, matched in enumerate(gt_bboxes_matched):
            if matched:
                continue
            results.append({
                "image": original_path,
                "actual_bbox": gt_bboxes[i],
                "predicted_bbox": {},
                "actual": gt_bboxes[i]["class"],
                "predicted": "none",  # Missed detection
                "class_score": 0,
                "confidence": 0,
                "iou": 0,
                "size_percentage": calculate_sign_size_percentage(gt_bboxes[i], w_img, h_img),
                "image_width": w_img,
                "image_height": h_img
            })

    return results

# Run the prediction and matching pass over the whole test split
print("Processing original images for size analysis...")
all_results = process_predictions_with_sizes(image_paths=original_image_paths)

print(f"Generated {len(all_results)} prediction results with size information")

In [None]:
def create_size_groups(results: List[Dict], num_groups: int = 4) -> Dict:
    """
    Split prediction results into size groups of roughly equal count.

    Boundaries are taken from the sorted sizes at multiples of
    ``len(results) // num_groups``. The first group includes its lower
    boundary; every later group excludes it, so each result lands in exactly
    one group. Because a boundary value belongs to the lower group, and ties
    stay together, group counts are only approximately equal and a group can
    be empty.

    Args:
        results: List of prediction results with size_percentage
        num_groups: Number of size groups to create

    Returns:
        Dictionary mapping "size_group_<n>" to a dictionary with the keys
        "results", "min_size", "max_size", "size_range" and "count".
        An empty dictionary when ``results`` is empty.

    Raises:
        ValueError: If ``num_groups`` is smaller than 1
    """
    if num_groups < 1:
        raise ValueError("num_groups must be at least 1")
    if not results:
        # Nothing to group; mirror the plotting helpers, which also return early
        print("No results available to create size groups")
        return {}

    sizes_sorted = sorted(r['size_percentage'] for r in results)

    group_size = len(sizes_sorted) // num_groups
    # i * group_size is always a valid index for 1 <= i < num_groups
    boundaries = (
        [sizes_sorted[0]]
        + [sizes_sorted[i * group_size] for i in range(1, num_groups)]
        + [sizes_sorted[-1]]
    )

    size_groups = {}
    for i in range(num_groups):
        group_name = f"size_group_{i+1}"
        min_size = boundaries[i]
        max_size = boundaries[i + 1]

        if i == 0:  # First group, include minimum
            group_results = [r for r in results if min_size <= r['size_percentage'] <= max_size]
        else:  # Other groups, exclude minimum to avoid overlap
            group_results = [r for r in results if min_size < r['size_percentage'] <= max_size]

        size_groups[group_name] = {
            'results': group_results,
            'min_size': min_size,
            'max_size': max_size,
            'size_range': f"{min_size:.2f}% - {max_size:.2f}%",
            'count': len(group_results)
        }

    # Print group statistics
    print("\nSize Group Statistics:")
    print("=" * 60)
    for group_name, group in size_groups.items():
        print(f"{group_name:15} | Range: {group['min_size']:6.2f}% - {group['max_size']:6.2f}% | Count: {group['count']:4d}")

    return size_groups

# Bucket the results by sign size
print("Creating size groups...")
size_groups = create_size_groups(all_results, CONFIG['size_groups'])

# Everything produced by this analysis is written below the dataset folder
base_dir = os.path.join(dataset.location, f"{CONFIG['analysis_property']}_analysis")
os.makedirs(base_dir, exist_ok=True)

# Raw per-sign results
predictions_file = os.path.join(base_dir, "predictions_results.json")
with open(predictions_file, "w") as f:
    json.dump(all_results, f, indent=2)

# Group summaries only: the per-group result lists are left out of this file
size_groups_file = os.path.join(base_dir, "size_groups.json")
summary_keys = ('min_size', 'max_size', 'size_range', 'count')
size_groups_serializable = {
    group_name: {key: group_data[key] for key in summary_keys}
    for group_name, group_data in size_groups.items()
}

with open(size_groups_file, "w") as f:
    json.dump(size_groups_serializable, f, indent=2)

print(f"\nSaved {len(all_results)} prediction results to {predictions_file}")
print(f"Saved size group information to {size_groups_file}")

## Metrics Calculation

In [None]:
def calculate_recall_metrics(group_results: List[Dict]) -> Dict:
    """
    Calculate recall metrics for a size group.

    A sign counts as detected only when its predicted label equals its actual
    label. Signs predicted as "none" (no matching prediction) and
    "background" (matched, but below the confidence threshold) are counted
    separately as undetected.

    Args:
        group_results: Result dictionaries with "actual" and "predicted" entries

    Returns:
        Dictionary with per-class recalls ("recall_healthy", "recall_damaged"),
        their mean ("recall_overall"), the class totals, the detected and
        undetected counts, and "sample_count". The same set of keys is
        returned for empty input, with every value zero.
    """
    # Tally (actual, predicted) pairs in a single pass over the results
    pair_counts = {}
    for r in group_results:
        pair = (r['actual'], r['predicted'])
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

    def class_total(actual: str) -> int:
        return sum(n for (a, _), n in pair_counts.items() if a == actual)

    total_healthy = class_total('healthy')
    total_damaged = class_total('damaged')

    detected_healthy = pair_counts.get(('healthy', 'healthy'), 0)
    detected_damaged = pair_counts.get(('damaged', 'damaged'), 0)

    recall_healthy = detected_healthy / total_healthy if total_healthy > 0 else 0.0
    recall_damaged = detected_damaged / total_damaged if total_damaged > 0 else 0.0

    # NOTE(review): a class with no samples contributes a recall of 0 to this
    # mean, so single-class groups score at most 0.5. Confirm this is intended.
    recall_overall = (recall_healthy + recall_damaged) / 2.0 if (total_healthy > 0 or total_damaged > 0) else 0.0

    return {
        'recall_healthy': float(recall_healthy),
        'recall_damaged': float(recall_damaged),
        'recall_overall': float(recall_overall),
        'total_healthy': int(total_healthy),
        'total_damaged': int(total_damaged),
        'detected_healthy': int(detected_healthy),
        'detected_damaged': int(detected_damaged),
        'sample_count': len(group_results),
        'undetected_healthy': int(pair_counts.get(('healthy', 'none'), 0)),
        'undetected_damaged': int(pair_counts.get(('damaged', 'none'), 0)),
        'undetected_background_healthy': int(pair_counts.get(('healthy', 'background'), 0)),
        'undetected_background_damaged': int(pair_counts.get(('damaged', 'background'), 0))
    }

def calculate_size_group_metrics(size_groups: Dict) -> Dict:
    """
    Calculate recall metrics for all size groups.

    Args:
        size_groups: Mapping of group name to a dictionary with the keys
            "results", "min_size", "max_size" and "size_range"

    Returns:
        Mapping of group name to the recall metrics of that group, extended
        with "min_size", "max_size", "size_range" and "avg_size". For a group
        without results, "avg_size" is the midpoint of its size range.
    """
    group_metrics = {}

    print("\nCalculating metrics for size groups:")
    print("=" * 80)

    for group_name, group_data in size_groups.items():
        metrics = calculate_recall_metrics(group_data['results'])

        group_sizes = [r['size_percentage'] for r in group_data['results']]
        if group_sizes:
            avg_size = float(np.mean(group_sizes))
        else:
            # np.mean([]) is NaN, which breaks sorting by avg_size and is written
            # as non-standard JSON; use the range midpoint for empty groups.
            avg_size = (group_data['min_size'] + group_data['max_size']) / 2.0

        metrics.update({
            'min_size': group_data['min_size'],
            'max_size': group_data['max_size'],
            'size_range': group_data['size_range'],
            'avg_size': avg_size
        })

        group_metrics[group_name] = metrics

        print(f"{group_name:15} | Range: {metrics['size_range']:15} | "
              f"Samples: {metrics['sample_count']:3d} | "
              f"Recall H: {metrics['recall_healthy']:.3f} | "
              f"Recall D: {metrics['recall_damaged']:.3f} | "
              f"Overall: {metrics['recall_overall']:.3f}")

    return group_metrics

# Per-group recall metrics, keyed by size group name; reused by the plotting and saving steps below
group_metrics = calculate_size_group_metrics(size_groups)

In [None]:
def plot_recall_by_size_groups(group_metrics: Dict, save_path: str = None, figsize=(12, 8)):
    """
    Draw three bar charts side by side, with the groups ordered by average sign size.

    Panel a) stacks the number of damaged and healthy signs per group,
    panel b) shows the recall of each class, and panel c) the overall recall.
    When ``save_path`` is given the figure is also written to that path.
    """
    ordered = sorted(group_metrics.items(), key=lambda item: item[1]['avg_size'])

    group_names = [name.replace('size_group_', 'Group ') for name, _ in ordered]
    recall_healthy = [m['recall_healthy'] for _, m in ordered]
    recall_damaged = [m['recall_damaged'] for _, m in ordered]
    recall_overall = [m['recall_overall'] for _, m in ordered]
    total_healthy_counts = [m['total_healthy'] for _, m in ordered]
    total_damaged_counts = [m['total_damaged'] for _, m in ordered]

    fig, axes = plt.subplots(1, 3, figsize=figsize)
    composition_ax, per_class_ax, overall_ax = axes

    positions = np.arange(len(group_names))
    width = 0.6

    def _finish_axis(ax, ylabel, title, show_legend, unit_ylim):
        # Shared labelling for the three panels
        ax.set_xlabel('Size Group', fontweight='bold')
        ax.set_ylabel(ylabel, fontweight='bold')
        ax.set_title(title, fontweight='bold')
        ax.set_xticks(positions)
        ax.set_xticklabels(group_names, rotation=45)
        if show_legend:
            ax.legend()
        ax.grid(True, alpha=0.3)
        if unit_ylim:
            ax.set_ylim(0, 1.0)

    # Panel a: stacked class counts, damaged at the bottom
    composition_ax.bar(positions, total_damaged_counts, width, label='Damaged', alpha=0.8, color='red')
    composition_ax.bar(positions, total_healthy_counts, width, label='Healthy', alpha=0.8, color='green',
                       bottom=total_damaged_counts)
    _finish_axis(composition_ax, 'Traffic Sign Count', 'a) Group Composition by Class',
                 show_legend=True, unit_ylim=False)

    # Write each count in the middle of its bar segment
    for idx in range(len(group_names)):
        n_healthy = total_healthy_counts[idx]
        n_damaged = total_damaged_counts[idx]
        if n_damaged > 0:
            composition_ax.text(idx, n_damaged/2, f'{n_damaged}', ha='center', va='center',
                                fontweight='bold', color='white')
        if n_healthy > 0:
            composition_ax.text(idx, n_damaged + n_healthy/2, f'{n_healthy}', ha='center', va='center',
                                fontweight='bold', color='white')

    # Panel b: per-class recall, two half-width bars per group
    per_class_ax.bar(positions - width/4, recall_healthy, width/2, label='Healthy', alpha=0.8, color='green')
    per_class_ax.bar(positions + width/4, recall_damaged, width/2, label='Damaged', alpha=0.8, color='red')
    _finish_axis(per_class_ax, 'Recall Score', 'b) Recall by Class', show_legend=True, unit_ylim=True)

    # Only bars taller than 0.05 get a value label
    for idx, value in enumerate(recall_healthy):
        if value > 0.05:
            per_class_ax.text(idx - width/4, value + 0.02, f'{value:.2f}', ha='center', va='bottom',
                              fontsize=9, fontweight='bold')
    for idx, value in enumerate(recall_damaged):
        if value > 0.05:
            per_class_ax.text(idx + width/4, value + 0.02, f'{value:.2f}', ha='center', va='bottom',
                              fontsize=9, fontweight='bold')

    # Panel c: overall recall
    overall_ax.bar(positions, recall_overall, width, alpha=0.8, color='blue')
    _finish_axis(overall_ax, 'Overall Recall Score', 'c) Overall Recall by Size Group',
                 show_legend=False, unit_ylim=True)

    for idx, value in enumerate(recall_overall):
        if value > 0.05:
            overall_ax.text(idx, value + 0.02, f'{value:.2f}', ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved recall plots to {save_path}")

    plt.show()

print("\nGenerating plots...")

# All figures of the analysis are collected in one sub-folder
plots_dir = os.path.join(base_dir, 'plots')
os.makedirs(plots_dir, exist_ok=True)

recall_plot_path = os.path.join(plots_dir, 'recall_by_size_groups.png')
plot_recall_by_size_groups(group_metrics, save_path=recall_plot_path)

In [None]:
def plot_detection_outcomes_by_group_and_class(group_metrics, size_groups, save_path=None, figsize=(14, 7)):
    """
    Plot, per size group and actual class, the share of each detection outcome as stacked bars.

    Every result is assigned one outcome: predicted label equals the actual
    class ("correct"), predicted as the other class ("incorrect"), predicted
    as "background", or predicted as "none". Groups are ordered by the
    "avg_size" entry of ``group_metrics``; the results themselves are read
    from ``size_groups``. When ``save_path`` is given the figure is also
    written to that path.
    """
    class_labels = ['healthy', 'damaged']
    outcomes = ['correct', 'incorrect', 'background', 'none']
    outcome_colors = {
        'correct': "#10C716",
        'incorrect': "#FFEE00",
        'background': "#FF7B00",
        'none': "#AA1C12"
    }
    outcomes_labels = {
        'correct': 'Detected and Classified Correctly',
        'incorrect': 'Detected but Misclassified',
        'background': 'Detected as Background',
        'none': 'Undetected'
    }

    group_names = []
    proportions = {cls: {out: [] for out in outcomes} for cls in class_labels}

    for group_name, _ in sorted(group_metrics.items(), key=lambda item: item[1]['avg_size']):
        group_names.append(group_name.replace('size_group_', 'Group '))
        group_results = size_groups[group_name]['results']

        for cls in class_labels:
            # Single pass: count the signs of this class and bucket each by outcome
            tallies = dict.fromkeys(outcomes, 0)
            total = 0
            for r in group_results:
                if r['actual'] != cls:
                    continue
                total += 1
                predicted = r['predicted']
                if predicted == cls:
                    tallies['correct'] += 1
                elif predicted in class_labels:
                    tallies['incorrect'] += 1
                elif predicted == 'background':
                    tallies['background'] += 1
                elif predicted == 'none':
                    tallies['none'] += 1

            for out in outcomes:
                # A class that is absent from the group gets a zero-height bar
                proportions[cls][out].append(tallies[out] / total if total else 0)

    title_fs, label_fs, tick_fs, legend_fs = 18, 16, 14, 13

    fig, axes = plt.subplots(1, 2, figsize=figsize, sharey=False)
    width = 0.6
    x = np.arange(len(group_names))

    for ax, cls in zip(axes, class_labels):
        stacked = np.zeros(len(group_names))
        for out in outcomes:
            heights = proportions[cls][out]
            ax.bar(x, heights, width, bottom=stacked, label=outcomes_labels[out],
                   color=outcome_colors[out], alpha=0.85)
            stacked = stacked + np.array(heights)
        ax.set_title(f"Detection Outcomes for {cls.capitalize()} Signs", fontweight='bold', fontsize=title_fs)
        ax.set_xticks(x)
        ax.set_xticklabels(group_names, fontsize=tick_fs)
        ax.set_ylim(0, 1.0)
        ax.set_ylabel("Proportion of Detections", fontsize=label_fs)
        ax.grid(True, alpha=0.3)
        ax.legend(loc='lower right', fontsize=legend_fs)
        ax.tick_params(axis='y', labelsize=tick_fs)

    plt.tight_layout()
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved detection outcomes plot to {save_path}")
    plt.show()

# Stacked outcome shares per size group, saved next to the recall figure
outcomes_plot_path = os.path.join(plots_dir, 'detection_outcomes_by_group_and_class.png')
plot_detection_outcomes_by_group_and_class(group_metrics, size_groups, save_path=outcomes_plot_path)

In [None]:
def save_size_analysis_results(group_metrics: Dict, all_results: List[Dict], output_dir: str):
    """
    Write the per-group metrics and an overall summary as JSON files into ``output_dir``.

    Two files are created: 'size_group_metrics.json' with ``group_metrics`` as
    given, and 'analysis_summary.json' with the result count, size statistics,
    class distribution and per-class recall over all results.

    Args:
        group_metrics: JSON-serializable metrics per size group
        all_results: Result dictionaries with "size_percentage", "actual" and "predicted"
        output_dir: Existing directory the two files are written to

    Notes:
        The size statistics are written as null when ``all_results`` is empty,
        and the recall of a class without any samples is reported as 0.0.
    """
    metrics_json_path = os.path.join(output_dir, 'size_group_metrics.json')
    with open(metrics_json_path, 'w') as f:
        json.dump(group_metrics, f, indent=2)
    print(f"Saved group metrics to {metrics_json_path}")

    # Collect the sizes once instead of rebuilding the list for every statistic
    sizes = [r['size_percentage'] for r in all_results]
    if sizes:
        size_statistics = {
            'min_size': float(min(sizes)),
            'max_size': float(max(sizes)),
            'mean_size': float(np.mean(sizes)),
            'median_size': float(np.median(sizes)),
            'std_size': float(np.std(sizes))
        }
    else:
        size_statistics = dict.fromkeys(
            ('min_size', 'max_size', 'mean_size', 'median_size', 'std_size'), None
        )

    def class_count(cls: str) -> int:
        return sum(1 for r in all_results if r['actual'] == cls)

    def class_recall(cls: str) -> float:
        total = class_count(cls)
        if total == 0:
            # No samples of this class: avoid dividing by zero
            return 0.0
        hits = sum(1 for r in all_results if r['actual'] == cls and r['predicted'] == cls)
        return hits / total

    summary_stats = {
        'total_predictions': len(all_results),
        'size_statistics': size_statistics,
        'class_distribution': {
            'healthy': class_count('healthy'),
            'damaged': class_count('damaged')
        },
        'overall_recall': {
            'healthy': class_recall('healthy'),
            'damaged': class_recall('damaged')
        }
    }

    summary_json_path = os.path.join(output_dir, 'analysis_summary.json')
    with open(summary_json_path, 'w') as f:
        json.dump(summary_stats, f, indent=2)
    print(f"Saved analysis summary to {summary_json_path}")

# Persist the metrics, then print a short closing report
save_size_analysis_results(group_metrics, all_results, base_dir)

banner = "=" * 60
print("\n" + banner)
print("SIZE ANALYSIS COMPLETE")
print(banner)
print(f"Total predictions analyzed: {len(all_results)}")
print(f"Number of size groups: {CONFIG['size_groups']}")
print(f"Results saved to: {base_dir}")
print("\nOverall Recall Performance:")
for group_name, metrics in group_metrics.items():
    size_range = metrics['size_range']
    overall = metrics['recall_overall']
    print(f"{group_name:15} | {size_range:15} | "
          f"Overall Recall: {overall:.3f}")