# YOLOv9 Model Evaluation

Comprehensive evaluation of YOLOv9 model performance on the parking lot detection task.

**Metrics Evaluated:**
- Object Detection: mAP@0.5, mAP@0.5:0.95, Precision, Recall
- Counting Accuracy: Exact Match Accuracy, Mean Absolute Error (MAE)
- Inference Speed: Frames Per Second (FPS)

## 1. Apply YOLOv9 Patch

Fix the `AttributeError` raised in `detect.py` when the model returns its predictions as a list: the patch keeps only the first element of that list.

In [None]:
import os
from pathlib import Path

YOLOv9_DIR = Path('./yolov9')
detect_script_path = YOLOv9_DIR / 'detect.py'
patch_signature = "# PATCHED: Handle list output from model"
# Statement in detect.py after which the list-unwrapping guard is inserted.
patch_target = 'pred = model(im, augment=augment, visualize=visualize)'


def apply_detect_patch(script_path, signature=patch_signature, target=patch_target):
    """Insert a list-unwrapping guard after every line containing `target`.

    The guard is three lines, indented exactly like the matched line:
    the `signature` comment, `if isinstance(pred, list):` and
    `pred = pred[0]`.

    Args:
        script_path: Path of the Python script to patch in place.
        signature: Marker comment; if any line already contains it the
            file is left untouched.
        target: Substring identifying the line(s) to patch after.

    Returns:
        'already_patched' if the signature is already present,
        'patched' if at least one guard was inserted and the file rewritten,
        'target_not_found' if no line contains `target` (file not rewritten).
    """
    with open(script_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    if any(signature in line for line in lines):
        return 'already_patched'

    new_lines = []
    patched = False
    for line in lines:
        if target not in line:
            new_lines.append(line)
            continue

        # A target on the very last line may lack a trailing newline; add one
        # so the inserted guard does not end up glued to the statement.
        if not line.endswith('\n'):
            line += '\n'
        new_lines.append(line)

        # Reuse the line's own leading whitespace so tab-indented files
        # keep a consistent indentation.
        indent = line[:len(line) - len(line.lstrip())]
        new_lines.append(f"{indent}{signature}\n")
        new_lines.append(f"{indent}if isinstance(pred, list):\n")
        new_lines.append(f"{indent}    pred = pred[0]\n")
        patched = True

    if not patched:
        return 'target_not_found'

    with open(script_path, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)
    return 'patched'


if detect_script_path.exists():
    patch_status = apply_detect_patch(detect_script_path)
    if patch_status == 'patched':
        print(f"Applying patch to {detect_script_path}...")
        print("Patch applied successfully.")
    elif patch_status == 'already_patched':
        print(f"{detect_script_path} is already patched.")
    else:
        # Previously this case was reported as a successful patch.
        print(f"Warning: patch target not found in {detect_script_path}; file left unchanged.")
else:
    print(f"Warning: {detect_script_path} not found.")

## 2. Configuration

In [None]:
import sys
import subprocess
import yaml
import re
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm

# --- Input locations ---------------------------------------------------------
YOLO_RUN_DIR = Path('./runs/train/carpk_yolov9/')
MODEL_CHECKPOINT = YOLO_RUN_DIR.joinpath('weights', 'best.pt')
DATA_YAML = Path('./prepared_data/yolo/data.yaml')

# --- Inference settings ------------------------------------------------------
IMG_SIZE = 640          # passed as --img to val.py and detect.py
CONF_THRESHOLD = 0.25   # passed as --conf to detect.py
IOU_THRESHOLD = 0.7     # passed as --iou-thres to val.py and detect.py
DEVICE = '0'            # passed as --device to val.py and detect.py

# Fail fast if any required input is missing, before launching subprocesses.
if not MODEL_CHECKPOINT.exists():
    raise FileNotFoundError(f"Model checkpoint not found: {MODEL_CHECKPOINT}")
if not (YOLOv9_DIR.exists() and (YOLOv9_DIR / 'val.py').exists()):
    raise FileNotFoundError(f"YOLOv9 repository not found at: {YOLOv9_DIR}")
if not DATA_YAML.exists():
    raise FileNotFoundError(f"Data YAML not found: {DATA_YAML}")

print(f"Model: {MODEL_CHECKPOINT}")
print(f"Data: {DATA_YAML}")
print(f"Config: img={IMG_SIZE}, conf={CONF_THRESHOLD}, iou={IOU_THRESHOLD}")

## 3. Object Detection Metrics (mAP)

In [None]:
def run_validation():
    """Run YOLOv9's val.py on the test split and return its console output.

    The script is launched with the YOLOv9 checkout as the child's working
    directory (via ``cwd=``), so the notebook's own working directory is
    never changed, even if launching the process raises.

    Returns:
        str: stdout followed by stderr of the val.py process, or an empty
        string if the process exited with a non-zero return code (the
        return code and stderr are printed in that case).
    """
    # The child runs in a different directory, so pass absolute paths.
    abs_weights = MODEL_CHECKPOINT.resolve()
    abs_data_yaml = DATA_YAML.resolve()

    cmd = [
        sys.executable, 'val.py',
        '--data', str(abs_data_yaml),
        '--weights', str(abs_weights),
        '--img', str(IMG_SIZE),
        # Fixed low confidence threshold, independent of CONF_THRESHOLD.
        '--conf-thres', '0.001',
        '--iou-thres', str(IOU_THRESHOLD),
        '--device', DEVICE,
        '--task', 'test',
        '--verbose'
    ]

    print("Running validation...")
    result = subprocess.run(
        cmd,
        cwd=YOLOv9_DIR,
        capture_output=True,
        text=True,
        encoding='utf-8',
    )

    if result.returncode != 0:
        print(f"Validation failed with return code {result.returncode}")
        print(result.stderr)
        return ""

    # Both streams are returned so the parser sees the metrics table
    # regardless of which stream val.py logged it to.
    return result.stdout + "\n" + result.stderr

def parse_validation_output(output):
    """Extract detection metrics and inference speed from val.py console text.

    Args:
        output: Combined console text of a validation run (may be empty).

    Returns:
        dict: Any of the keys 'precision', 'recall', 'map_50', 'map_50_95'
        (taken from the first ``all <int> <int> <P> <R> <mAP50> <mAP50-95>``
        row) and 'fps' (1000 / inference milliseconds). Keys whose source
        text is not found are simply absent.
    """
    metrics = {}
    if not output:
        return metrics

    # A plain decimal with optional exponent. Unlike ``[\d\.]+`` this accepts
    # scientific notation such as 1.5e-05 and never captures text that
    # float() cannot parse (e.g. a lone ".").
    number = r"\d+(?:\.\d+)?(?:[eE][-+]?\d+)?"

    # ``\b`` keeps "all" from matching inside longer words such as "small".
    summary_row = re.search(
        rf"\ball\s+\d+\s+\d+\s+({number})\s+({number})\s+({number})\s+({number})",
        output,
    )
    if summary_row:
        precision, recall, map_50, map_50_95 = (float(v) for v in summary_row.groups())
        metrics['precision'] = precision
        metrics['recall'] = recall
        metrics['map_50'] = map_50
        metrics['map_50_95'] = map_50_95

    # Tried in order, most specific first. The last one is a loose fallback:
    # the first "<n>ms" on a line that later mentions "inference".
    speed_patterns = (
        rf"Speed:.*?({number})ms\s+inference",
        rf"inference:\s*({number})ms",
        rf"({number})ms.*?inference",
    )

    for pattern in speed_patterns:
        speed_match = re.search(pattern, output, re.IGNORECASE)
        if speed_match is None:
            continue
        inference_ms = float(speed_match.group(1))
        # A reported 0 ms cannot be converted to FPS; leave 'fps' unset.
        if inference_ms > 0:
            metrics['fps'] = 1000.0 / inference_ms
        break

    return metrics

# Run val.py once, then pull the headline numbers out of its console log.
validation_output = run_validation()
bbox_metrics = parse_validation_output(validation_output)

print("\nDetection Metrics:")
for metric_name, metric_value in bbox_metrics.items():
    # FPS is shown with two decimals; every other metric with four.
    if metric_name == 'fps':
        print(f"  {metric_name}: {metric_value:.2f}")
    else:
        print(f"  {metric_name}: {metric_value:.4f}")

## 4. Counting Metrics (Accuracy & MAE)

In [None]:
def get_test_directories():
    abs_data_yaml = DATA_YAML.resolve()
    
    with open(abs_data_yaml, 'r') as f:
        data_config = yaml.safe_load(f)
    
    test_path_str = data_config.get('test', '')
    if not test_path_str:
        raise ValueError("'test' key not found in data.yaml")
    
    test_path = (abs_data_yaml.parent / test_path_str).resolve()
    
    if test_path.name == 'images' and test_path.exists():
        test_img_dir = test_path
        gt_label_dir = test_path.parent / 'labels'
    elif (test_path / 'images').exists():
        test_img_dir = test_path / 'images'
        gt_label_dir = test_path / 'labels'
    else:
        test_img_dir = test_path
        gt_label_dir = test_path.with_name('labels')
        if not gt_label_dir.exists():
            gt_label_dir = test_path.parent / 'labels'
    
    return test_img_dir, gt_label_dir

def deduplicate_predictions(pred_label_dir):
    """Rewrite every ``*.txt`` file in `pred_label_dir` without repeated lines.

    Each file is read, its lines are stripped, blank lines are dropped and
    only the first occurrence of each remaining line is kept (original order
    preserved). The file is then rewritten with one kept line per row.
    A summary is printed when the total line count shrank.

    Args:
        pred_label_dir: Directory (Path) containing the label text files.
    """
    lines_read = 0
    lines_kept = 0

    for txt_path in list(pred_label_dir.glob('*.txt')):
        if not txt_path.exists():
            continue

        with open(txt_path, 'r') as src:
            raw_lines = list(src)
        lines_read += len(raw_lines)

        # dict.fromkeys keeps first-seen order while discarding repeats.
        stripped = (raw.strip() for raw in raw_lines)
        kept = list(dict.fromkeys(entry for entry in stripped if entry))
        lines_kept += len(kept)

        with open(txt_path, 'w') as dst:
            dst.write(''.join(f"{entry}\n" for entry in kept))

    removed = lines_read - lines_kept
    if removed > 0:
        print(f"Removed {removed} duplicate detections ({removed/lines_read*100:.1f}%)")

def run_detection_and_get_paths():
    """Run YOLOv9's detect.py on the test images and return the label directory.

    Any previous ``runs/detect_for_counting/exp`` output is deleted first.
    detect.py is launched with the YOLOv9 checkout as the child's working
    directory (via ``cwd=``), so the notebook's own working directory is
    never changed. After a successful run the saved label files are
    de-duplicated in place.

    Returns:
        Path: ``<cwd>/runs/detect_for_counting/exp/labels``.

    Raises:
        subprocess.CalledProcessError: If detect.py exits non-zero (its
            stderr is printed before re-raising).
    """
    import shutil

    # Absolute paths are required because the child runs in another directory.
    abs_weights = MODEL_CHECKPOINT.resolve()
    test_img_dir, _ = get_test_directories()

    project_dir = Path.cwd() / 'runs' / 'detect_for_counting'
    exp_name = 'exp'
    exp_dir = project_dir / exp_name
    pred_label_dir = exp_dir / 'labels'

    # Start from a clean output directory so stale label files from an
    # earlier run cannot be counted.
    if exp_dir.exists():
        shutil.rmtree(exp_dir)

    cmd = [
        sys.executable, 'detect.py',
        '--weights', str(abs_weights),
        '--source', str(test_img_dir),
        '--img', str(IMG_SIZE),
        '--conf', str(CONF_THRESHOLD),
        '--iou-thres', str(IOU_THRESHOLD),
        '--device', DEVICE,
        '--save-txt', '--project', str(project_dir), '--name', exp_name
    ]

    print("Running detection...")

    try:
        subprocess.run(
            cmd,
            cwd=YOLOv9_DIR,
            check=True,
            capture_output=True,
            text=True,
            encoding='utf-8',
        )
    except subprocess.CalledProcessError as e:
        print(f"Detection failed: {e.stderr}")
        raise

    deduplicate_predictions(pred_label_dir)
    return pred_label_dir

# Image extensions (lower-case) that take part in the counting evaluation.
_COUNTED_IMAGE_SUFFIXES = {'.jpg', '.jpeg', '.png'}


def _count_label_lines(label_path):
    """Return the number of non-blank lines in `label_path`, or 0 if it is missing."""
    if not label_path.exists():
        return 0
    with open(label_path, 'r') as f:
        return sum(1 for line in f if line.strip())


def calculate_counting_metrics(pred_label_dir):
    """Compare per-image object counts between ground truth and predictions.

    The count for an image is the number of non-blank lines in the label
    file named after the image stem; a missing label file counts as 0.
    Images are matched by extension case-insensitively and processed in
    sorted order, so the resulting table is deterministic.

    Args:
        pred_label_dir: Directory (Path) holding predicted ``<stem>.txt`` files.

    Returns:
        tuple[dict, pandas.DataFrame]: A dict with 'count_accuracy'
        (percentage of images whose predicted count equals the ground-truth
        count) and 'count_mae' (mean absolute count error), plus a table with
        columns 'image', 'gt_count', 'pred_count', 'difference'
        (pred - gt) and 'abs_error'.

    Raises:
        FileNotFoundError: If the ground-truth label directory does not exist.
        ValueError: If no images are found in the test image directory.
    """
    test_img_dir, gt_label_dir = get_test_directories()

    if not gt_label_dir.exists():
        raise FileNotFoundError(f"Ground truth label directory not found: {gt_label_dir}")

    # glob('*') yields nothing for a missing directory, so that case still
    # ends in the ValueError below.
    test_images = sorted(
        p for p in test_img_dir.glob('*')
        if p.is_file() and p.suffix.lower() in _COUNTED_IMAGE_SUFFIXES
    )
    if not test_images:
        raise ValueError(f"No images found in {test_img_dir}")

    results = []

    for img_path in tqdm(test_images, desc="Calculating counting metrics"):
        label_name = f"{img_path.stem}.txt"
        gt_count = _count_label_lines(gt_label_dir / label_name)
        pred_count = _count_label_lines(pred_label_dir / label_name)

        results.append({
            'image': img_path.name,
            'gt_count': gt_count,
            'pred_count': pred_count,
            'difference': pred_count - gt_count,
            'abs_error': abs(pred_count - gt_count)
        })

    df = pd.DataFrame(results)

    correct_counts = (df['difference'] == 0).sum()
    total_images = len(df)
    exact_accuracy = (correct_counts / total_images) * 100
    mae = df['abs_error'].mean()

    return {'count_accuracy': exact_accuracy, 'count_mae': mae}, df

# Produce fresh prediction label files, then score per-image counts
# against the ground-truth labels.
pred_label_dir = run_detection_and_get_paths()
counting_metrics, count_df = calculate_counting_metrics(pred_label_dir)

count_accuracy_pct = counting_metrics['count_accuracy']
count_mae_value = counting_metrics['count_mae']

print("\nCounting Metrics:")
print(f"  Exact Accuracy: {count_accuracy_pct:.2f}%")
print(f"  MAE: {count_mae_value:.4f}")

## 5. Count Analysis Visualization

In [None]:
def _finish_axis(ax, xlabel, ylabel, title, grid_axis='both'):
    """Apply the shared labels, bold title, legend and faint grid to one subplot."""
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3, axis=grid_axis)


# Columns reused by several panels and by the summary below.
gt_counts = count_df['gt_count']
pred_counts = count_df['pred_count']
count_errors = count_df['difference']
max_val = max(gt_counts.max(), pred_counts.max())

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
(ax1, ax2), (ax3, ax4) = axes

# Top-left: ground truth vs predicted, with the y = x reference line.
ax1.scatter(gt_counts, pred_counts, alpha=0.6, s=50)
ax1.plot([0, max_val], [0, max_val], 'r--', linewidth=2, label='Perfect Prediction')
_finish_axis(ax1, 'Ground Truth Count', 'Predicted Count',
             'Ground Truth vs Predicted Counts')

# Top-right: histogram of signed errors (pred - gt).
ax2.hist(count_errors, bins=30, edgecolor='black', alpha=0.7)
ax2.axvline(x=0, color='r', linestyle='--', linewidth=2, label='Zero Error')
_finish_axis(ax2, 'Prediction Error (Pred - GT)', 'Frequency',
             'Distribution of Prediction Errors')

# Bottom-left: overlaid count distributions on unit-width bins.
bins = np.arange(0, max_val + 2, 1)
ax3.hist(gt_counts, bins=bins, alpha=0.5, label='Ground Truth', edgecolor='black')
ax3.hist(pred_counts, bins=bins, alpha=0.5, label='Predicted', edgecolor='black')
_finish_axis(ax3, 'Count', 'Frequency', 'Distribution of Counts')

# Bottom-right: side-by-side bars for the first (up to) 30 images.
n_samples = min(30, len(count_df))
x = np.arange(n_samples)
width = 0.35
ax4.bar(x - width/2, gt_counts.iloc[:n_samples], width, label='Ground Truth', alpha=0.8)
ax4.bar(x + width/2, pred_counts.iloc[:n_samples], width, label='Predicted', alpha=0.8)
_finish_axis(ax4, 'Image Index', 'Count',
             f'Count Comparison (First {n_samples} Images)', grid_axis='y')

plt.tight_layout()
plt.show()

# Summary statistics
n_images = len(count_df)
n_exact = (count_errors == 0).sum()
print(f"\nCount Analysis Statistics:")
print(f"  Total images: {n_images}")
print(f"  Exact matches: {n_exact} ({n_exact/n_images*100:.2f}%)")
print(f"  Mean GT count: {gt_counts.mean():.2f}")
print(f"  Mean Pred count: {pred_counts.mean():.2f}")
print(f"  GT count range: {gt_counts.min()} - {gt_counts.max()}")
print(f"  Pred count range: {pred_counts.min()} - {pred_counts.max()}")

## 6. Performance Summary

In [None]:
# Horizontal rules used to frame and divide the report.
heavy_rule = "="*50
light_rule = "-"*50

print("\n" + heavy_rule)
print("         YOLOv9 Performance Summary")
print(heavy_rule)

# Detection block: a missing individual metric is shown as 0.
if not bbox_metrics:
    print(" Detection metrics unavailable")
else:
    print(f" mAP@0.5:0.95:        {bbox_metrics.get('map_50_95', 0):.4f}")
    print(f" mAP@0.5:             {bbox_metrics.get('map_50', 0):.4f}")
    print(f" Precision:           {bbox_metrics.get('precision', 0):.4f}")
    print(f" Recall:              {bbox_metrics.get('recall', 0):.4f}")

print(light_rule)

# Counting block.
if not counting_metrics:
    print(" Counting metrics unavailable")
else:
    print(f" Count Accuracy:      {counting_metrics.get('count_accuracy', 0):.2f}%")
    print(f" Count MAE:           {counting_metrics.get('count_mae', 0):.4f}")

print(light_rule)

# Speed block: shown only when a non-zero FPS value was parsed.
fps_value = bbox_metrics.get('fps') if bbox_metrics else None
if fps_value:
    print(f" Inference Speed:     {fps_value:.2f} FPS")
else:
    print(" FPS metric unavailable")

print(heavy_rule)

## 7. Visualize Predictions

In [None]:
def visualize_predictions(num_samples=5, pred_dir=None):
    """Display a random sample of annotated images saved by the detection run.

    Args:
        num_samples: Maximum number of images to show; values <= 0 show none.
        pred_dir: Directory containing the saved ``*.jpg`` / ``*.png``
            images. Defaults to ``./runs/detect_for_counting/exp/``.

    Returns:
        None. Prints a message and returns early if the directory is missing
        or contains no matching images.
    """
    if pred_dir is None:
        pred_dir = Path('./runs/detect_for_counting/exp/')
    else:
        pred_dir = Path(pred_dir)

    if not pred_dir.exists():
        print(f"Prediction directory not found: {pred_dir}")
        return

    pred_images = list(pred_dir.glob('*.jpg')) + list(pred_dir.glob('*.png'))
    if not pred_images:
        print("No prediction images found.")
        return

    # Clamp to [0, number of images]: a negative size would make
    # np.random.choice raise.
    sample_count = max(0, min(num_samples, len(pred_images)))
    sample_indices = np.random.choice(len(pred_images), sample_count, replace=False)

    for i in sample_indices:
        img_path = pred_images[i]
        # The context manager closes the underlying file once the image has
        # been handed to matplotlib, instead of leaking one handle per image.
        with Image.open(img_path) as img:
            plt.figure(figsize=(15, 10))
            plt.imshow(img)
        plt.title(f"{img_path.name}", fontsize=14)
        plt.axis('off')
        plt.tight_layout()
        plt.show()


visualize_predictions()