# Model Comparison: YOLOv9 vs Faster R-CNN

This notebook provides a direct comparison between the trained YOLOv9 and Faster R-CNN models on the parking lot detection task.

**Key Comparisons:**
1. **R-CNN Learning Curves:** Dedicated visualization matching YOLO style.
2. **Visual Comparison:** 3-column view (Ground Truth vs YOLO vs R-CNN).
3. **Counting Error Analysis:** Single graph showing Ground Truth vs Prediction Error for both models.
4. **Statistical Summary:** Count error (Mean ± Std Dev) and inference speed.

## 1. Setup and Configuration

In [8]:
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import json
import yaml
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# --- PATH CONFIGURATION ---
# Root directories
YOLO_BASE = Path('./yolo')
RCNN_BASE = Path('./rcnn')

# YOLO Paths
YOLO_ROOT = YOLO_BASE / 'yolov9'
YOLO_WEIGHTS = YOLO_BASE / 'runs/train/carpk_yolov9/weights/best.pt'
YOLO_DATA_YAML = YOLO_BASE / 'prepared_data/yolo/data.yaml'

# RCNN Paths
RCNN_WEIGHTS = RCNN_BASE / 'runs/rcnn/latest.pt'
RCNN_HISTORY = RCNN_BASE / 'runs/runs/rcnn/history.json'
RCNN_TEST_JSON = RCNN_BASE / 'data/test.json'
RCNN_TEST_IMG_DIR = RCNN_BASE / 'data/test'

# Common Config
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
IMG_SIZE = 640
CONF_THRESHOLD = 0.25  # Confidence threshold for detection
IOU_THRESHOLD = 0.45   # NMS IoU threshold

# Check files
required_files = [YOLO_ROOT, YOLO_WEIGHTS, YOLO_DATA_YAML, RCNN_WEIGHTS, RCNN_HISTORY, RCNN_TEST_JSON]
for p in required_files:
    if not p.exists():
        raise FileNotFoundError(f"Missing required file/directory: {p}")

print(f"Device: {DEVICE}")
print(f"YOLO Weights: {YOLO_WEIGHTS}")
print(f"RCNN Weights: {RCNN_WEIGHTS}")
print(f"RCNN History: {RCNN_HISTORY}")

Device: cuda
YOLO Weights: yolo\runs\train\carpk_yolov9\weights\best.pt
RCNN Weights: rcnn\runs\rcnn\latest.pt
RCNN History: rcnn\runs\runs\rcnn\history.json


## 2. Load Models
Standardized wrappers for inference.

In [9]:
class YOLOWrapper:
    def __init__(self, weights_path, repo_path, device=DEVICE):
        self.device = device
        repo_str = str(repo_path.absolute())
        if repo_str not in sys.path:
            sys.path.append(repo_str)
            
        try:
            from models.common import DetectMultiBackend
            from utils.general import non_max_suppression, scale_boxes
            from utils.augmentations import letterbox
        except ImportError as e:
            print(f"Error importing YOLO modules from {repo_path}: {e}")
            raise
        
        self.model = DetectMultiBackend(weights_path, device=device, fp16=False)
        self.stride = self.model.stride
        self.names = self.model.names
        self.pt = self.model.pt
        self.non_max_suppression = non_max_suppression
        self.scale_boxes = scale_boxes
        self.letterbox = letterbox

    def predict(self, img_pil, conf_thres=0.25, iou_thres=0.45):
        im0 = np.array(img_pil)
        im = self.letterbox(im0, 640, stride=self.stride, auto=True)[0]
        im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        im = np.ascontiguousarray(im)
        im = torch.from_numpy(im).to(self.device)
        im = im.float() / 255.0
        if len(im.shape) == 3:
            im = im[None]

        pred = self.model(im, augment=False, visualize=False)
        if isinstance(pred, list):
            pred = pred[0]

        pred = self.non_max_suppression(pred, conf_thres, iou_thres, classes=None, max_det=1000)
        results = []
        det = pred[0]
        if len(det):
            det[:, :4] = self.scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                results.append({
                    'box': [int(c) for c in xyxy],
                    'score': float(conf),
                    'label': 'car'
                })
        return results

class RCNNWrapper:
    def __init__(self, weights_path, device=DEVICE):
        self.device = device
        def get_rcnn_model(num_classes):
            try:
                model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=None)
            except AttributeError:
                model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)
            in_features = model.roi_heads.box_predictor.cls_score.in_features
            model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
            return model

        self.model = get_rcnn_model(num_classes=2)
        print(f"Loading R-CNN weights from {weights_path}")
        checkpoint = torch.load(weights_path, map_location=device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.to(device)
        self.model.eval()
        
        self.transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def predict(self, img_pil, conf_thres=0.25):
        img_tensor = self.transforms(img_pil).to(self.device).unsqueeze(0)
        with torch.no_grad():
            pred = self.model(img_tensor)[0]
        
        results = []
        boxes = pred['boxes'].cpu().numpy()
        scores = pred['scores'].cpu().numpy()
        labels = pred['labels'].cpu().numpy()
        
        for box, score, label in zip(boxes, scores, labels):
            if score >= conf_thres and label == 1: # 1 is car
                results.append({
                    'box': [int(c) for c in box],
                    'score': float(score),
                    'label': 'car'
                })
        return results

print("Loading YOLOv9...")
yolo_model = YOLOWrapper(YOLO_WEIGHTS, YOLO_ROOT)
print("Loading Faster R-CNN...")
rcnn_model = RCNNWrapper(RCNN_WEIGHTS)
print("Models loaded.")

Loading YOLOv9...


Fusing layers... 
yolov9-c summary: 604 layers, 50698278 parameters, 0 gradients, 236.6 GFLOPs


Loading Faster R-CNN...
Loading R-CNN weights from rcnn\runs\rcnn\latest.pt
Models loaded.


## 3. Faster R-CNN Training Learning Curves
Visualizing training metrics for Faster R-CNN in the same style as YOLOv9.

In [10]:
def plot_rcnn_learning_curves():
    with open(RCNN_HISTORY, 'r') as f:
        h = json.load(f)
        
    epochs = range(1, len(h['train_loss']) + 1)
    total_epochs = len(epochs)
    
    # Create figure with grid spec (2x2)
    fig, axes = plt.subplots(2, 2, figsize=(18, 12))
    fig.suptitle('Faster R-CNN Training Learning Curves - Parking Dataset', fontsize=22, fontweight='bold', y=0.95)
    
    # --- 1. Training Loss Components ---
    ax = axes[0, 0]
    # Colors matching warm tones for training
    ax.plot(epochs, h.get('loss_classifier', []), color='#D32F2F', linewidth=2, label='Train Cls Loss')
    ax.plot(epochs, h.get('loss_box_reg', []), color='#F57C00', linewidth=2, label='Train Box Loss')
    ax.plot(epochs, h.get('loss_objectness', []), color='#FFB300', linewidth=2, label='Train Obj Loss')
    ax.plot(epochs, h.get('loss_rpn_box_reg', []), color='#FBC02D', linewidth=2, linestyle='--', label='Train RPN Box Loss')
    
    ax.set_title('Training Loss Components', fontsize=16, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=14)
    ax.set_ylabel('Loss', fontsize=14)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.grid(True, alpha=0.3)
    ax.legend(loc='upper right', frameon=True, fontsize=11)
    
    # --- 2. Mean Average Precision (mAP) ---
    ax = axes[0, 1]
    ax.plot(epochs, h.get('val_ap50', []), color='#00BFA5', linewidth=2.5, label='mAP@0.5')
    ax.plot(epochs, h.get('val_map', []), color='#00897B', linewidth=2.5, label='mAP@0.5:0.95')
    
    ax.set_title('Mean Average Precision (mAP)', fontsize=16, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=14)
    ax.set_ylabel('mAP', fontsize=14)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.set_ylim([0, 1.02])
    ax.grid(True, alpha=0.3)
    ax.legend(loc='lower right', frameon=True, fontsize=11)
    
    # --- 3. Precision and Recall ---
    ax = axes[1, 0]
    # Using AP50 as Precision Proxy (per instructions) and AR100 as Recall
    ax.plot(epochs, h.get('val_ap50', []), color='#536DFE', linewidth=2.5, label='Precision (AP@0.50)')
    ax.plot(epochs, h.get('val_ar_100', []), color='#7C4DFF', linewidth=2.5, label='Recall (AR@100)')
    
    ax.set_title('Precision and Recall', fontsize=16, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=14)
    ax.set_ylabel('Score', fontsize=14)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.set_ylim([0, 1.02])
    ax.grid(True, alpha=0.3)
    ax.legend(loc='lower right', frameon=True, fontsize=11)
    
    # --- 4. Summary Box ---
    ax = axes[1, 1]
    ax.axis('off')
    
    # Calculate stats
    best_map_idx = np.argmax(h.get('val_map', [0]))
    best_map = h['val_map'][best_map_idx]
    final_map = h['val_map'][-1]
    final_ap50 = h['val_ap50'][-1]
    final_recall = h['val_ar_100'][-1]
    start_loss = h['train_loss'][0]
    end_loss = h['train_loss'][-1]
    
    summary_text = (
        f"Training Summary\n"
        f"=======================================\n\n"
        f"Total Epochs: {total_epochs}\n\n"
        f"Final Metrics (Epoch {total_epochs}):\n"
        f"  • mAP@0.5:        {final_ap50:.4f} ({final_ap50*100:.1f}%)\n"
        f"  • mAP@0.5:0.95:   {final_map:.4f} ({final_map*100:.1f}%)\n"
        f"  • Precision:      {final_ap50:.4f} ({final_ap50*100:.1f}%)\n"
        f"  • Recall:         {final_recall:.4f} ({final_recall*100:.1f}%)\n\n"
        f"Best Performance:\n"
        f"  • Best mAP@0.5:0.95: {best_map:.4f} ({best_map*100:.1f}%)\n"
        f"  • Achieved at Epoch: {best_map_idx + 1}\n\n"
        f"Loss Reduction:\n"
        f"  • Total Train Loss:  {start_loss:.4f} → {end_loss:.4f}"
    )
    
    # Add text box
    props = dict(boxstyle='round', facecolor='#F5F5DC', alpha=0.5)
    ax.text(0.1, 0.5, summary_text, transform=ax.transAxes, fontsize=14,
            verticalalignment='center', bbox=props, fontfamily='monospace')
    
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig('rcnn_learning_curves.png', dpi=300)
    plt.show()

plot_rcnn_learning_curves()

## 4. Counting Error Analysis
We run inference on the TEST set for both models, calculating errors per image.

In [11]:
def run_comparative_inference():
    # Load Test Data Ground Truth from JSON
    with open(RCNN_TEST_JSON, 'r') as f:
        coco_data = json.load(f)
    
    # Map image ID to filename and annotations
    img_map = {img['id']: img for img in coco_data['images']}
    ann_map = {}
    for ann in coco_data['annotations']:
        img_id = ann['image_id']
        if img_id not in ann_map:
            ann_map[img_id] = []
        ann_map[img_id].append(ann)

    results = []
    
    print(f"Running inference on {len(img_map)} test images...")
    
    for img_id, img_info in tqdm(img_map.items()):
        file_name = img_info['file_name']
        img_path = RCNN_TEST_IMG_DIR / file_name
        
        if not img_path.exists():
            continue
            
        # Ground Truth Count
        gt_anns = ann_map.get(img_id, [])
        gt_count = len(gt_anns)
        
        try:
            pil_img = Image.open(img_path).convert('RGB')
            
            # YOLO Prediction
            yolo_preds = yolo_model.predict(pil_img, conf_thres=CONF_THRESHOLD)
            yolo_count = len(yolo_preds)
            
            # RCNN Prediction
            rcnn_preds = rcnn_model.predict(pil_img, conf_thres=CONF_THRESHOLD)
            rcnn_count = len(rcnn_preds)
            
            results.append({
                'image': file_name,
                'gt_count': gt_count,
                'yolo_count': yolo_count,
                'rcnn_count': rcnn_count,
                'yolo_error': yolo_count - gt_count,
                'rcnn_error': rcnn_count - gt_count
            })
            
        except Exception as e:
            print(f"Error processing {file_name}: {e}")
            continue

    return pd.DataFrame(results)

df_results = run_comparative_inference()
print(f"Processed {len(df_results)} images.")

Running inference on 99 test images...


100%|██████████| 99/99 [00:20<00:00,  4.92it/s]

Processed 99 images.





In [12]:
def plot_error_comparison(df):
    plt.figure(figsize=(12, 6))
    
    # Scatter plot: X=Ground Truth Count, Y=Error (Predicted - GT)
    plt.scatter(df['gt_count'], df['yolo_error'], c='blue', alpha=0.6, label='YOLOv9 Error', marker='o')
    plt.scatter(df['gt_count'], df['rcnn_error'], c='red', alpha=0.6, label='R-CNN Error', marker='x')
    
    plt.axhline(0, color='black', linestyle='--', linewidth=1)
    plt.title('Counting Error Comparison: Ground Truth vs Prediction Error', fontsize=18, fontweight='bold')
    plt.xlabel('Ground Truth Count (Number of Cars)', fontsize=15)
    plt.ylabel('Prediction Error (Pred - GT)', fontsize=15)
    plt.tick_params(axis='both', which='major', labelsize=13)
    plt.legend(fontsize=12)
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('comparison_error_scatter.png')
    plt.show()

plot_error_comparison(df_results)

## 5. Summary Statistics
Calculation of Mean Error and Standard Deviation for the summary text file.

In [13]:
def generate_summary(df):
    yolo_mae = df['yolo_error'].abs().mean()
    rcnn_mae = df['rcnn_error'].abs().mean()
    
    yolo_mean_err = df['yolo_error'].mean()
    rcnn_mean_err = df['rcnn_error'].mean()
    
    yolo_std_err = df['yolo_error'].std()
    rcnn_std_err = df['rcnn_error'].std()
    
    summary = "COMPARISON SUMMARY\n"
    summary += "==================\n"
    summary += f"Total Images Tested: {len(df)}\n\n"
    
    summary += "YOLOv9 Performance:\n"
    summary += f"  Mean Absolute Error (MAE): {yolo_mae:.2f}\n"
    summary += f"  Error Distribution: {yolo_mean_err:.2f} +/- {yolo_std_err:.2f}\n\n"
    
    summary += "Faster R-CNN Performance:\n"
    summary += f"  Mean Absolute Error (MAE): {rcnn_mae:.2f}\n"
    summary += f"  Error Distribution: {rcnn_mean_err:.2f} +/- {rcnn_std_err:.2f}\n\n"
    
    summary += "Conclusion:\n"
    if yolo_mae < rcnn_mae:
        summary += "  YOLOv9 provided more accurate counts on average.\n"
    else:
        summary += "  Faster R-CNN provided more accurate counts on average.\n"
        
    print(summary)
    
    with open('model_comparison_summary.txt', 'w') as f:
        f.write(summary)

generate_summary(df_results)

COMPARISON SUMMARY
Total Images Tested: 99

YOLOv9 Performance:
  Mean Absolute Error (MAE): 1.02
  Error Distribution: 0.41 +/- 1.69

Faster R-CNN Performance:
  Mean Absolute Error (MAE): 3.64
  Error Distribution: 3.62 +/- 2.93

Conclusion:
  YOLOv9 provided more accurate counts on average.



## 6. Visual Side-by-Side Comparison
Visualizing predictions side-by-side: Ground Truth vs YOLOv9 vs Faster R-CNN.

In [14]:
def draw_boxes(img_pil, boxes, color='red', width=3):
    img = img_pil.copy()
    draw = ImageDraw.Draw(img)
    for box in boxes:
        # Box format can be [x, y, w, h] (coco) or [x1, y1, x2, y2] (pascal)
        # We need to handle both. Our models output [x1, y1, x2, y2]
        # COCO JSON is [x, y, w, h]
        if len(box) == 4:
            # Determine if it's likely xywh or xyxy by checking if x2 < x1 is impossible
            # For drawing, we assume input `boxes` are already in [x1, y1, x2, y2] format
            # except for ground truth which we'll convert before calling this
            draw.rectangle(box, outline=color, width=width)
    return img

def get_ground_truth_boxes(image_filename):
    # Reload JSON to be safe
    with open(RCNN_TEST_JSON, 'r') as f:
        coco_data = json.load(f)
    
    # Find image ID
    img_id = None
    for img in coco_data['images']:
        if img['file_name'] == image_filename:
            img_id = img['id']
            break
            
    if img_id is None: return []
    
    boxes = []
    for ann in coco_data['annotations']:
        if ann['image_id'] == img_id:
            x, y, w, h = ann['bbox']
            # Convert xywh to xyxy
            boxes.append([x, y, x+w, y+h])
    return boxes

def visualize_side_by_side(num_samples=3):
    # Pick random samples from our results dataframe
    sample_df = df_results.sample(n=num_samples)
    
    for i, (_, row) in enumerate(sample_df.iterrows()):
        img_filename = row['image']
        img_path = RCNN_TEST_IMG_DIR / img_filename
        pil_img = Image.open(img_path).convert('RGB')
        
        # 1. Ground Truth
        gt_boxes = get_ground_truth_boxes(img_filename)
        img_gt = draw_boxes(pil_img, gt_boxes, color='#00FF00') # Green
        
        # 2. YOLO Prediction
        yolo_raw = yolo_model.predict(pil_img, conf_thres=CONF_THRESHOLD)
        yolo_boxes = [p['box'] for p in yolo_raw]
        img_yolo = draw_boxes(pil_img, yolo_boxes, color='#00FFFF') # Cyan
        
        # 3. RCNN Prediction
        rcnn_raw = rcnn_model.predict(pil_img, conf_thres=CONF_THRESHOLD)
        rcnn_boxes = [p['box'] for p in rcnn_raw]
        img_rcnn = draw_boxes(pil_img, rcnn_boxes, color='#FF00FF') # Magenta
        
        # Plot - 3 Columns
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(24, 8))
        
        ax1.imshow(img_gt)
        ax1.set_title(f"Ground Truth\nCount: {len(gt_boxes)}", fontsize=20, fontweight='bold')
        ax1.axis('off')
        
        ax2.imshow(img_yolo)
        ax2.set_title(f"YOLOv9 Prediction\nCount: {len(yolo_boxes)} (Diff: {len(yolo_boxes)-len(gt_boxes)})", fontsize=20, fontweight='bold')
        ax2.axis('off')
        
        ax3.imshow(img_rcnn)
        ax3.set_title(f"Faster R-CNN Prediction\nCount: {len(rcnn_boxes)} (Diff: {len(rcnn_boxes)-len(gt_boxes)})", fontsize=20, fontweight='bold')
        ax3.axis('off')
        
        # SAVE TO FILE with a unique name
        save_path = f'comparison_sample_{i+1}.png'
        plt.tight_layout()
        plt.savefig(save_path)
        print(f"Saved visualization to {save_path}")
        plt.show()

visualize_side_by_side(num_samples=3)

Saved visualization to comparison_sample_1.png
Saved visualization to comparison_sample_2.png
Saved visualization to comparison_sample_3.png
