In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that paths under
# /content/drive/MyDrive (model weights, test data, results) can be accessed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.235-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.235-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.235 ultralytics-thop-2.0.18


In [None]:
import torch
import cv2
import numpy as np
from pathlib import Path
import time
from datetime import datetime
import pandas as pd
from tqdm import tqdm
import json
import os
from collections import defaultdict

import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt
plt.ioff()  # Turn off interactive mode

# YOLO imports
from ultralytics import YOLO

# Faster R-CNN imports
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F

# DETR imports
from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image


# =============================
# UNIFIED CONFIGURATION
# =============================
class UnifiedEvalConfig:
    """Central settings shared by model loading, inference, evaluation and reporting.

    All values are class attributes, so they are evaluated once when the class
    body runs (this includes `timestamp` and `device`).
    """

    # Single place to relocate the project: every path below is derived from it
    project_root = "/content/drive/MyDrive/CuoiKi"

    # Model paths
    yolov5n_path = f"{project_root}/model/yolov5n_realcar.pt"
    yolov8n_path = f"{project_root}/model/yolov8_realcar.pt"
    yolov11n_path = f"{project_root}/model/yolo11n_realcar.pt"
    faster_rcnn_path = f"{project_root}/model/best_faster_rcnn_pretrained (1).pth"
    detr_path = f"{project_root}/model/best_model"

    # Test data paths
    test_images_dir = f"{project_root}/Data/Yolo_V11_new/test/images"
    test_labels_dir = f"{project_root}/Data/Yolo_V11_new/test/labels"
    test_video_path = f"{project_root}/video/video1.mp4"

    # Output paths; the timestamp is embedded in the names of saved artefacts
    output_dir = f"{project_root}/comprehensive_evaluation_results_3"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Evaluation settings: minimum confidence per model family
    yolo_conf_threshold = 0.5
    faster_rcnn_conf_threshold = 0.6
    detr_conf_threshold = 0.8
    # Minimum IoU for a prediction to count as a match.
    # NOTE(review): reports label the precision metric "mAP@0.5" while matching
    # uses 0.45 - confirm which value is intended.
    iou_threshold = 0.45

    # Device: use the GPU when PyTorch can see one
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Model-specific configurations
    faster_rcnn_num_classes = 2  # background + car
    faster_rcnn_car_class = 1
    detr_num_classes = 1
    detr_car_class = 0

    # Display names used in logs, charts and exported tables
    model_display_names = {
        'yolov5n': 'YOLOv5n',
        'yolov8n': 'YOLOv8n',
        'yolov11n': 'YOLOv11n',
        'faster_rcnn': 'Faster R-CNN',
        'detr': 'DETR'
    }

# Shared settings object read by the helpers defined below
config = UnifiedEvalConfig()

# Ensure the results root and each of its sub-folders exist before anything is written
os.makedirs(config.output_dir, exist_ok=True)
for _subfolder in ("visualizations", "metrics", "videos", "comparisons"):
    os.makedirs(f"{config.output_dir}/{_subfolder}", exist_ok=True)

print(f" Output directories created at: {config.output_dir}")
print(f" Timestamp: {config.timestamp}")


# =============================
# UTILITY FUNCTIONS
# =============================
def convert_to_serializable(obj):
    """Recursively replace NumPy values with native Python equivalents.

    Handles NumPy booleans, integers, floats and arrays, and walks through
    dicts (keys and values), lists and tuples so nested structures become
    safe to pass to ``json.dump``. Any other object is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) value.

    Returns:
        The same structure with NumPy scalars/arrays converted to
        bool/int/float/list. Lists stay lists and tuples stay tuples.
    """
    # np.bool_ is not an np.integer subclass, so it needs its own branch;
    # the json module cannot encode it as-is.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        # NumPy scalar keys (e.g. np.int64 class ids) are rejected by json too
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_to_serializable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_to_serializable(item) for item in obj)
    return obj

def get_display_name(model_key):
    """Return the human-readable label for a model key.

    Keys listed in ``config.model_display_names`` map to their configured
    label; any other key falls back to its upper-cased form.
    """
    fallback = model_key.upper()
    try:
        return config.model_display_names[model_key]
    except KeyError:
        return fallback


# =============================
# MODEL LOADING
# =============================
class ModelLoader:
    """Unified model loader for all architectures.

    Loading is best-effort: a missing weights path or an exception while
    loading is reported on stdout and that model is simply left out of the
    returned dictionary, so evaluation can continue with the remaining models.
    """

    # (key in the returned dict, config attribute holding the weights path, label for log lines)
    _YOLO_VARIANTS = (
        ('yolov5n', 'yolov5n_path', 'YOLOv5n'),
        ('yolov8n', 'yolov8n_path', 'YOLOv8n'),
        ('yolov11n', 'yolov11n_path', 'YOLOv11n'),
    )

    @staticmethod
    def _weights_exist(path, label):
        """Return True if `path` exists; otherwise report that `label` is skipped."""
        if os.path.exists(path):
            return True
        print(f" {label} skipped: nothing found at {path}")
        return False

    @staticmethod
    def _build_faster_rcnn(config):
        """Rebuild the Faster R-CNN architecture and load the fine-tuned checkpoint."""
        # weights=None: the detector weights come from the checkpoint loaded below
        model = fasterrcnn_resnet50_fpn(weights=None)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # Swap the box head so its output size matches the configured class count
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, config.faster_rcnn_num_classes
        )

        checkpoint = torch.load(config.faster_rcnn_path, map_location=config.device)
        # Accept both a wrapped checkpoint and a bare state dict
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint

        model.load_state_dict(state_dict, strict=True)
        model.to(config.device)
        model.eval()
        return model

    @staticmethod
    def _build_detr(config):
        """Load the DETR image processor and model from `config.detr_path`."""
        processor = DetrImageProcessor.from_pretrained(config.detr_path)
        model = DetrForObjectDetection.from_pretrained(config.detr_path)
        model.to(config.device)
        model.eval()
        return processor, model

    @staticmethod
    def load_all_models(config):
        """Load all available models.

        Args:
            config: Settings object providing the weights paths, confidence
                thresholds, class count and device read by the loaders.

        Returns:
            dict: model key -> {'model', 'type', 'conf_threshold'} (plus
            'processor' for DETR), in the order YOLOv5n, YOLOv8n, YOLOv11n,
            Faster R-CNN, DETR. Models that could not be loaded are absent.
        """
        models = {}

        print("\n" + "="*70)
        print(" "*20 + "LOADING ALL MODELS")
        print("="*70)

        # The three YOLO variants differ only in weights path and label
        for model_key, path_attr, label in ModelLoader._YOLO_VARIANTS:
            weights_path = getattr(config, path_attr)
            if not ModelLoader._weights_exist(weights_path, label):
                continue
            try:
                models[model_key] = {
                    'model': YOLO(weights_path),
                    'type': 'yolo',
                    'conf_threshold': config.yolo_conf_threshold
                }
                print(f" {label} loaded successfully")
            except Exception as e:
                print(f" Error loading {label}: {e}")

        # Load Faster R-CNN
        if ModelLoader._weights_exist(config.faster_rcnn_path, 'Faster R-CNN'):
            try:
                models['faster_rcnn'] = {
                    'model': ModelLoader._build_faster_rcnn(config),
                    'type': 'faster_rcnn',
                    'conf_threshold': config.faster_rcnn_conf_threshold
                }
                print(f" Faster R-CNN loaded successfully")
            except Exception as e:
                print(f" Error loading Faster R-CNN: {e}")

        # Load DETR
        if ModelLoader._weights_exist(config.detr_path, 'DETR'):
            try:
                processor, model = ModelLoader._build_detr(config)
                models['detr'] = {
                    'model': model,
                    'processor': processor,
                    'type': 'detr',
                    'conf_threshold': config.detr_conf_threshold
                }
                print(f" DETR loaded successfully")
            except Exception as e:
                print(f" Error loading DETR: {e}")

        print(f"\n Total models loaded: {len(models)}")
        print("="*70 + "\n")

        return models


# =============================
# UNIFIED INFERENCE
# =============================
class UnifiedPredictor:
    """Unified prediction interface for all models.

    Every backend returns the same pair: a detections dict with NumPy
    'boxes' (xyxy), 'scores', 'labels' and an int 'num_detections', plus the
    elapsed time of the timed model call in seconds.

    Timing notes:
        * The device is synchronised before the clock starts and before it
          stops, so queued CUDA work is included in the measurement.
        * For Faster R-CNN and DETR only the forward pass is timed. For YOLO
          the whole model call is timed, and that call receives the image
          path/array directly, so the figures are not strictly like-for-like.
    """

    @staticmethod
    def _wait_for_device():
        """Block until pending CUDA work has finished (no-op without a GPU)."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    @staticmethod
    def _as_local_path(image_path):
        """Return a path usable by image readers for str/PathLike input, else None."""
        if isinstance(image_path, (str, os.PathLike)):
            return os.fspath(image_path)
        return None

    @staticmethod
    @torch.no_grad()
    def predict(model_dict, image_path, model_key):
        """
        Unified prediction interface

        Args:
            model_dict: Dictionary containing model and metadata
                ('model', 'type', 'conf_threshold', and 'processor' for DETR)
            image_path: Path to image (str or path-like) or BGR numpy array
            model_key: Key identifying the model; used only in error messages

        Returns:
            detections: Dictionary with boxes, scores, labels, num_detections
            inference_time: Time taken for inference, in seconds

        Raises:
            ValueError: If ``model_dict['type']`` is not a supported backend.
        """
        model_type = model_dict['type']
        conf_threshold = model_dict['conf_threshold']

        if model_type == 'yolo':
            return UnifiedPredictor._predict_yolo(
                model_dict['model'], image_path, conf_threshold
            )
        if model_type == 'faster_rcnn':
            return UnifiedPredictor._predict_faster_rcnn(
                model_dict['model'], image_path, conf_threshold
            )
        if model_type == 'detr':
            return UnifiedPredictor._predict_detr(
                model_dict['model'], model_dict['processor'],
                image_path, conf_threshold
            )

        # Fail loudly instead of returning None, which callers cannot unpack
        raise ValueError(
            f"Unsupported model type {model_type!r} for model {model_key!r}"
        )

    @staticmethod
    def _predict_yolo(model, image_path, conf_threshold):
        """YOLO prediction; confidence filtering is delegated to the model call."""
        UnifiedPredictor._wait_for_device()
        start_time = time.perf_counter()
        results = model(image_path, conf=conf_threshold, verbose=False)
        UnifiedPredictor._wait_for_device()
        inference_time = time.perf_counter() - start_time

        boxes = results[0].boxes
        detections = {
            'boxes': boxes.xyxy.cpu().numpy(),
            'scores': boxes.conf.cpu().numpy(),
            'labels': boxes.cls.cpu().numpy().astype(int),
            'num_detections': int(len(boxes))
        }

        return detections, inference_time

    @staticmethod
    def _predict_faster_rcnn(model, image_path, conf_threshold):
        """Faster R-CNN prediction, keeping only confident car-class boxes."""
        local_path = UnifiedPredictor._as_local_path(image_path)
        if local_path is not None:
            image = cv2.imread(local_path)
            # cv2.imread signals failure by returning None rather than raising
            if image is None:
                raise ValueError(f"Could not read image: {local_path}")
        else:
            image = image_path

        # Input is BGR (OpenCV order); the tensor is built from the RGB version
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img_tensor = F.to_tensor(image_rgb).unsqueeze(0).to(config.device)

        UnifiedPredictor._wait_for_device()
        start_time = time.perf_counter()
        predictions = model(img_tensor)
        UnifiedPredictor._wait_for_device()
        inference_time = time.perf_counter() - start_time

        pred = predictions[0]
        boxes = pred['boxes'].cpu().numpy()
        scores = pred['scores'].cpu().numpy()
        labels = pred['labels'].cpu().numpy()

        # Filter by confidence and car class (class 1)
        mask = (scores >= conf_threshold) & (labels == config.faster_rcnn_car_class)

        detections = {
            'boxes': boxes[mask],
            'scores': scores[mask],
            'labels': labels[mask],
            'num_detections': int(mask.sum())
        }

        return detections, inference_time

    @staticmethod
    def _predict_detr(model, processor, image_path, conf_threshold):
        """DETR prediction, keeping only car-class boxes above the threshold."""
        local_path = UnifiedPredictor._as_local_path(image_path)
        if local_path is not None:
            # Context manager releases the file handle once the RGB copy exists
            with Image.open(local_path) as raw_image:
                image = raw_image.convert("RGB")
        else:
            image_bgr = image_path
            image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image_rgb)

        width, height = image.size

        inputs = processor(images=image, return_tensors="pt")
        inputs = {k: v.to(config.device) for k, v in inputs.items()}

        UnifiedPredictor._wait_for_device()
        start_time = time.perf_counter()
        outputs = model(**inputs)
        UnifiedPredictor._wait_for_device()
        inference_time = time.perf_counter() - start_time

        # Post-processing is given the original (height, width) of the image
        target_sizes = torch.tensor([[height, width]], device=config.device)
        results = processor.post_process_object_detection(
            outputs, target_sizes=target_sizes, threshold=conf_threshold
        )[0]

        boxes = results['boxes'].cpu().numpy()
        scores = results['scores'].cpu().numpy()
        labels = results['labels'].cpu().numpy()

        mask = labels == config.detr_car_class

        detections = {
            'boxes': boxes[mask],
            'scores': scores[mask],
            'labels': labels[mask],
            'num_detections': int(mask.sum())
        }

        return detections, inference_time


# =============================
# METRICS CALCULATION
# =============================
def calculate_iou(box1, box2):
    """Return the Intersection over Union of two (x_min, y_min, x_max, y_max) boxes.

    The result is 0 when the boxes do not overlap or when the union area is
    not positive.
    """
    ax_min, ay_min, ax_max, ay_max = box1
    bx_min, by_min, bx_max, by_max = box2

    # Overlap extents are clamped at zero so disjoint boxes give no intersection
    overlap_w = max(0, min(ax_max, bx_max) - max(ax_min, bx_min))
    overlap_h = max(0, min(ay_max, by_max) - max(ay_min, by_min))
    overlap_area = overlap_w * overlap_h

    area_a = (ax_max - ax_min) * (ay_max - ay_min)
    area_b = (bx_max - bx_min) * (by_max - by_min)
    combined_area = area_a + area_b - overlap_area

    if combined_area > 0:
        return overlap_area / combined_area
    return 0


def load_ground_truth(label_path, image_width, image_height):
    """Read a YOLO-format label file and return pixel-space corner boxes.

    Each accepted line has exactly five whitespace-separated numbers
    (class, x_center, y_center, width, height), with the last four scaled by
    the image dimensions. Lines with any other field count are ignored, and
    the class value is parsed but not returned.

    Returns:
        np.ndarray of shape (N, 4) with rows [x1, y1, x2, y2]; shape (0, 4)
        when the file is missing or holds no usable line.
    """
    if not os.path.exists(label_path):
        return np.array([]).reshape(0, 4)

    corner_boxes = []
    with open(label_path, 'r') as label_file:
        for raw_line in label_file:
            fields = raw_line.strip().split()
            if len(fields) != 5:
                continue

            _, rel_x, rel_y, rel_w, rel_h = (float(field) for field in fields)

            # Scale the normalised centre/size to pixels
            center_x = rel_x * image_width
            center_y = rel_y * image_height
            half_w = (rel_w * image_width) / 2
            half_h = (rel_h * image_height) / 2

            corner_boxes.append([
                center_x - half_w,
                center_y - half_h,
                center_x + half_w,
                center_y + half_h,
            ])

    if not corner_boxes:
        return np.array([]).reshape(0, 4)
    return np.array(corner_boxes)


def calculate_metrics(detections, ground_truth, iou_threshold=0.5):
    """Compute per-image precision, recall, F1 and TP/FP/FN counts.

    Predictions are visited in descending score order. Each one is paired
    with the still-unmatched ground-truth box of highest IoU and counts as a
    true positive when that IoU reaches `iou_threshold`; otherwise it is a
    false positive. Unmatched ground-truth boxes are false negatives.

    When there are no predictions or no ground-truth boxes, all three ratios
    are reported as 0.0 and only the counts carry information.
    """
    candidate_boxes = detections['boxes']
    candidate_scores = detections['scores']
    n_pred = len(candidate_boxes)
    n_gt = len(ground_truth)

    # Degenerate images: nothing predicted, or nothing to find
    if n_pred == 0 or n_gt == 0:
        return {
            'precision': 0.0,
            'recall': 0.0,
            'f1_score': 0.0,
            'true_positives': 0,
            'false_positives': int(n_pred),
            'false_negatives': int(n_gt)
        }

    # Highest-confidence predictions get first pick of the ground-truth boxes
    ranked_boxes = candidate_boxes[np.argsort(-candidate_scores)]

    claimed = np.zeros(n_gt, dtype=bool)
    hits = 0
    misses = 0

    for box in ranked_boxes:
        best_iou = 0
        best_idx = -1

        for idx, target in enumerate(ground_truth):
            if not claimed[idx]:
                overlap = calculate_iou(box, target)
                if overlap > best_iou:
                    best_iou = overlap
                    best_idx = idx

        if best_idx >= 0 and best_iou >= iou_threshold:
            hits += 1
            claimed[best_idx] = True
        else:
            misses += 1

    unmatched = n_gt - hits

    precision = hits / (hits + misses) if (hits + misses) > 0 else 0
    recall = hits / (hits + unmatched) if (hits + unmatched) > 0 else 0
    if (precision + recall) > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0

    return {
        'precision': float(precision),
        'recall': float(recall),
        'f1_score': float(f1_score),
        'true_positives': int(hits),
        'false_positives': int(misses),
        'false_negatives': int(unmatched)
    }


# =============================
# EVALUATION ON IMAGES
# =============================
def evaluate_all_models(models, config):
    """Run every loaded model over the test images and collect raw results.

    Images are the ``*.jpg`` files of ``config.test_images_dir`` (sorted)
    followed by its ``*.png`` files (sorted). An image that OpenCV cannot
    read, or whose prediction raises, is skipped for that model, so the
    per-model lists may be shorter than the returned image list.

    Returns:
        (all_results, image_paths) where all_results maps each model key to a
        dict with 'predictions', 'metrics_per_image', 'inference_times' and
        'total_detections'.
    """
    images_root = Path(config.test_images_dir)
    image_paths = [
        path
        for pattern in ('*.jpg', '*.png')
        for path in sorted(images_root.glob(pattern))
    ]

    rule = "=" * 70
    print(rule)
    print(" " * 20 + "EVALUATING ALL MODELS")
    print(rule)
    print(f"Number of test images: {len(image_paths)}")
    print(f"IoU threshold: {config.iou_threshold}")
    print(rule + "\n")

    all_results = {}

    for model_key, model_dict in models.items():
        display_name = get_display_name(model_key)
        print(f"\nEvaluating {display_name}...")
        print("-" * 70)

        record = {
            'predictions': [],
            'metrics_per_image': [],
            'inference_times': [],
            'total_detections': 0
        }

        progress_bar = tqdm(image_paths, desc=display_name)
        for img_path in progress_bar:
            # The image is decoded here only to obtain its size for label scaling
            image = cv2.imread(str(img_path))
            if image is None:
                continue
            height, width = image.shape[:2]

            ground_truth = load_ground_truth(
                os.path.join(config.test_labels_dir, img_path.stem + '.txt'),
                width,
                height,
            )

            try:
                detections, inference_time = UnifiedPredictor.predict(
                    model_dict, str(img_path), model_key
                )
            except Exception as e:
                print(f"\nError predicting {img_path}: {e}")
                continue

            metrics = calculate_metrics(detections, ground_truth, config.iou_threshold)

            record['predictions'].append(detections)
            record['metrics_per_image'].append(metrics)
            record['inference_times'].append(float(inference_time))
            record['total_detections'] += detections['num_detections']

            progress_bar.set_postfix({
                'detections': detections['num_detections'],
                'time': f'{inference_time*1000:.1f}ms'
            })

        record['total_detections'] = int(record['total_detections'])
        all_results[model_key] = record

    return all_results, image_paths


# =============================
# AGGREGATE METRICS
# =============================
def aggregate_all_metrics(all_results):
    """Summarise per-image results into one metrics dict per model.

    Precision, recall and F1 are computed from TP/FP/FN totals summed over
    all images. Models without any per-image metrics are omitted.

    NOTE(review): the value stored under 'mAP@0.5' is the mean of the
    non-zero per-image precision values, not an area under a
    precision-recall curve.
    """
    def _safe_div(numerator, denominator):
        # Ratios with a non-positive denominator are reported as 0
        return numerator / denominator if denominator > 0 else 0

    summary = {}

    for model_key, results in all_results.items():
        per_image = results['metrics_per_image']
        if len(per_image) == 0:
            continue

        tp_total = fp_total = fn_total = 0
        for entry in per_image:
            tp_total += entry['true_positives']
            fp_total += entry['false_positives']
            fn_total += entry['false_negatives']

        precision = _safe_div(tp_total, tp_total + fp_total)
        recall = _safe_div(tp_total, tp_total + fn_total)
        f1_score = _safe_div(2 * (precision * recall), precision + recall)

        positive_precisions = [
            entry['precision'] for entry in per_image if entry['precision'] > 0
        ]
        mean_precision = np.mean(positive_precisions) if len(positive_precisions) > 0 else 0

        mean_latency = np.mean(results['inference_times'])
        fps = _safe_div(1, mean_latency)

        summary[model_key] = {
            'precision': float(precision),
            'recall': float(recall),
            'f1_score': float(f1_score),
            'mAP@0.5': float(mean_precision),
            'true_positives': int(tp_total),
            'false_positives': int(fp_total),
            'false_negatives': int(fn_total),
            'avg_inference_time_ms': float(mean_latency * 1000),
            'fps': float(fps),
            'total_detections': int(results['total_detections'])
        }

    return summary


# =============================
# COMPREHENSIVE VISUALIZATION
# =============================
def _draw_value_bars(ax, labels, values, colors, ylabel, title, value_format,
                     title_fontsize=12, value_fontsize=8, ylim=None):
    """Draw one bar per model on `ax` and print each bar's value above it."""
    positions = np.arange(len(labels))
    bars = ax.bar(positions, values, color=colors, alpha=0.7, edgecolor='black', linewidth=2)
    ax.set_ylabel(ylabel, fontweight='bold', fontsize=10)
    ax.set_title(title, fontsize=title_fontsize, fontweight='bold', pad=10)
    # Fix the tick positions first so each label is tied to its own bar
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=20, ha='right', fontsize=8)
    ax.grid(True, alpha=0.3, axis='y')
    if ylim is not None:
        ax.set_ylim(ylim)

    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                value_format.format(height), ha='center', va='bottom',
                fontweight='bold', fontsize=value_fontsize)


def _draw_grouped_bars(ax, labels, series, ylabel, title, ylim=None):
    """Draw side-by-side bars per model; `series` holds (legend label, values, color)."""
    positions = np.arange(len(labels))
    width = 0.25
    # Centre each group on its tick: three series give offsets -width, 0, +width
    offsets = (np.arange(len(series)) - (len(series) - 1) / 2) * width
    for offset, (legend_label, values, color) in zip(offsets, series):
        ax.bar(positions + offset, values, width, label=legend_label,
               color=color, alpha=0.8, edgecolor='black')
    ax.set_xlabel('Model', fontweight='bold', fontsize=10)
    ax.set_ylabel(ylabel, fontweight='bold', fontsize=10)
    ax.set_title(title, fontsize=12, fontweight='bold', pad=10)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=20, ha='right', fontsize=8)
    ax.legend(fontsize=8)
    ax.grid(True, alpha=0.3, axis='y')
    if ylim is not None:
        ax.set_ylim(ylim)


def _build_ranking_summary(aggregated_metrics, model_keys, top_n=3):
    """Compose the rankings/recommendations text for however many models exist."""
    def ranked_by(metric):
        return sorted(model_keys, key=lambda key: aggregated_metrics[key][metric], reverse=True)

    sorted_f1 = ranked_by('f1_score')
    sorted_fps = ranked_by('fps')
    sorted_map = ranked_by('mAP@0.5')

    lines = ["RANKINGS & RECOMMENDATIONS", '=' * 45, ""]
    sections = (
        ("Best Accuracy (F1-Score):", sorted_f1, 'f1_score', '.4f'),
        ("Fastest (FPS):", sorted_fps, 'fps', '.2f'),
        ("Best mAP:", sorted_map, 'mAP@0.5', '.4f'),
    )
    for heading, ranking, metric, spec in sections:
        lines.append(heading)
        # Slicing lists only the models that exist instead of assuming three
        for place, key in enumerate(ranking[:top_n], start=1):
            value = aggregated_metrics[key][metric]
            lines.append(f"   {place}. {get_display_name(key)}: {value:{spec}}")
        lines.append("")

    # "Balanced" prefers the runner-up on F1 when it is also among the fastest
    balanced = sorted_f1[0]
    if len(sorted_f1) > 1 and sorted_f1[1] in sorted_fps[:top_n]:
        balanced = sorted_f1[1]

    lines += [
        "RECOMMENDATIONS:",
        f"   • Real-time: {get_display_name(sorted_fps[0])}",
        f"   • Best accuracy: {get_display_name(sorted_f1[0])}",
        f"   • Balanced: {get_display_name(balanced)}",
    ]
    return "\n" + "\n".join(f"    {line}" if line else "" for line in lines) + "\n    "


def create_comprehensive_comparison(aggregated_metrics, config):
    """Create comprehensive comparison charts.

    Builds a 3x3 figure (accuracy bars, mAP, FPS, inference time, TP/FP/FN,
    overall score, radar chart, speed-vs-accuracy scatter, text summary) and
    saves it as a PNG into both the ``comparisons`` and ``metrics`` folders
    under ``config.output_dir``. Works with any number of models >= 1.

    Args:
        aggregated_metrics: model key -> metrics dict holding 'precision',
            'recall', 'f1_score', 'mAP@0.5', 'fps', 'avg_inference_time_ms',
            'true_positives', 'false_positives' and 'false_negatives'.
        config: Settings object providing `output_dir` and `timestamp`.

    Returns:
        None. Prints a notice and returns early when there are no metrics.
    """
    if len(aggregated_metrics) == 0:
        print(" No metrics to visualize")
        return

    model_keys = list(aggregated_metrics.keys())
    model_names = [get_display_name(key) for key in model_keys]

    def column(metric):
        return [aggregated_metrics[key][metric] for key in model_keys]

    # Prepare data
    precisions = column('precision')
    recalls = column('recall')
    f1_scores = column('f1_score')
    maps = column('mAP@0.5')
    fps_values = column('fps')
    inference_times = column('avg_inference_time_ms')

    # Color scheme; cycled so more models than palette entries cannot fail
    palette = ['#3498db', '#e74c3c', '#2ecc71', '#9b59b6', '#f39c12']
    colors = [palette[i % len(palette)] for i in range(len(model_keys))]

    # Create comprehensive figure with better spacing
    fig = plt.figure(figsize=(20, 14))
    try:
        gs = fig.add_gridspec(3, 3, hspace=0.4, wspace=0.35)

        # The title lists the models actually present in the metrics
        fig.suptitle(f"Comprehensive Model Comparison: {', '.join(model_names)}",
                     fontsize=18, fontweight='bold', y=0.98)

        # 1. Precision, Recall, F1-Score
        _draw_grouped_bars(
            fig.add_subplot(gs[0, 0]), model_names,
            [('Precision', precisions, '#2ecc71'),
             ('Recall', recalls, '#3498db'),
             ('F1-Score', f1_scores, '#e74c3c')],
            ylabel='Score', title='Accuracy Metrics', ylim=[0, 1.1]
        )

        # 2. mAP@0.5
        _draw_value_bars(
            fig.add_subplot(gs[0, 1]), model_names, maps, colors,
            ylabel='mAP@0.5', title='Mean Average Precision @IoU=0.5',
            value_format='{:.3f}', ylim=[0, 1.1]
        )

        # 3. FPS Comparison
        _draw_value_bars(
            fig.add_subplot(gs[0, 2]), model_names, fps_values, colors,
            ylabel='FPS', title='Inference Speed (Frames Per Second)',
            value_format='{:.1f}'
        )

        # 4. Inference Time
        _draw_value_bars(
            fig.add_subplot(gs[1, 0]), model_names, inference_times, colors,
            ylabel='Time (ms)', title='Average Inference Time',
            value_format='{:.1f}ms', value_fontsize=7
        )

        # 5. TP, FP, FN
        _draw_grouped_bars(
            fig.add_subplot(gs[1, 1]), model_names,
            [('TP', column('true_positives'), '#2ecc71'),
             ('FP', column('false_positives'), '#e74c3c'),
             ('FN', column('false_negatives'), '#f39c12')],
            ylabel='Count', title='Detection Statistics'
        )

        # 6. Overall Score (speed term saturates at 100 FPS)
        overall_scores = [
            (aggregated_metrics[k]['f1_score'] * 0.4 +
             aggregated_metrics[k]['mAP@0.5'] * 0.3 +
             min(aggregated_metrics[k]['fps'] / 100, 1) * 0.3)
            for k in model_keys
        ]
        _draw_value_bars(
            fig.add_subplot(gs[1, 2]), model_names, overall_scores, colors,
            ylabel='Score',
            title='Overall Performance Score\n(F1×0.4 + mAP×0.3 + Speed×0.3)',
            value_format='{:.3f}', title_fontsize=11, ylim=[0, 1.1]
        )

        # 7. Radar Chart - Accuracy
        ax7 = fig.add_subplot(gs[2, 0], projection='polar')
        categories = ['Precision', 'Recall', 'F1-Score', 'mAP@0.5']
        angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
        angles += angles[:1]  # repeat the first angle to close the polygon

        for i, model_key in enumerate(model_keys):
            values = [
                aggregated_metrics[model_key]['precision'],
                aggregated_metrics[model_key]['recall'],
                aggregated_metrics[model_key]['f1_score'],
                aggregated_metrics[model_key]['mAP@0.5']
            ]
            values += values[:1]
            ax7.plot(angles, values, 'o-', linewidth=2, label=get_display_name(model_key), color=colors[i])
            ax7.fill(angles, values, alpha=0.15, color=colors[i])

        ax7.set_xticks(angles[:-1])
        ax7.set_xticklabels(categories, fontsize=8)
        ax7.set_ylim(0, 1)
        ax7.set_title('Accuracy Comparison (Radar)', fontsize=12, fontweight='bold', pad=20)
        ax7.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=7)
        ax7.grid(True)

        # 8. Speed vs Accuracy Scatter
        ax8 = fig.add_subplot(gs[2, 1])
        for i, model_key in enumerate(model_keys):
            point = (aggregated_metrics[model_key]['fps'], aggregated_metrics[model_key]['f1_score'])
            ax8.scatter(
                point[0], point[1],
                s=200, c=colors[i], alpha=0.6, edgecolors='black', linewidth=2,
                label=get_display_name(model_key)
            )
            ax8.annotate(
                get_display_name(model_key), point,
                xytext=(5, 5), textcoords='offset points', fontsize=8, fontweight='bold'
            )

        ax8.set_xlabel('FPS (Speed)', fontweight='bold', fontsize=10)
        ax8.set_ylabel('F1-Score (Accuracy)', fontweight='bold', fontsize=10)
        ax8.set_title('Speed vs Accuracy Trade-off', fontsize=12, fontweight='bold', pad=10)
        ax8.grid(True, alpha=0.3)
        ax8.legend(fontsize=7)

        # 9. Summary Table
        ax9 = fig.add_subplot(gs[2, 2])
        ax9.axis('off')
        summary_text = _build_ranking_summary(aggregated_metrics, model_keys)
        ax9.text(0.1, 0.5, summary_text, fontsize=9, family='monospace',
                 verticalalignment='center',
                 bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))

        # Save figure
        fig.tight_layout()

        # Save to comparisons folder
        save_path_comp = f"{config.output_dir}/comparisons/comprehensive_comparison_{config.timestamp}.png"
        fig.savefig(save_path_comp, dpi=300, bbox_inches='tight')
        print(f"\n Comprehensive comparison saved to: {save_path_comp}")

        # Save to metrics folder
        save_path_metrics = f"{config.output_dir}/metrics/comprehensive_comparison_{config.timestamp}.png"
        fig.savefig(save_path_metrics, dpi=300, bbox_inches='tight')
        print(f" Also saved to metrics folder: {save_path_metrics}")
    finally:
        # IMPORTANT: always close the figure, even when drawing or saving
        # fails, so later plots do not overlap with it
        plt.close(fig)


# =============================
# SAVE RESULTS
# =============================
def save_all_results(aggregated_metrics, config):
    """Persist the aggregated metrics as JSON and CSV and print a summary.

    The same JSON/CSV pair is written to both the ``comparisons`` and the
    ``metrics`` sub-folders of ``config.output_dir``. Afterwards the metrics
    table (sorted by F1-score, best first) and the best model by F1-score,
    FPS and mAP@0.5 are printed.

    Args:
        aggregated_metrics: Mapping of model key -> dict of metric values.
            The summary reads the metrics ``f1_score``, ``precision``,
            ``recall``, ``mAP@0.5``, ``fps`` and ``avg_inference_time_ms``.
        config: Object exposing ``output_dir`` and ``timestamp``.

    Returns:
        None. Returns early, after printing a notice, when
        ``aggregated_metrics`` is empty.
    """
    if len(aggregated_metrics) == 0:
        print(" No metrics to save")
        return

    # Re-key by display name so the files and the printed tables share labels.
    data_with_display_names = {
        get_display_name(model_key): metrics
        for model_key, metrics in aggregated_metrics.items()
    }

    # Build both payloads once; they are identical for every target folder.
    serializable_metrics = convert_to_serializable(data_with_display_names)

    # Transpose so that each row is a model and each column is a metric.
    df = pd.DataFrame(data_with_display_names).T
    df.index.name = 'Model'
    df = df.round(4)

    for folder in ("comparisons", "metrics"):
        target_dir = f"{config.output_dir}/{folder}"
        # Make the function usable even if the folder was not created upfront.
        os.makedirs(target_dir, exist_ok=True)

        json_path = f"{target_dir}/all_models_metrics_{config.timestamp}.json"
        with open(json_path, 'w') as f:
            json.dump(serializable_metrics, f, indent=4)
        print(f" Metrics saved to JSON ({folder}): {json_path}")

        csv_path = f"{target_dir}/all_models_metrics_{config.timestamp}.csv"
        df.to_csv(csv_path)
        print(f" Metrics saved to CSV ({folder}): {csv_path}")

    # Full table, best F1-score first
    df_sorted = df.sort_values('f1_score', ascending=False)

    print("\n" + "="*100)
    print(" "*35 + "COMPREHENSIVE EVALUATION RESULTS")
    print("="*100)
    print(df_sorted.to_string())
    print("="*100)

    # Per-criterion winners
    print("\n" + "="*100)
    print(" "*40 + "DETAILED ANALYSIS")
    print("="*100)

    best_f1 = df_sorted.index[0]                   # index label (display name)
    best_fps = df.loc[df['fps'].idxmax()]          # full row of the fastest model
    best_map = df.loc[df['mAP@0.5'].idxmax()]      # full row of the best-mAP model

    print(f"\n BEST OVERALL ACCURACY: {best_f1}")
    print(f"   • F1-Score: {df.loc[best_f1, 'f1_score']:.4f}")
    print(f"   • Precision: {df.loc[best_f1, 'precision']:.4f}")
    print(f"   • Recall: {df.loc[best_f1, 'recall']:.4f}")
    print(f"   • mAP@0.5: {df.loc[best_f1, 'mAP@0.5']:.4f}")

    print(f"\n FASTEST MODEL: {best_fps.name}")
    print(f"   • FPS: {best_fps['fps']:.2f}")
    print(f"   • Inference Time: {best_fps['avg_inference_time_ms']:.2f}ms")
    print(f"   • F1-Score: {best_fps['f1_score']:.4f}")

    print(f"\n BEST mAP: {best_map.name}")
    print(f"   • mAP@0.5: {best_map['mAP@0.5']:.4f}")
    print(f"   • Precision: {best_map['precision']:.4f}")
    print(f"   • Recall: {best_map['recall']:.4f}")

    print("\n" + "="*100)


# =============================
# VISUALIZE SAMPLE PREDICTIONS
# =============================
def visualize_all_sample_predictions(models, image_paths, config, num_samples=3):
    """Save side-by-side prediction figures for a random sample of images.

    For each sampled image one figure is produced: the first panel shows the
    original image and each following panel shows the boxes predicted by one
    model. The figure is written to ``{config.output_dir}/visualizations``.

    Args:
        models: Mapping of model key -> model dict, passed through to
            ``UnifiedPredictor.predict``.
        image_paths: Sequence of path objects; ``.name`` and ``.stem`` are
            read from each entry.
        config: Object exposing ``output_dir`` and ``timestamp``.
        num_samples: Maximum number of images to sample (without replacement).

    Returns:
        None. Images that cannot be read by OpenCV are skipped silently.
    """
    print("\n" + "="*70)
    print(" "*15 + "VISUALIZING SAMPLE PREDICTIONS (ALL MODELS)")
    print("="*70)

    if len(image_paths) == 0:
        print(" No images to visualize")
        return

    # Unseeded random sample: different images are picked on every run.
    sample_indices = np.random.choice(len(image_paths), min(num_samples, len(image_paths)), replace=False)

    for idx in sample_indices:
        img_path = image_paths[idx]
        image = cv2.imread(str(img_path))

        if image is None:
            continue

        # OpenCV loads BGR; matplotlib expects RGB.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Grid with one panel for the original plus one per model, max 3 columns
        num_models = len(models)
        cols = min(3, num_models + 1)
        rows = (num_models + 1 + cols - 1) // cols

        # squeeze=False always yields a 2-D array of Axes, so flattening also
        # works for the 1x1 grid (no models), where subplots would otherwise
        # return a bare Axes object.
        fig, axes = plt.subplots(rows, cols, figsize=(6*cols, 5*rows), squeeze=False)
        axes = axes.flatten()

        fig.suptitle(f'Sample: {img_path.name}', fontsize=14, fontweight='bold')

        # Original image
        axes[0].imshow(image_rgb)
        axes[0].set_title('Original', fontweight='bold')
        axes[0].axis('off')

        # Predictions from each model (panel 0 is taken by the original)
        for ax_idx, (model_key, model_dict) in enumerate(models.items(), 1):
            img_copy = image_rgb.copy()
            display_name = get_display_name(model_key)

            try:
                detections, inference_time = UnifiedPredictor.predict(
                    model_dict, str(img_path), model_key
                )

                # Draw boxes, colour-coded by confidence (RGB tuples, since
                # the canvas was converted to RGB above).
                for box, score in zip(detections['boxes'], detections['scores']):
                    x1, y1, x2, y2 = map(int, box)

                    if score > 0.7:
                        color = (0, 255, 0)      # green: high confidence
                    elif score > 0.5:
                        color = (255, 255, 0)    # yellow: medium confidence
                    else:
                        color = (255, 165, 0)    # orange: low confidence

                    cv2.rectangle(img_copy, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(img_copy, f'{score:.2f}', (x1, y1-10),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                axes[ax_idx].imshow(img_copy)
                # inference_time is scaled by 1000 for the "ms" label
                # (presumably seconds — confirm against UnifiedPredictor).
                title = f'{display_name}\n{detections["num_detections"]} cars | {inference_time*1000:.1f}ms'
                axes[ax_idx].set_title(title, fontweight='bold', fontsize=9)
                axes[ax_idx].axis('off')

            except Exception as e:
                # Best effort: a failing model gets an error panel instead of
                # aborting the whole figure.
                axes[ax_idx].text(0.5, 0.5, f'Error: {str(e)[:30]}...',
                                ha='center', va='center', transform=axes[ax_idx].transAxes)
                axes[ax_idx].set_title(f'{display_name}\nError', fontweight='bold')
                axes[ax_idx].axis('off')

        # Hide unused subplots
        for ax_idx in range(num_models + 1, len(axes)):
            axes[ax_idx].axis('off')

        plt.tight_layout()
        save_path = f"{config.output_dir}/visualizations/all_models_sample_{img_path.stem}_{config.timestamp}.png"
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f" Saved: {save_path}")

        # Close the figure so figures do not accumulate across iterations.
        plt.close(fig)


# =============================
# VIDEO PROCESSING (ALL MODELS)
# =============================
def process_video_all_models(models, video_path, config, max_frames=300):
    """Run every model on a video and write one annotated video per model.

    Each frame is passed to ``UnifiedPredictor.predict`` once per model. The
    detections and an info overlay (model name, FPS, detection count, frame
    counter) are drawn on a copy of the frame, which is appended to that
    model's output file under ``{config.output_dir}/videos``. Per-model
    timing and detection statistics are printed at the end.

    Args:
        models: Mapping of model key -> model dict, passed through to
            ``UnifiedPredictor.predict``.
        video_path: Path of the input video.
        config: Object exposing ``output_dir`` and ``timestamp``.
        max_frames: Maximum number of frames to process; a falsy value
            processes the whole video.

    Returns:
        None. Returns early, after printing an error, if the video cannot be
        opened.
    """
    print("\n" + "="*70)
    print(" "*15 + "PROCESSING VIDEO WITH ALL MODELS")
    print("="*70)

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}")
        return

    fps = int(cap.get(cv2.CAP_PROP_FPS))
    if fps <= 0:
        # Some containers/backends report no frame rate; a writer opened with
        # 0 FPS would not produce a playable file, so use a default instead.
        fps = 30
        print(f"Warning: video reports no valid FPS, falling back to {fps}")
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if max_frames:
        total_frames = min(total_frames, max_frames)

    print(f"Video: {video_path}")
    print(f"Resolution: {width}x{height}")
    print(f"FPS: {fps}")
    print(f"Total frames to process: {total_frames}")

    # Resolve names and output paths once; both are reused in the frame loop
    # and in the final report.
    display_names = {key: get_display_name(key) for key in models.keys()}
    output_paths = {
        key: f"{config.output_dir}/videos/{display_names[key].replace(' ', '_')}_{config.timestamp}.mp4"
        for key in models.keys()
    }

    # Statistics
    processing_times = {key: [] for key in models.keys()}
    detection_counts = {key: [] for key in models.keys()}

    output_videos = {}
    progress_bar = None
    frame_count = 0

    # Everything that acquires a resource happens inside the try block so the
    # capture and all writers opened so far are released on any failure.
    try:
        # Create video writers for each model
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        for model_key in models.keys():
            output_videos[model_key] = cv2.VideoWriter(
                output_paths[model_key], fourcc, fps, (width, height)
            )

        print("\nProcessing frames...")
        progress_bar = tqdm(total=total_frames, desc="Video processing")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or (max_frames and frame_count >= max_frames):
                break

            frame_count += 1

            for model_key, model_dict in models.items():
                frame_copy = frame.copy()
                display_name = display_names[model_key]

                try:
                    # Time the whole predict call; the time value returned by
                    # predict itself is discarded.
                    start_time = time.perf_counter()
                    detections, _ = UnifiedPredictor.predict(model_dict, frame_copy, model_key)
                    inference_time = time.perf_counter() - start_time

                    processing_times[model_key].append(inference_time)
                    detection_counts[model_key].append(detections['num_detections'])

                    # Draw detections, colour-coded by confidence
                    # (BGR tuples, the channel order of OpenCV frames).
                    for box, score in zip(detections['boxes'], detections['scores']):
                        x1, y1, x2, y2 = map(int, box)

                        if score > 0.7:
                            color = (0, 255, 0)      # green: high confidence
                        elif score > 0.5:
                            color = (0, 255, 255)    # yellow: medium confidence
                        else:
                            color = (0, 165, 255)    # orange: low confidence

                        cv2.rectangle(frame_copy, (x1, y1), (x2, y2), color, 2)
                        cv2.putText(frame_copy, f'{score:.2f}', (x1, y1-10),
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

                    # Info overlay: blend a black banner over the top 80 px
                    current_fps = 1/inference_time if inference_time > 0 else 0
                    info_bg = frame_copy.copy()
                    cv2.rectangle(info_bg, (0, 0), (width, 80), (0, 0, 0), -1)
                    frame_copy = cv2.addWeighted(frame_copy, 0.7, info_bg, 0.3, 0)

                    cv2.putText(frame_copy, display_name, (10, 25),
                               cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 2)
                    cv2.putText(frame_copy, f"FPS: {current_fps:.1f}", (10, 50),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                    cv2.putText(frame_copy, f"Cars: {detections['num_detections']}", (200, 50),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
                    cv2.putText(frame_copy, f"Frame: {frame_count}/{total_frames}", (400, 50),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 200, 100), 2)

                    output_videos[model_key].write(frame_copy)

                except Exception as e:
                    # Best effort: a failing frame is reported and skipped for
                    # this model only; nothing is written for it.
                    print(f"\n❌ Error processing frame {frame_count} with {display_name}: {e}")

            progress_bar.update(1)

    finally:
        if progress_bar is not None:
            progress_bar.close()
        cap.release()
        for writer in output_videos.values():
            writer.release()

    # Print statistics
    print("\n" + "="*70)
    print(" "*20 + "VIDEO PROCESSING STATISTICS")
    print("="*70)

    for model_key in models.keys():
        display_name = display_names[model_key]
        print(f"\n{display_name}:")

        if not processing_times[model_key]:
            # Averaging an empty list would only yield NaN and a RuntimeWarning.
            print("  No frames were processed successfully")
            continue

        avg_time = np.mean(processing_times[model_key])
        avg_fps = 1 / avg_time if avg_time > 0 else 0
        avg_detections = np.mean(detection_counts[model_key])

        print(f"  Average inference time: {avg_time*1000:.2f}ms")
        print(f"  Average FPS: {avg_fps:.1f}")
        print(f"  Average detections: {avg_detections:.1f}")
        print(f"  Output: {output_paths[model_key]}")

    print("="*70)


# =============================
# MAIN EVALUATION PIPELINE
# =============================
def run_comprehensive_evaluation(config):
    """Run the complete evaluation pipeline end to end.

    Steps, in order: load all models, evaluate them on the test images,
    aggregate the metrics, draw the comparison chart, save the metrics,
    visualize sample predictions and, if ``config.test_video_path`` exists,
    process the test video with every model.

    Args:
        config: Evaluation configuration; it is forwarded to every stage, and
            ``test_video_path`` and ``output_dir`` are read here.

    Returns:
        None. Returns early, after printing an error, when no model loaded.
    """
    # Load all models
    models = ModelLoader.load_all_models(config)

    if len(models) == 0:
        print("Error: No models loaded.")
        return

    # Evaluate on images
    all_results, image_paths = evaluate_all_models(models, config)

    # Aggregate metrics
    aggregated_metrics = aggregate_all_metrics(all_results)

    # Create comprehensive comparison
    create_comprehensive_comparison(aggregated_metrics, config)

    # Save results
    save_all_results(aggregated_metrics, config)

    # Visualize sample predictions
    visualize_all_sample_predictions(models, image_paths, config, num_samples=3)

    # Process video (optional stage: skipped when the file is missing)
    video_found = os.path.exists(config.test_video_path)
    if video_found:
        process_video_all_models(models, config.test_video_path, config, max_frames=300)
    else:
        print(f"\n Video not found: {config.test_video_path}")

    print("\n" + "="*80)
    print("COMPREHENSIVE EVALUATION COMPLETED SUCCESSFULLY!")
    print(f"All results saved to: {config.output_dir}")
    print("\nGenerated outputs:")
    print("   • Comprehensive comparison chart")
    print("   • Individual model metrics (JSON & CSV)")
    print("   • Sample prediction visualizations")
    # Only advertise the videos when the video stage was actually run.
    if video_found:
        print("   • Processed videos for each model")
    print("="*80)


# =============================
# EXECUTE
# =============================
# Entry point: runs the full pipeline when this cell/script is executed.
# NOTE(review): `config` is a module-level name that is not defined in this
# part of the notebook — presumably the configuration object created in an
# earlier cell; confirm it exists before running on a fresh kernel.
if __name__ == "__main__":
    run_comprehensive_evaluation(config)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
 Output directories created at: /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3
 Timestamp: 20251206_075256

                    LOADING ALL MODELS
 YOLOv5n loaded successfully
 YOLOv8n loaded successfully
 YOLOv11n loaded successfully
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 165MB/s]


 Faster R-CNN loaded successfully


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]



 DETR loaded successfully

 Total models loaded: 5

                    EVALUATING ALL MODELS
Number of test images: 40
IoU threshold: 0.45


Evaluating YOLOv5n...
----------------------------------------------------------------------


YOLOv5n: 100%|██████████| 40/40 [00:29<00:00,  1.36it/s, detections=31, time=15.6ms]



Evaluating YOLOv8n...
----------------------------------------------------------------------


YOLOv8n: 100%|██████████| 40/40 [00:01<00:00, 29.10it/s, detections=32, time=15.6ms]



Evaluating YOLOv11n...
----------------------------------------------------------------------


YOLOv11n: 100%|██████████| 40/40 [00:01<00:00, 27.03it/s, detections=32, time=17.1ms]



Evaluating Faster R-CNN...
----------------------------------------------------------------------


Faster R-CNN: 100%|██████████| 40/40 [00:04<00:00,  8.74it/s, detections=34, time=72.8ms]



Evaluating DETR...
----------------------------------------------------------------------


DETR: 100%|██████████| 40/40 [00:05<00:00,  6.83it/s, detections=38, time=39.8ms]
  ax2.set_xticklabels(model_names, rotation=20, ha='right', fontsize=8)
  ax3.set_xticklabels(model_names, rotation=20, ha='right', fontsize=8)
  ax4.set_xticklabels(model_names, rotation=20, ha='right', fontsize=8)
  ax6.set_xticklabels(model_names, rotation=20, ha='right', fontsize=8)
  plt.tight_layout()



 Comprehensive comparison saved to: /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/comparisons/comprehensive_comparison_20251206_075256.png
 Also saved to metrics folder: /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/metrics/comprehensive_comparison_20251206_075256.png
 Metrics saved to JSON (comparisons): /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/comparisons/all_models_metrics_20251206_075256.json
 Metrics saved to CSV (comparisons): /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/comparisons/all_models_metrics_20251206_075256.csv
 Metrics saved to JSON (metrics): /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/metrics/all_models_metrics_20251206_075256.json
 Metrics saved to CSV (metrics): /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/metrics/all_models_metrics_20251206_075256.csv

                                   COMPREHENSIVE EVALUATION RESULTS
              precision

Video processing: 100%|██████████| 300/300 [02:25<00:00,  2.06it/s]


                    VIDEO PROCESSING STATISTICS

YOLOv5n:
  Average inference time: 15.94ms
  Average FPS: 62.7
  Average detections: 28.0
  Output: /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/videos/YOLOv5n_20251206_075256.mp4

YOLOv8n:
  Average inference time: 11.19ms
  Average FPS: 89.4
  Average detections: 27.7
  Output: /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/videos/YOLOv8n_20251206_075256.mp4

YOLOv11n:
  Average inference time: 13.60ms
  Average FPS: 73.5
  Average detections: 27.6
  Output: /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/videos/YOLOv11n_20251206_075256.mp4

Faster R-CNN:
  Average inference time: 152.52ms
  Average FPS: 6.6
  Average detections: 27.6
  Output: /content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_3/videos/Faster_R-CNN_20251206_075256.mp4

DETR:
  Average inference time: 168.18ms
  Average FPS: 5.9
  Average detections: 21.6
  Output: /content/drive/MyDrive/CuoiKi/compreh




In [None]:
import shutil
import os

source_folder = '/content/comprehensive_evaluation_results_2'
destination_folder = '/content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_2'

# Validate the source BEFORE touching the destination: deleting the existing
# Drive copy first would destroy the only copy whenever the source is missing.
if not os.path.isdir(source_folder):
    print(f"Error: Source folder '{source_folder}' not found.")
else:
    try:
        # Replace any previous copy so no stale files survive.
        if os.path.exists(destination_folder):
            print(f"Destination folder '{destination_folder}' already exists. Deleting it to ensure a clean copy.")
            shutil.rmtree(destination_folder)

        shutil.copytree(source_folder, destination_folder)
        print(f"Successfully copied '{source_folder}' to '{destination_folder}'")
    except Exception as e:
        # Report the real cause (permissions, Drive not mounted, ...).
        print(f"An error occurred: {e}")

Successfully copied '/content/comprehensive_evaluation_results_2' to '/content/drive/MyDrive/CuoiKi/comprehensive_evaluation_results_2'
