# RF-DETR Model Evaluation for Single-Class Vehicle Detection

**Research Project**: Comparative Analysis of CNN vs Vision Transformer

**Author**: Abdullah Waraich


## 1. Environment Setup

First, let's check GPU availability and install the required libraries for RF-DETR evaluation.

In [None]:
# Check GPU availability: the leading "!" runs nvidia-smi as a shell command
# from the notebook rather than as Python.
!nvidia-smi


In [None]:
# Install required libraries. rfdetr and supervision are pinned to exact
# versions; scikit-learn and seaborn are left unpinned. -q keeps pip quiet.
!pip install -q rfdetr==1.2.1 supervision==0.26.1 scikit-learn seaborn


## 2. Upload Model and Dataset

Upload the trained RF-DETR model (.pth file) and the test dataset (.zip archive).


In [None]:
from google.colab import files
import os
import zipfile

print(" Upload model (.pth file) and dataset zip file")

# The returned mapping is keyed by the names of the uploaded files.
uploaded = files.upload()

# Paths discovered among the uploads; each stays None when the corresponding
# file type was not provided.
model_path = None
dataset_path = None

for filename in uploaded:
    if filename.endswith('.pth'):
        model_path = filename
        print(f"✅ Model found: {filename}")
    elif filename.endswith('.zip'):
        # Extract dataset
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall('test_dataset')
        dataset_path = 'test_dataset'
        print(f"✅ Dataset extracted to: {dataset_path}")

        # Show dataset structure
        print("\n📁 Dataset structure:")
        # NOTE: the loop variable is deliberately NOT called `files`; doing so
        # would overwrite the `google.colab.files` module imported above and
        # break any later `files.upload()` / `files.download()` call.
        for root, _dirs, entries in os.walk(dataset_path):
            level = root.replace(dataset_path, '').count(os.sep)
            indent = ' ' * 2 * level
            print(f"{indent}{os.path.basename(root)}/")
            subindent = ' ' * 2 * (level + 1)
            for entry in entries[:3]:  # Show first 3 files
                print(f"{subindent}{entry}")
            if len(entries) > 3:
                print(f"{subindent}... and {len(entries)-3} more files")

if not model_path:
    print("No .pth model file found. Please upload your trained RF-DETR model.")
if not dataset_path:
    print("No dataset zip file found. Please upload your test dataset.")

## 3. Load Model and Dataset

Load the RF-DETR model and prepare the test dataset for single-class vehicle detection.

In [None]:
import supervision as sv
from rfdetr import RFDETRMedium
from PIL import Image
import numpy as np
import json

# Load RF-DETR model with trained weights
try:
    # Fail early with a clear message instead of handing None to the model
    # constructor when no weights file was uploaded.
    if not model_path:
        raise FileNotFoundError(
            "No .pth weights file available; upload the trained model first."
        )

    model = RFDETRMedium(pretrain_weights=model_path)
    print("RF-DETR model loaded successfully!")

    # Optimize for inference
    model.optimize_for_inference()
    print("Model optimized for inference!")

except Exception as e:
    print(f"Error loading model: {e}")
    raise

# Load test dataset in COCO format
print("\n Loading test dataset")

try:
    if not dataset_path:
        raise FileNotFoundError(
            "No extracted dataset available; upload the dataset zip first."
        )

    image_suffixes = ('.jpg', '.jpeg', '.png')

    # A single directory walk collects everything that is needed:
    #   * JSON files whose name mentions "annotation" (preferred),
    #   * every other JSON file (fallback when none of the above exist),
    #   * the first directory that is named like "images" or holds image files.
    # The loop variable is not named `files` so that the `google.colab.files`
    # module imported earlier in the notebook is not overwritten.
    annotation_jsons = []
    other_jsons = []
    images_dir = None
    for root, _dirs, entries in os.walk(dataset_path):
        for entry in entries:
            if entry.endswith('.json'):
                bucket = annotation_jsons if 'annotation' in entry.lower() else other_jsons
                bucket.append(os.path.join(root, entry))

        if images_dir is None and (
            'images' in os.path.basename(root).lower()
            or any(entry.lower().endswith(image_suffixes) for entry in entries)
        ):
            images_dir = root

    coco_files = annotation_jsons or other_jsons
    print(f"Found annotation files: {coco_files}")

    if not coco_files:
        raise FileNotFoundError(
            f"No COCO annotation (.json) file found under '{dataset_path}'."
        )

    if not images_dir:
        images_dir = dataset_path  # Fallback to root

    print(f"Images directory: {images_dir}")

    # Load dataset using supervision.
    # NOTE(review): when several annotation files are found, only the first one
    # in walk order is used — confirm the archive contains a single split.
    test_ds = sv.DetectionDataset.from_coco(
        images_directory_path=images_dir,
        annotations_path=coco_files[0]
    )

    print("Dataset loaded successfully!")
    print(f"Dataset classes: {test_ds.classes}")
    print(f"Number of test images: {len(test_ds)}")

    # Define single class for evaluation
    CLASSES = ['vehicle']  # Single class

    print(f"\n Model Configuration:")
    print(f"  Target class: {CLASSES[0]}")


except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

## 4. Run Inference with Timing Analysis

Run inference on all test images and measure performance.

In [None]:
import time
from tqdm import tqdm

print("Running RF-DETR inference on test dataset")
print(f"Processing {len(test_ds)} total images")
print(f"Class to be detected: {CLASSES[0]}")

# Storage for results. The four lists are index-aligned: entry k of each list
# always refers to the same successfully processed image.
all_predictions = []
all_ground_truth = []
inference_times = []
image_paths = []

# Process each image in the dataset
for i in tqdm(range(len(test_ds)), desc="Running inference"):
    try:
        path, _image, annotations = test_ds[i]

        # The image is opened lazily and handed to the model inside the timed
        # region, exactly as before; the context manager only makes sure the
        # file handle is released afterwards.
        with Image.open(path) as image_pil:
            # perf_counter is monotonic and high-resolution, unlike time.time()
            start_time = time.perf_counter()
            detections = model.predict(image_pil, threshold=0.5)
            inference_time = time.perf_counter() - start_time

    except Exception as e:
        print(f"Error processing image {i}: {e}")
        continue

    # Record results only once every step succeeded, so a failing image cannot
    # leave the lists with different lengths.
    image_paths.append(path)
    inference_times.append(inference_time)
    all_predictions.append(detections)  # no class filtering needed for single class
    all_ground_truth.append(annotations)

print(f"\nInference complete")
if inference_times:
    mean_inference_time = np.mean(inference_times)
    print(f"Average inference time: {mean_inference_time:.4f} seconds")
    print(f"Average FPS: {1/mean_inference_time:.2f}")
else:
    # Avoid computing the mean of an empty list / dividing by it.
    print("No images were processed successfully; timing statistics unavailable.")
print(f"Processed {len(all_predictions)} images")

## 5. Calculate Performance Metrics

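Calculate performance metrics for vehicle detection. mAP is computed from the predicted and ground-truth boxes, whereas precision, recall, F1-score and accuracy are computed per image (vehicle present vs. absent).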

In [None]:
from supervision.metrics import MeanAveragePrecision
from sklearn.metrics import precision_recall_fscore_support
import pandas as pd

print("Calculating Performance Metrics")


# Calculate mAP using supervision
map_metric = MeanAveragePrecision()
map_result = map_metric.update(all_predictions, all_ground_truth).compute()

print("MEAN AVERAGE PRECISION (mAP) RESULTS:")
print(f"mAP@0.5: {map_result.map50:.3f}")
print(f"mAP@0.5:0.95: {map_result.map50_95:.3f}")
print(f"mAP@0.75: {map_result.map75:.3f}")

# Calculate precision, recall and F1 Score
print("\nPRECISION, RECALL, F1-SCORE ANALYSIS:")

# Image-level binary labels: 1 when the image has at least one box, else 0.
all_pred_binary = np.array([1 if len(pred) > 0 else 0 for pred in all_predictions])
all_true_binary = np.array([1 if len(gt) > 0 else 0 for gt in all_ground_truth])

# Confusion counts are computed unconditionally so that later cells, which
# read them, never hit an undefined name when there are no positive samples.
total_images = len(all_true_binary)
true_positives = int(np.sum((all_true_binary == 1) & (all_pred_binary == 1)))
false_positives = int(np.sum((all_true_binary == 0) & (all_pred_binary == 1)))
false_negatives = int(np.sum((all_true_binary == 1) & (all_pred_binary == 0)))
true_negatives = int(np.sum((all_true_binary == 0) & (all_pred_binary == 0)))

if np.sum(all_true_binary) > 0:  # Only if we have positive samples
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_true_binary, all_pred_binary, average='binary', zero_division=0
    )

    print(f"\n{CLASSES[0].upper()} DETECTION:")
    print(f"  Precision: {precision:.3f}")
    print(f"  Recall: {recall:.3f}")
    print(f"  F1-Score: {f1:.3f}")

    # total_images > 0 is guaranteed here because at least one label is 1.
    accuracy = (true_positives + true_negatives) / total_images

    print(f"\n DETAILED METRICS:")
    print(f"  Accuracy: {accuracy:.3f}")
    print(f"  True Positives: {true_positives}")
    print(f"  False Positives: {false_positives}")
    print(f"  False Negatives: {false_negatives}")
    print(f"  True Negatives: {true_negatives}")

else:
    print(f"\n{CLASSES[0].upper()}: No ground truth instances found")
    precision = recall = f1 = accuracy = 0.0

## 6. Generate Confusion Matrix

Create and visualize the confusion matrix for vehicle detection.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

print("Generating Confusion Matrix")

# Create confusion matrix for binary classification (vehicle/no vehicle)
binary_classes = ['No Vehicle', 'Vehicle']
binary_labels = [0, 1]  # label ids matching binary_classes, in the same order

# Generate confusion matrix
cm = confusion_matrix(all_true_binary, all_pred_binary, labels=binary_labels)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Oranges',
            xticklabels=binary_classes,
            yticklabels=binary_classes)
plt.title('RF-DETR Confusion Matrix\nSingle-Class Vehicle Detection',
          fontsize=14, fontweight='bold')
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('True', fontsize=12)
plt.tight_layout()
plt.show()

# Print classification report.
# `labels` is passed explicitly: without it the report infers the classes from
# the data and raises a ValueError when only one of the two classes occurs
# (e.g. every test image contains a vehicle), because the two target_names
# would no longer match the number of inferred classes.
print("\n DETAILED CLASSIFICATION RESULTS:")
print(classification_report(all_true_binary, all_pred_binary,
                            labels=binary_labels,
                            target_names=binary_classes,
                            zero_division=0))

print(f"\n Summary Statistics:")
print(f"  Total images: {len(all_true_binary)}")
print(f"  Images with vehicles (GT): {np.sum(all_true_binary)}")
print(f"  Images with vehicles (Pred): {np.sum(all_pred_binary)}")
print(f"  Detection accuracy: {accuracy:.3f}")

## 7. Inference Time Analysis

Analyze the computational efficiency of the RF-DETR model.

In [None]:
print("RF-DETR INFERENCE TIME ANALYSIS")

# Summary statistics over the per-image latencies recorded during inference.
timings = np.asarray(inference_times)
avg_time = timings.mean()
std_time = timings.std()
min_time = timings.min()
max_time = timings.max()
fps = 1 / avg_time

print(f"Timing Statistics:")
print(f"  Average time per image: {avg_time:.4f} seconds")
print(f"  Standard deviation: {std_time:.4f} seconds")
print(f"  Minimum time: {min_time:.4f} seconds")
print(f"  Maximum time: {max_time:.4f} seconds")
print(f"  Average FPS: {fps:.2f}")

# Two side-by-side panels: latency histogram (left) and per-image trace (right).
mean_label = f'Mean: {avg_time:.4f}s'
fig, (hist_ax, trace_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: distribution of inference times with the mean marked.
hist_ax.hist(inference_times, bins=30, alpha=0.7, color='orange', edgecolor='black')
hist_ax.axvline(avg_time, color='red', linestyle='--', label=mean_label)
hist_ax.set_xlabel('Inference Time (seconds)')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('RF-DETR Inference Time Distribution')
hist_ax.legend()
hist_ax.grid(True, alpha=0.3)

# Right panel: latency of the first 100 images in processing order.
trace_ax.plot(inference_times[:100], marker='o', markersize=2, alpha=0.7, color='orange')
trace_ax.axhline(avg_time, color='red', linestyle='--', label=mean_label)
trace_ax.set_xlabel('Image Index')
trace_ax.set_ylabel('Inference Time (seconds)')
trace_ax.set_title('Inference Time per Image (First 100)')
trace_ax.legend()
trace_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()


## 8. Visual Results Analysis

Visualize detection results to assess RF-DETR performance.

In [None]:
print("VISUAL RESULTS ANALYSIS")

# Palette used for the prediction boxes and their labels.
color_palette = sv.ColorPalette.from_hex(
    ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FECA57", "#FF9FF3"]
)

# Pick up to nine images, evenly spaced across the processed set.
processed_count = len(all_predictions)
num_samples = min(9, processed_count)
sample_indices = np.linspace(0, processed_count - 1, num_samples, dtype=int)

result_images = []
titles = []

for idx in sample_indices:
    image = Image.open(image_paths[idx])
    pred = all_predictions[idx]
    gt = all_ground_truth[idx]

    # Scale label text and line width to the image resolution.
    resolution = image.size
    text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution)
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution)

    annotated_image = image.copy()

    # Predictions: coloured boxes labelled with the confidence score.
    if len(pred) > 0:
        pred_labels = [f"vehicle {confidence:.2f}" for confidence in pred.confidence]
        bbox_annotator = sv.BoxAnnotator(color=color_palette, thickness=thickness)
        label_annotator = sv.LabelAnnotator(
            color=color_palette,
            text_color=sv.Color.WHITE,
            text_scale=text_scale
        )
        annotated_image = bbox_annotator.annotate(annotated_image, pred)
        annotated_image = label_annotator.annotate(annotated_image, pred, pred_labels)

    # Ground truth: thin white outlines drawn on top for comparison.
    if len(gt) > 0:
        import cv2
        canvas = np.array(annotated_image)
        outline_width = max(1, thickness//2)
        for x1, y1, x2, y2 in gt.xyxy.astype(int):
            cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 255, 255), outline_width)
        annotated_image = Image.fromarray(canvas)

    result_images.append(annotated_image)

    # Title shows how many boxes each side has.
    gt_count = 0 if gt is None else len(gt)
    pred_count = 0 if pred is None else len(pred)
    titles.append(f"GT: {gt_count} | Pred: {pred_count}")

# Display results in grid
sv.plot_images_grid(
    images=result_images,
    grid_size=(3, 3),
    titles=titles,
    size=(15, 15)
)

print("\n Legend:")
print("  • Colored boxes: RF-DETR predictions with confidence scores")
print("  • White outlines: Ground truth annotations")
print("  • GT: Ground truth count | Pred: Prediction count")

## 9. Results Summary

Summary of all metrics for research comparison with YOLOv11.

In [None]:
import json
from datetime import datetime


# Create comprehensive results dictionary
results_dict = {
    'model_info': {
        'architecture': 'RF-DETR Medium',
        'approach': 'Vision Transformer-based',
        'classes': CLASSES,
        'num_classes': len(CLASSES),
        'detection_type': 'single-class'
    },
    'dataset_info': {
        'num_test_images': len(test_ds),
        # NOTE(review): hard-coded value, not measured from the dataset —
        # confirm it matches the actual test images.
        'image_resolution': '640x640',
        'processed_successfully': len(all_predictions)
    },
    'performance_metrics': {
        'mAP': {
            'mAP50': float(map_result.map50),
            'mAP50_95': float(map_result.map50_95),
            'mAP75': float(map_result.map75)
        },
        'binary_classification': {
            'precision': float(precision),
            'recall': float(recall),
            'f1_score': float(f1),
            'accuracy': float(accuracy)
        },
        'detection_counts': {
            'true_positives': int(true_positives),
            'false_positives': int(false_positives),
            'false_negatives': int(false_negatives),
            'true_negatives': int(true_negatives)
        }
    },
    'computational_efficiency': {
        'avg_inference_time': float(avg_time),
        'std_inference_time': float(std_time),
        'min_inference_time': float(min_time),
        'max_inference_time': float(max_time),
        'avg_fps': float(fps)
    },
    'evaluation_timestamp': datetime.now().isoformat()
}

# Display the summary
print("RF-DETR EVALUATION SUMMARY")

print(f" Dataset: {len(test_ds)} test images")
print(f" Target class: {CLASSES[0]}")
print("\n KEY PERFORMANCE METRICS:")
print(f"  - mAP@0.5: {map_result.map50:.3f}")
print(f"  - mAP@0.5:0.95: {map_result.map50_95:.3f}")
print(f"  - Precision: {precision:.3f}")
print(f"  - Recall: {recall:.3f}")
print(f"  - F1-Score: {f1:.3f}")
print(f"  - Accuracy: {accuracy:.3f}")
print("\n COMPUTATIONAL EFFICIENCY:")
print(f"  - Average inference time: {avg_time:.4f} seconds")
print(f"  - Average FPS: {fps:.2f}")
# Throughput is the number of images that were actually timed divided by the
# total time; using len(test_ds) would overstate it whenever an image failed.
print(f"  - Processing speed: {len(inference_times)/sum(inference_times):.2f} images/second")

print("\n DETECTION PERFORMANCE:")
print(f"  - True Positives: {true_positives}")
print(f"  - False Positives: {false_positives}")
print(f"  - False Negatives: {false_negatives}")
print(f"  - True Negatives: {true_negatives}")

# Save results to JSON file
with open('rfdetr_single_class_results.json', 'w') as f:
    json.dump(results_dict, f, indent=2)

print("\n Results saved to: rfdetr_single_class_results.json")

# Download results file.
# The Colab helper is re-imported under a private alias because earlier cells
# reuse the name `files` as an os.walk loop variable, which replaces the
# module with a plain list and made the original download call fail silently.
try:
    from google.colab import files as colab_files
    colab_files.download('rfdetr_single_class_results.json')
    print("Results file downloaded successfully!")
except Exception as e:
    print("Results file saved locally (download from file panel)")
    print(f"  Automatic download unavailable: {e}")

## 10. Prepare Data for Statistical Comparison

Generate per-image detection results in a format suitable for a McNemar's test comparison with YOLOv11.

In [None]:
# Banner marking the start of the statistical-comparison step.
print("PREPARING DATA FOR STATISTICAL COMPARISON")


def calculate_iou(box1, box2):
    """Return the Intersection over Union of two axis-aligned boxes.

    Both boxes are given as ``(x_min, y_min, x_max, y_max)``. The result is
    ``0.0`` when the boxes do not overlap (touching edges count as no
    overlap) or when the union area is not positive.
    """
    a_left, a_top, a_right, a_bottom = box1
    b_left, b_top, b_right, b_bottom = box2

    # Corners of the overlapping rectangle (if any).
    left = max(a_left, b_left)
    top = max(a_top, b_top)
    right = min(a_right, b_right)
    bottom = min(a_bottom, b_bottom)

    # An empty or degenerate overlap means the boxes share no area.
    if right <= left or bottom <= top:
        return 0.0

    overlap = (right - left) * (bottom - top)

    area_a = (a_right - a_left) * (a_bottom - a_top)
    area_b = (b_right - b_left) * (b_bottom - b_top)
    union = area_a + area_b - overlap

    if union > 0:
        return overlap / union
    return 0.0

# Imported once here instead of on every loop iteration.
from pathlib import Path

# Create per-image detection results for McNemar's test
detection_results = []

for i, (pred, gt, img_path) in enumerate(zip(all_predictions, all_ground_truth, image_paths)):
    img_name = Path(img_path).stem

    has_ground_truth = len(gt) > 0
    has_prediction = len(pred) > 0

    if has_ground_truth and has_prediction:
        # Success when at least one prediction with confidence > 0.5 overlaps
        # at least one ground-truth box with IoU > 0.5.
        detection_success = any(
            conf > 0.5 and calculate_iou(gt_box, pred_box) > 0.5
            for gt_box in gt.xyxy
            for pred_box, conf in zip(pred.xyxy, pred.confidence)
        )
    else:
        # Only a correct negative (nothing annotated, nothing predicted)
        # counts as success; a miss or a false alarm does not.
        detection_success = not has_ground_truth and not has_prediction

    detection_results.append({
        'image_name': img_name,
        'has_ground_truth': has_ground_truth,
        'has_prediction': has_prediction,
        'detection_success': detection_success,
        'num_gt_objects': len(gt),
        'num_pred_objects': len(pred),
        'inference_time': inference_times[i]
    })

# Convert to DataFrame
results_df = pd.DataFrame(detection_results)

# Calculate success rate
success_rate = results_df['detection_success'].mean()
print(f"🎯 Overall Detection Success Rate: {success_rate:.3f}")

# Save detection results for McNemar's test
results_df.to_csv('rfdetr_single_class_detection_results.csv', index=False)

print(f"\n Detection Results Summary:")
print(f"  - Images with ground truth: {results_df['has_ground_truth'].sum()}")
print(f"  - Images with predictions: {results_df['has_prediction'].sum()}")
print(f"  - Successful detections: {results_df['detection_success'].sum()}")
print(f"  - Success rate: {success_rate:.1%}")

# Generate research comparison summary
comparison_summary = f"""
RF-DETR vs YOLOv11 SINGLE-CLASS COMPARISON SUMMARY
==================================================

Research Project: CNN vs Vision Transformer for Satellite Vehicle Detection
Student: Abdullah Waraich (ID: 2401554)
Supervisor: Dr. Adrian Clark

ARCHITECTURE COMPARISON:
• YOLOv11: CNN-based single-stage detector
• RF-DETR: Vision Transformer-based detector

MODEL CONFIGURATION:
• Single class: {CLASSES[0]}
• Simplified evaluation pipeline
• Clean comparison without class imbalance issues

KEY FINDINGS:
• RF-DETR mAP@0.5: {map_result.map50:.3f}
• YOLOv11 mAP@0.5: [To be filled from YOLOv11 evaluation]
• RF-DETR inference time: {avg_time:.4f}s
• YOLOv11 inference time: [To be filled from YOLOv11 evaluation]
• RF-DETR accuracy: {accuracy:.3f}
• YOLOv11 accuracy: [To be filled from YOLOv11 evaluation]

NEXT STEPS:
1. Run McNemar's Test for statistical significance
2. Compare computational efficiency
3. Analyze failure cases and strengths
4. Complete dissertation analysis

Files generated:
• rfdetr_single_class_results.json
• rfdetr_single_class_detection_results.csv
"""

# Save comparison summary. The text contains non-ASCII bullet characters, so
# the encoding is stated explicitly rather than left to the platform default.
with open('research_comparison_single_class.txt', 'w', encoding='utf-8') as f:
    f.write(comparison_summary)

print(comparison_summary)