In [1]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
import numpy as np
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import linear_sum_assignment
from model_evaluation import *


In [2]:
# ---- Dataset ----------------------------------------------------------------
# Fruit categories covered by the detectors.
# NOTE(review): presumably listed in label-index order — confirm against the dataset config.
CLASS_NAMES = [
    "apple",
    "avocado",
    "banana",
    "kiwi",
    "lemon",
    "orange",
    "pear",
    "pomegranate",
    "strawberry",
    "watermelon",
]
TEST_IMAGES = "dataset/split/test/images"
TEST_LABELS = "dataset/split/test/labels"

# ---- Inference settings -----------------------------------------------------
# Confidence threshold handed to every evaluation call in this notebook.
CONFIDENCE = 0.5
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Per-model checkpoint and report locations ------------------------------
RCNN_MODEL_WEIGHTS = Path("models", "faster_rcnn_fruits.pth")
RCNN_OUTPUT_DIR = "evaluation_results/faster_rcnn"

YOLOV8N_MODEL_WEIGHTS = Path("runs", "fruits_yolov8n", "weights", "best.pt")
YOLOV8N_OUTPUT_DIR = "evaluation_results/yolov8n"

YOLOV8M_MODEL_WEIGHTS = Path("runs", "fruits_yolov8m", "weights", "best.pt")
YOLOV8M_OUTPUT_DIR = "evaluation_results/yolov8m"

YOLO11L_MODEL_WEIGHTS = Path("runs", "fruits_yolov11l", "weights", "best.pt")
YOLO11L_OUTPUT_DIR = "evaluation_results/yolov11l"


In [3]:
# Metrics objects keyed by model display name; each evaluation cell adds one
# entry and the comparison cell at the end reads them all back.
all_results = dict()

Evaluation for Faster_RCNN

In [4]:
# Score the Faster R-CNN checkpoint on the test split. The evaluator comes
# from the project-local `model_evaluation` module (wildcard import above).
rcnn_metrics = evaluate_faster_rcnn(
    model_weights_path=RCNN_MODEL_WEIGHTS,
    test_image_dir=TEST_IMAGES,
    test_label_dir=TEST_LABELS,
    output_dir=RCNN_OUTPUT_DIR,
    confidence_threshold=CONFIDENCE,
    device=DEVICE,
)

# Register the result so the comparison cell can include this model.
all_results['Faster_RCNN'] = rcnn_metrics

# Short headline summary, one item per output line.
print(
    "\n" + "=" * 60,
    "FASTER_RCNN MODEL EVALUATION COMPLETE!",
    "=" * 60,
    f"mAP@0.5: {rcnn_metrics.map_50:.4f}",
    f"mAP@0.75: {rcnn_metrics.map_75:.4f}",
    f"Recall: {rcnn_metrics.recall_50:.4f}",
    f"Precision: {rcnn_metrics.precision_50:.4f}",
    sep="\n",
)

Loading Faster R-CNN model...




Found 127 test images
Processing 10/127...
Processing 20/127...
Processing 30/127...
Processing 40/127...
Processing 50/127...
Processing 60/127...




Processing 70/127...
Processing 80/127...
Processing 90/127...
Processing 100/127...
Processing 110/127...
Processing 120/127...

Generating evaluation report...
BOUNDING BOX EVALUATION REPORT: Faster R-CNN

Confidence Threshold: 0.5

------------------------------------------------------------
PRIMARY METRICS
------------------------------------------------------------
mAP@0.5:          0.8032
mAP@0.75:         0.7086
mAP@[0.5:0.95]:   0.6415
Mean IoU:         0.8877

------------------------------------------------------------
DETECTION METRICS (IoU@0.5)
------------------------------------------------------------
Recall:           0.8709
Precision:        0.6921
F1-Score:         0.7713

------------------------------------------------------------
ERROR ANALYSIS
------------------------------------------------------------
False Positives:  141
False Negatives:  47
Total Predictions: 458
Total Ground Truths: 364



Report saved to: evaluation_results/faster_rcnn

FASTER_RCNN MODEL EV

Evaluation for YOLOv8n

In [5]:
# Score the YOLOv8n checkpoint on the test split. The evaluator comes from
# the project-local `model_evaluation` module (wildcard import above).
yolov8n_metrics = evaluate_yolo(
    model_weights_path=YOLOV8N_MODEL_WEIGHTS,
    test_image_dir=TEST_IMAGES,
    test_label_dir=TEST_LABELS,
    output_dir=YOLOV8N_OUTPUT_DIR,
    confidence_threshold=CONFIDENCE,
    device=DEVICE,
)

# Register the result so the comparison cell can include this model.
all_results['YoloV8n'] = yolov8n_metrics

# Short headline summary, one item per output line.
print(
    "\n" + "=" * 60,
    "YOLOV8N MODEL EVALUATION COMPLETE!",
    "=" * 60,
    f"mAP@0.5: {yolov8n_metrics.map_50:.4f}",
    f"mAP@0.75: {yolov8n_metrics.map_75:.4f}",
    f"Recall: {yolov8n_metrics.recall_50:.4f}",
    f"Precision: {yolov8n_metrics.precision_50:.4f}",
    sep="\n",
)

Loading YOLO model from runs/fruits_yolov8n/weights/best.pt...
Found 127 test images
Processing 10/127...
Processing 20/127...
Processing 30/127...
Processing 40/127...
Processing 50/127...
Processing 60/127...
Processing 70/127...
Processing 80/127...
Processing 90/127...
Processing 100/127...
Processing 110/127...
Processing 120/127...

Generating evaluation report...
BOUNDING BOX EVALUATION REPORT: YOLO (best)

Confidence Threshold: 0.5

------------------------------------------------------------
PRIMARY METRICS
------------------------------------------------------------
mAP@0.5:          0.7970
mAP@0.75:         0.7002
mAP@[0.5:0.95]:   0.6372
Mean IoU:         0.8908

------------------------------------------------------------
DETECTION METRICS (IoU@0.5)
------------------------------------------------------------
Recall:           0.8297
Precision:        0.8629
F1-Score:         0.8459

------------------------------------------------------------
ERROR ANALYSIS
--------------

Evaluation for YOLOv8m

In [6]:
# Score the YOLOv8m checkpoint on the test split. The evaluator comes from
# the project-local `model_evaluation` module (wildcard import above).
yolov8m_metrics = evaluate_yolo(
    model_weights_path=YOLOV8M_MODEL_WEIGHTS,
    test_image_dir=TEST_IMAGES,
    test_label_dir=TEST_LABELS,
    output_dir=YOLOV8M_OUTPUT_DIR,
    confidence_threshold=CONFIDENCE,
    device=DEVICE,
)

# Register the result so the comparison cell can include this model.
all_results['YoloV8m'] = yolov8m_metrics

# Short headline summary, one item per output line.
print(
    "\n" + "=" * 60,
    "YOLOV8M MODEL EVALUATION COMPLETE!",
    "=" * 60,
    f"mAP@0.5: {yolov8m_metrics.map_50:.4f}",
    f"mAP@0.75: {yolov8m_metrics.map_75:.4f}",
    f"Recall: {yolov8m_metrics.recall_50:.4f}",
    f"Precision: {yolov8m_metrics.precision_50:.4f}",
    sep="\n",
)

Loading YOLO model from runs/fruits_yolov8m/weights/best.pt...
Found 127 test images
Processing 10/127...
Processing 20/127...
Processing 30/127...
Processing 40/127...
Processing 50/127...
Processing 60/127...
Processing 70/127...
Processing 80/127...
Processing 90/127...
Processing 100/127...
Processing 110/127...
Processing 120/127...

Generating evaluation report...
BOUNDING BOX EVALUATION REPORT: YOLO (best)

Confidence Threshold: 0.5

------------------------------------------------------------
PRIMARY METRICS
------------------------------------------------------------
mAP@0.5:          0.8019
mAP@0.75:         0.7036
mAP@[0.5:0.95]:   0.6486
Mean IoU:         0.8923

------------------------------------------------------------
DETECTION METRICS (IoU@0.5)
------------------------------------------------------------
Recall:           0.8407
Precision:        0.8768
F1-Score:         0.8583

------------------------------------------------------------
ERROR ANALYSIS
--------------

Evaluation for YOLO11l

In [8]:
# Score the YOLO11l checkpoint on the test split. The evaluator comes from
# the project-local `model_evaluation` module (wildcard import above).
yolo11l_metrics = evaluate_yolo(
    model_weights_path=YOLO11L_MODEL_WEIGHTS,
    test_image_dir=TEST_IMAGES,
    test_label_dir=TEST_LABELS,
    output_dir=YOLO11L_OUTPUT_DIR,
    confidence_threshold=CONFIDENCE,
    device=DEVICE,
)

# Register the result so the comparison cell can include this model.
all_results['Yolo11l'] = yolo11l_metrics

# Short headline summary, one item per output line.
print(
    "\n" + "=" * 60,
    "YOLO11L MODEL EVALUATION COMPLETE!",
    "=" * 60,
    f"mAP@0.5: {yolo11l_metrics.map_50:.4f}",
    f"mAP@0.75: {yolo11l_metrics.map_75:.4f}",
    f"Recall: {yolo11l_metrics.recall_50:.4f}",
    f"Precision: {yolo11l_metrics.precision_50:.4f}",
    sep="\n",
)

Loading YOLO model from runs/fruits_yolov11l/weights/best.pt...
Found 127 test images
Processing 10/127...
Processing 20/127...
Processing 30/127...
Processing 40/127...
Processing 50/127...
Processing 60/127...
Processing 70/127...
Processing 80/127...
Processing 90/127...
Processing 100/127...
Processing 110/127...
Processing 120/127...

Generating evaluation report...
BOUNDING BOX EVALUATION REPORT: YOLO (best)

Confidence Threshold: 0.5

------------------------------------------------------------
PRIMARY METRICS
------------------------------------------------------------
mAP@0.5:          0.7119
mAP@0.75:         0.6930
mAP@[0.5:0.95]:   0.5984
Mean IoU:         0.8919

------------------------------------------------------------
DETECTION METRICS (IoU@0.5)
------------------------------------------------------------
Recall:           0.7802
Precision:        0.8987
F1-Score:         0.8353

------------------------------------------------------------
ERROR ANALYSIS
-------------

Model Comparison Summary

In [9]:
# Side-by-side comparison of every model registered in `all_results`.
# Each value in `all_results` is read through these attributes: map_50, map_75,
# map_50_95, mean_iou, recall_50 and precision_50.
print("\n" + "="*90)
print("MODEL COMPARISON - BOUNDING BOX ACCURACY")
print("="*90)
# Header widths mirror the row format below so the columns line up.
print(f"{'Model':<12} {'mAP@0.5':<10} {'mAP@0.75':<10} {'mAP@.5:.95':<12} "
        f"{'Mean IoU':<10} {'Recall':<10} {'Precision':<10}")
print("-"*90)

for model_name, metrics in all_results.items():
    print(f"{model_name:<12} {metrics.map_50:<10.4f} {metrics.map_75:<10.4f} "
            f"{metrics.map_50_95:<12.4f} {metrics.mean_iou:<10.4f} "
            f"{metrics.recall_50:<10.4f} {metrics.precision_50:<10.4f}")

print("="*90)

# Name the top-scoring model for each headline metric.
print("\nBEST MODELS:")
if all_results:
    for label, attr in (
        ("mAP@0.5", "map_50"),
        ("mAP@0.75", "map_75"),
        ("Recall", "recall_50"),
        ("Precision", "precision_50"),
    ):
        # On ties max() keeps the first model in insertion order.
        best_name = max(all_results, key=lambda name: getattr(all_results[name], attr))
        print(f"  Best {label}: {best_name}")
else:
    # max() raises ValueError on an empty mapping, so report the situation
    # instead of crashing when no evaluation cell has been run.
    print("  (no models evaluated)")


MODEL COMPARISON - BOUNDING BOX ACCURACY
Model        mAP@0.5    mAP@0.75   mAP@.5:.95   Mean IoU   Recall     Precision 
------------------------------------------------------------------------------------------
Faster_RCNN  0.8032     0.7086     0.6415       0.8877     0.8709     0.6921    
YoloV8n      0.7970     0.7002     0.6372       0.8908     0.8297     0.8629    
YoloV8m      0.8019     0.7036     0.6486       0.8923     0.8407     0.8768    
Yolo11l      0.7119     0.6930     0.5984       0.8919     0.7802     0.8987    

BEST MODELS:
  Best mAP@0.5: Faster_RCNN
  Best mAP@0.75: Faster_RCNN
  Best Recall: Faster_RCNN
  Best Precision: Yolo11l
