In [26]:
import torch
import cv2 as cv
import numpy as np
import os
import json
from ultralytics import YOLO
from torchmetrics.detection.mean_ap import MeanAveragePrecision

In [27]:
COCO_ANNOTATIONS_PATH = r"D:\Object Detection\Object-Detector-and-Distance-Estimator\COCO dataset\annotations\instances_val2017.json"
COCO_IMAGES_PATH = r"D:\Object Detection\Object-Detector-and-Distance-Estimator\COCO dataset\val2017"
KNOWN_DISTANCES_PATH = r"D:\Object Detection\Object-Detector-and-Distance-Estimator\known_distances"


In [28]:
# Load YOLO model
model = YOLO('yolov8m.pt')  

In [29]:
# Load COCO ground truth annotations
def load_coco_annotations():
    with open(COCO_ANNOTATIONS_PATH, 'r') as f:
        data = json.load(f)
    annotations = {}
    for ann in data['annotations']:
        image_id = ann['image_id']
        x, y, width, height = ann['bbox']
        bbox = [x, y, width, height]  # Keep in COCO format
        category_id = ann['category_id']
        if image_id not in annotations:
            annotations[image_id] = []
        annotations[image_id].append((bbox, category_id))
    return annotations

In [30]:
# Initialize mAP metric
metric = MeanAveragePrecision(iou_thresholds=[0.5, 0.75, 0.95])

In [31]:
# Function to evaluate object detection accuracy
def evaluate_object_detection():
    ground_truth_annotations = load_coco_annotations()
    predictions = []
    gt_data = []

    # Batch processing
    batch_size = 16  # Adjust based on available memory
    image_paths = [os.path.join(COCO_IMAGES_PATH, f) for f in os.listdir(COCO_IMAGES_PATH) if f.endswith(".jpg")]

    for i in range(0, len(image_paths), batch_size):
        batch = image_paths[i:i + batch_size]  # Get a batch of images
        results = model.predict(batch, conf=0.25, max_det=100, stream=True, verbose=False)

        for img_path, result in zip(batch, results):  # Process results one by one
            image_id = int(os.path.basename(img_path).split('.')[0])

            detections = result.boxes.data.cpu().numpy()
            pred_boxes = []
            pred_scores = []
            pred_labels = []

            for detection in detections:
                x1, y1, x2, y2, confidence, class_id = detection
                pred_boxes.append([x1, y1, x2 - x1, y2 - y1])  # Convert to COCO format
                pred_scores.append(confidence)
                pred_labels.append(int(class_id))

            if image_id in ground_truth_annotations:
                gt_boxes = torch.tensor([box for box, _ in ground_truth_annotations[image_id]])
                gt_labels = torch.tensor([label for _, label in ground_truth_annotations[image_id]])

                predictions.append({
                    "boxes": torch.tensor(pred_boxes),
                    "scores": torch.tensor(pred_scores),
                    "labels": torch.tensor(pred_labels)
                })

                gt_data.append({
                    "boxes": gt_boxes,
                    "labels": gt_labels
                })

    metric.update(predictions, gt_data)
    mAP_score = metric.compute()
    print("Mean Average Precision (mAP):", mAP_score)


In [32]:
# Function to evaluate distance estimation accuracy
def evaluate_distance_estimation():
    actual_distances = []
    predicted_distances = []
    
    for filename in os.listdir(KNOWN_DISTANCES_PATH):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            img_path = os.path.join(KNOWN_DISTANCES_PATH, filename)
            img = cv.imread(img_path)
            
            # Extract actual distance from filename
            import re
            match = re.search(r"(\d+(\.\d+)?)m", filename)
            if match:
                actual_distance = float(match.group(1))
            else:
                print(f"Skipping {filename}, could not extract distance.")
                continue
            
            # Define focal length and real-world object width
            focal_length = 600  # Adjust based on calibration
            real_width = 16  # Example: person shoulder width
            
            detections = model.predict(img, verbose=False)[0].boxes.data.cpu().numpy()
            
            for detection in detections:
                width_pixel = detection[2] - detection[0]
                if width_pixel == 0:
                    continue  # Avoid division by zero
                estimated_distance = (real_width * focal_length) / width_pixel
                
                actual_distances.append(actual_distance)
                predicted_distances.append(estimated_distance)
    
    if actual_distances and predicted_distances:
        mae = np.mean(np.abs(np.array(actual_distances) - np.array(predicted_distances)))
        mse = np.mean((np.array(actual_distances) - np.array(predicted_distances))**2)
        print(f"Mean Absolute Error (MAE): {mae:.3f} meters")
        print(f"Mean Squared Error (MSE): {mse:.3f}")
    else:
        print("No valid distance estimations found.")

In [33]:
# Run evaluations
evaluate_object_detection()
#evaluate_distance_estimation()

Mean Average Precision (mAP): {'map': tensor(7.5982e-06), 'map_50': tensor(8.5805e-06), 'map_75': tensor(8.5805e-06), 'map_small': tensor(0.), 'map_medium': tensor(0.), 'map_large': tensor(1.2114e-05), 'mar_1': tensor(0.0002), 'mar_10': tensor(0.0002), 'mar_100': tensor(0.0002), 'mar_small': tensor(0.), 'mar_medium': tensor(0.), 'mar_large': tensor(0.0003), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.), 'classes': tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
        78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90], dtype=torch.int32)}
