In [8]:
from ultralytics import YOLO
import cv2
import os
import matplotlib.pyplot as plt
import random
import torch
import imgaug.augmenters as iaa

# Load pre-trained YOLOv9 model
model = YOLO("yolov9e.pt")

# Modify last layer for car detection
num_car_classes = 1  # Since we're detecting only cars
model.model.anchor_vec = torch.tensor([[3.5067, 4.7690], [4.9702, 6.8688], [6.9725, 9.5452]])
model.model.nc = num_car_classes
model.model.n = num_car_classes + 5  # 5 is the number of default attributes (4 box coordinates + 1 objectness)

# Specify the path to the 'images' folder
folder_path = "./images"

# Specify the output folder for processed images
output_folder = "./Processed images"

# Check if the output folder exists, and if not, create it
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# List all image files in the folder
image_files = [f for f in os.listdir(folder_path) if f.endswith('.png')]

# Select a random sample of 10 images
selected_image_files = random.sample(image_files, 10)

# Data Augmentation
seq = iaa.Sequential([
    iaa.Fliplr(0.5),  # Horizontal flips
    iaa.Affine(rotate=(-10, 10)),  # Rotate images
    iaa.GaussianBlur(sigma=(0, 3.0))  # Apply Gaussian blur
])

# Function to process an individual image and return the processed image
def process_image(image_file):
    image_path = os.path.join(folder_path, image_file)
    img = cv2.imread(image_path)
    
    # Data Augmentation
    img_aug = seq(image=img)
    results = model(img_aug)
    return img_aug, results

# Adjust Detection Threshold
model.conf = 0.3  # Lowering the confidence threshold

# Define function to compute precision and recall
def compute_precision_recall(gt_boxes, pred_boxes, iou_threshold=0.5):
    """Compute precision and recall for object detection."""
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    # Calculate IoU for each predicted box with ground truth boxes
    ious = torch.zeros(len(pred_boxes), len(gt_boxes))
    for i, pred_box in enumerate(pred_boxes):
        for j, gt_box in enumerate(gt_boxes):
            ious[i, j] = box_iou(pred_box, gt_box)

    # Match predictions to ground truth boxes
    for j in range(len(gt_boxes)):
        # Find the best matching prediction for each ground truth box
        best_iou, best_pred_idx = torch.max(ious[:, j], dim=0)
        if best_iou >= iou_threshold:
            true_positives += 1
            # Remove the matched prediction to avoid double counting
            ious[best_pred_idx, :] = -1
        else:
            false_negatives += 1

    # Count false positives
    false_positives = len(pred_boxes) - true_positives

    # Compute precision and recall
    precision = true_positives / (true_positives + false_positives + 1e-6)  # Add epsilon to avoid division by zero
    recall = true_positives / (true_positives + false_negatives + 1e-6)  # Add epsilon to avoid division by zero

    return precision, recall

# Calculate Average Precision (AP)
def calculate_ap(precision, recall):
    """Calculate Average Precision (AP) using the precision-recall curve."""
    # Add endpoints to the precision-recall curve
    precision = torch.cat([torch.tensor([0.]), precision, torch.tensor([0.])])
    recall = torch.cat([torch.tensor([0.]), recall, torch.tensor([1.])])

    # Compute area under the curve (AUC)
    ap = torch.trapz(precision, recall)

    return ap

# Compute mAP
def compute_map(gt_boxes_list, pred_boxes_list, iou_threshold=0.5):
    """Compute mean Average Precision (mAP) for object detection."""
    assert len(gt_boxes_list) == len(pred_boxes_list), "Number of ground truth and prediction lists must match."
    num_classes = len(gt_boxes_list)

    ap_list = []
    for class_idx in range(num_classes):
        gt_boxes = gt_boxes_list[class_idx]
        pred_boxes = pred_boxes_list[class_idx]

        # Compute precision and recall
        precision, recall = compute_precision_recall(gt_boxes, pred_boxes, iou_threshold)

        # Calculate AP
        ap = calculate_ap(precision, recall)
        ap_list.append(ap)

    # Compute mAP
    mAP = torch.mean(torch.tensor(ap_list))

    return mAP

# Function to get ground truth bounding boxes
def get_ground_truth_boxes(image_file):
    # Dummy function to get ground truth bounding boxes
    # Replace with your actual implementation
    return []

# Process and save selected images
gt_boxes_list = []  # List to store ground truth boxes for each class
pred_boxes_list = []  # List to store predicted boxes for each class

for image_file in selected_image_files:
    img, results = process_image(image_file)
    processed_image = draw_bounding_boxes(img, results)
    output_path = os.path.join(output_folder, "processed_" + image_file)
    cv2.imwrite(output_path, processed_image)
    
    # Get ground truth bounding boxes for the current image
    gt_boxes = get_ground_truth_boxes(image_file)
    gt_boxes_list.append(gt_boxes)
    
    # Extract predicted bounding boxes from YOLO results
    pred_boxes = []
    for result in results:
        if result.boxes is not None:
            pred_boxes.append(result.boxes.xyxy)
    pred_boxes_list.append(pred_boxes)

# Compute mAP
mAP = compute_map(gt_boxes_list, pred_boxes_list)
print(f"mAP: {mAP}")

# Display example processed images
display_example_images(output_folder)



0: 384x640 (no detections), 1065.1ms
Speed: 2.0ms preprocess, 1065.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 suitcase, 964.6ms
Speed: 2.3ms preprocess, 964.6ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 976.7ms
Speed: 3.7ms preprocess, 976.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 1000.6ms
Speed: 3.0ms preprocess, 1000.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 1026.1ms
Speed: 2.5ms preprocess, 1026.1ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 1001.0ms
Speed: 2.5ms preprocess, 1001.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 925.8ms
Speed: 2.2ms preprocess, 925.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bench, 932.2ms
Speed: 3.5ms preprocess

TypeError: expected Tensor as element 1 in argument 0, but got float