In [1]:
from ultralytics import YOLO
import cv2
from pathlib import Path
import os
import numpy as np
from torchvision.ops import nms
import torch
from torchvision.ops import box_iou

from torchmetrics.detection.mean_ap import MeanAveragePrecision
from PIL import Image
import sys
notebook_dir = os.getcwd()
sys.path.append(os.path.abspath(os.path.join(notebook_dir, '..')))
from utils import parse_labels
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
runs_rgb = Path("../yolo_rgb/runs/detect/train/weights/best.pt")
runs_t = Path("../yolo_t_grayscale/runs/detect/train/weights/best.pt")
train_rgb = Path("../yolo_rgb/images/train")
train_t = Path("../yolo_t_grayscale/images/train")

modelo_rgb = YOLO(runs_rgb)
modelo_thermal = YOLO(runs_t)
imgs_train_rgb = os.listdir(train_rgb)
imgs_train_t = os.listdir(train_t)

In [3]:

# Métricas manuales
precisions = []
recalls = []

def compute_precision_recall(preds, targets, iou_threshold=0.5):
    pred_boxes = preds['boxes']
    pred_labels = preds['labels']

    gt_boxes = targets['boxes']
    gt_labels = targets['labels']

    if len(pred_boxes) == 0:
        precision = 0.0
        recall = 0.0 if len(gt_boxes) > 0 else 1.0
        return precision, recall

    ious = box_iou(pred_boxes, gt_boxes)  # (N_pred, N_gt)
    matched_gt = torch.zeros(len(gt_boxes), dtype=torch.bool)
    TP = 0

    for i in range(len(pred_boxes)):
        max_iou, max_idx = ious[i].max(0)
        if max_iou >= iou_threshold and pred_labels[i] == gt_labels[max_idx] and not matched_gt[max_idx]:
            TP += 1
            matched_gt[max_idx] = True

    FP = len(pred_boxes) - TP
    FN = len(gt_boxes) - TP

    precision = TP / (TP + FP) if TP + FP > 0 else 0.0
    recall = TP / (TP + FN) if TP + FN > 0 else 0.0
    return precision, recall

In [4]:
metric = MeanAveragePrecision(iou_type="bbox",class_metrics=True)  # mAP50-95 y mAP50
train_labels = Path("../yolo_rgb/labels/train")

for (rgb,t) in zip(imgs_train_rgb,imgs_train_t):
    path_rgb = train_rgb / rgb
    path_t = train_t / t
    img_rgb = cv2.imread(str(path_rgb))  
    img_t = cv2.imread(str(path_t))  
    result_rgb = modelo_rgb(img_rgb)[0]
    result_t = modelo_thermal(img_t)[0]
    boxes_rgb = result_rgb.boxes.xyxy.cpu().numpy()     
    scores_rgb = result_rgb.boxes.conf.cpu().numpy()
    labels_rgb = result_rgb.boxes.cls.cpu().numpy().astype(int)

    boxes_t = result_t.boxes.xyxy.cpu().numpy()
    scores_t = result_t.boxes.conf.cpu().numpy()
    labels_t = result_t.boxes.cls.cpu().numpy().astype(int)
    all_boxes = np.vstack([boxes_rgb, boxes_t])
    all_scores = np.hstack([scores_rgb, scores_t])
    all_labels = np.hstack([labels_rgb, labels_t])

    boxes_tensor = torch.tensor(all_boxes)
    scores_tensor = torch.tensor(all_scores)

    keep_indices = nms(boxes_tensor, scores_tensor, iou_threshold=0.5)

    boxes_fusion = boxes_tensor[keep_indices]
    scores_fusion = scores_tensor[keep_indices]
    labels_fusion = torch.tensor(all_labels)[keep_indices]
    label_path = train_labels / f"{Path(rgb).stem}.txt"
    image_pil = Image.open(str(path_rgb))
    w, h = image_pil.size

    gt_boxes, gt_labels = parse_labels.parse_labels(label_path, w, h)
    preds = [{
        "boxes": boxes_fusion,
        "scores": scores_fusion,
        "labels": labels_fusion
    }]

    targets = [{
        "boxes": gt_boxes,
        "labels": gt_labels
    }]
    p, r = compute_precision_recall(preds[0], targets[0])
    precisions.append(p)
    recalls.append(r)
    metric.update(preds, targets)


0: 768x960 1 deer, 40.9ms
Speed: 12.2ms preprocess, 40.9ms inference, 66.8ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 1 deer, 6.9ms
Speed: 4.0ms preprocess, 6.9ms inference, 1.2ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 4 deers, 6.3ms
Speed: 4.0ms preprocess, 6.3ms inference, 1.3ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 4 deers, 5.9ms
Speed: 3.9ms preprocess, 5.9ms inference, 1.1ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 3 deers, 6.7ms
Speed: 4.1ms preprocess, 6.7ms inference, 1.4ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 3 deers, 6.5ms
Speed: 4.4ms preprocess, 6.5ms inference, 1.1ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 6 deers, 8.9ms
Speed: 4.3ms preprocess, 8.9ms inference, 1.1ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 6 deers, 6.4ms
Speed: 4.2ms preprocess, 6.4ms inference, 0.9ms postprocess per image at shape (1, 3, 768, 960)

0: 768x960 3 

In [5]:
mean_P = sum(precisions) / len(precisions)
mean_R = sum(recalls) / len(recalls)

print(f"Precisión (P) real: {mean_P:.4f}")
print(f"Recall (R) real:    {mean_R:.4f}")

Precisión (P) real: 0.9169
Recall (R) real:    1.0000


In [6]:
def get_yolo_style_metrics(results_dict, p, r, model_name="late_fusion_rgb_t", best_epoch=None):
    return {
        "Model": model_name,
        "P": round(p, 5),
        "R": round(r, 5),
        "mAP50": round(results_dict["map_50"].item(), 5),
        "mAP50-95": round(results_dict["map"].item(), 5),
    }

In [7]:
results = metric.compute()
final_metrics = get_yolo_style_metrics(results,mean_P,mean_R)
df = pd.DataFrame([final_metrics])
csv_path = "../yolo_final_results/comparative_model_table.csv"

existing_df = pd.read_csv(csv_path)
df.to_csv(csv_path, mode='a', header=False, index=False)

In [8]:
print(results)

{'map': tensor(0.8099), 'map_50': tensor(0.9963), 'map_75': tensor(0.9697), 'map_small': tensor(0.6000), 'map_medium': tensor(0.7836), 'map_large': tensor(0.8566), 'mar_1': tensor(0.3435), 'mar_10': tensor(0.8494), 'mar_100': tensor(0.8494), 'mar_small': tensor(0.6000), 'mar_medium': tensor(0.8264), 'mar_large': tensor(0.8807), 'map_per_class': tensor([0.7683, 0.8064, 0.8552]), 'mar_100_per_class': tensor([0.8197, 0.8369, 0.8916]), 'classes': tensor([0, 1, 2], dtype=torch.int32)}
