In [2]:
import torchvision.transforms as T
from custom_dataset import YoloToFRCNNDataset
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from faster_rcnn import FasterRCNN
from torchvision.ops import box_iou
from extract_results import load_model_and_store_data_in_csv
from torchmetrics.detection.mean_ap import MeanAveragePrecision


In [3]:
import torch
import torchvision

def late_fusion_predictions(pred1, pred2, iou_threshold=0.5):
    boxes = torch.cat([pred1['boxes'], pred2['boxes']], dim=0)
    scores = torch.cat([pred1['scores'], pred2['scores']], dim=0)
    labels = torch.cat([pred1['labels'], pred2['labels']], dim=0)

    keep = torchvision.ops.nms(boxes, scores, iou_threshold)

    return {
        "boxes": boxes[keep],
        "scores": scores[keep],
        "labels": labels[keep]
    }


In [4]:
# Cargar dos modelos diferentes
classes = ["cow", "deer", "horse"]

model_rgb = FasterRCNN(fasterrcnn_resnet50_fpn(weights=None), classes)
model_t_grayscale = FasterRCNN(fasterrcnn_resnet50_fpn(weights=None), classes)

model_rgb.model.load_state_dict(torch.load("models/fasterrcnn_rgb.pth", map_location=model_rgb.device))
model_t_grayscale.model.load_state_dict(torch.load("models/fasterrcnn_t_grayscale.pth", map_location=model_t_grayscale.device))

model_rgb.model.eval()
model_t_grayscale.model.eval()


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [5]:
transforms = T.Compose([
    T.ToTensor()
])
test_ds_rgb = YoloToFRCNNDataset("../yolo_rgb/images/test", "../yolo_rgb/labels/test", transforms)
test_ds_grayscale = YoloToFRCNNDataset("../yolo_t_grayscale/images/test", "../yolo_t_grayscale/labels/test", transforms)


In [7]:
metric = MeanAveragePrecision(iou_type="bbox")

TP = 0
FP = 0
FN = 0

for idx in range(len(test_ds_rgb)):
    # Obtener predicciones de ambos modelos
    pred_rgb, image_rgb = model_rgb.get_prediction(test_ds_rgb, idx)
    pred_t_grayscale, image_t_grayscale = model_t_grayscale.get_prediction(test_ds_grayscale, idx)

    # Fusionar predicciones
    fused_pred = late_fusion_predictions(pred_rgb, pred_t_grayscale, iou_threshold=0.5)

    # Formatear predicción para torchmetrics
    pred_fmt = {
        "boxes": fused_pred["boxes"].cpu(),
        "scores": fused_pred["scores"].cpu(),
        "labels": fused_pred["labels"].cpu()
    }

    # Obtener ground truth original
    _, gt = test_ds_rgb[idx]  # asumimos que ground truth es el mismo para ambos datasets
    gt_fmt = {
        "boxes": gt["boxes"].cpu(),
        "labels": gt["labels"].cpu()
    }
    pred_boxes = fused_pred["boxes"].cpu()
    pred_labels = fused_pred["labels"].cpu()

    _, gt = test_ds_rgb[idx]
    gt_boxes = gt["boxes"].cpu()
    gt_labels = gt["labels"].cpu()

    # Actualizar métrica
    metric.update([pred_fmt], [gt_fmt])
    if len(pred_boxes) == 0:
        FN += len(gt_boxes)
        continue

    ious = box_iou(pred_boxes, gt_boxes)
    matched_gt = set()
    matched_pred = set()
    for i in range(ious.shape[0]):
        max_iou, j = ious[i].max(0)
        if max_iou >= 0.5 and j.item() not in matched_gt:
            TP += 1
            matched_gt.add(j.item())
            matched_pred.add(i)
        else:
            FP += 1

    FN += len(gt_boxes) - len(matched_gt)

    
results = metric.compute()


In [8]:
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

print(f"Precision: {precision:.5f}")
print(f"Recall: {recall:.5f}")
print(f"TP: {TP}, FP: {FP}, FN: {FN}")

Precision: 0.82022
Recall: 0.97333
TP: 73, FP: 16, FN: 2


In [None]:
print(results)

{'map': tensor(0.5539), 'map_50': tensor(0.9540), 'map_75': tensor(0.6261), 'map_small': tensor(-1.), 'map_medium': tensor(0.5220), 'map_large': tensor(0.6864), 'mar_1': tensor(0.2356), 'mar_10': tensor(0.6216), 'mar_100': tensor(0.6216), 'mar_small': tensor(-1.), 'mar_medium': tensor(0.5880), 'mar_large': tensor(0.7169), 'map_per_class': tensor(-1.), 'mar_100_per_class': tensor(-1.), 'classes': tensor([1, 2, 3], dtype=torch.int32)}


In [None]:
load_model_and_store_data_in_csv("fasterrcnn_hsv","models/fasterrcnn_hsv.pth",test_loader,test_ds)