diff --git a/src/torchmetrics/detection/mean_ap.py b/src/torchmetrics/detection/mean_ap.py
index 5f8569791a9..352a81f4d6b 100644
--- a/src/torchmetrics/detection/mean_ap.py
+++ b/src/torchmetrics/detection/mean_ap.py
@@ -794,21 +794,25 @@ def __calculate_recall_precision_scores(
             return recall, precision, scores
 
         det_scores = torch.cat([e["dtScores"][:max_det] for e in img_eval_cls_bbox])
-
         # different sorting method generates slightly different results.
         # mergesort is used to be consistent as Matlab implementation.
         # Sort in PyTorch does not support bool types on CUDA (yet, 1.11.0)
         dtype = torch.uint8 if det_scores.is_cuda and det_scores.dtype is torch.bool else det_scores.dtype
         # Explicitly cast to uint8 to avoid error for bool inputs on CUDA to argsort
         inds = torch.argsort(det_scores.to(dtype), descending=True)
-        det_scores_sorted = det_scores[inds]
-
-        det_matches = torch.cat([e["dtMatches"][:, :max_det] for e in img_eval_cls_bbox], axis=1)[:, inds]
         det_ignore = torch.cat([e["dtIgnore"][:, :max_det] for e in img_eval_cls_bbox], axis=1)[:, inds]
         gt_ignore = torch.cat([e["gtIgnore"] for e in img_eval_cls_bbox])
+
         npig = torch.count_nonzero(gt_ignore == False)  # noqa: E712
         if npig == 0:
+            # If there are any predictions, make Precision 0; otherwise, -1.
+            npreds = torch.count_nonzero(det_ignore == False)  # noqa: E712
+            if npreds != 0:
+                precision[:, :, idx_cls, idx_bbox_area, idx_max_det_thrs] = 0.0
             return recall, precision, scores
+
+        det_scores_sorted = det_scores[inds]
+        det_matches = torch.cat([e["dtMatches"][:, :max_det] for e in img_eval_cls_bbox], axis=1)[:, inds]
 
         tps = torch.logical_and(det_matches, torch.logical_not(det_ignore))
         fps = torch.logical_and(torch.logical_not(det_matches), torch.logical_not(det_ignore))
diff --git a/tests/unittests/detection/test_map.py b/tests/unittests/detection/test_map.py
index 19e0e0e06cd..951c4560d0f 100644
--- a/tests/unittests/detection/test_map.py
+++ b/tests/unittests/detection/test_map.py
@@ -578,6 +578,52 @@ def test_missing_gt():
     assert result["map"] < 1, "MAP cannot be 1, as there is an image with no ground truth, but some predictions."
 
 
+@pytest.mark.skipif(_pytest_condition, reason="test requires that pycocotools and torchvision=>0.8.0 is installed")
+def test_class_metrics_with_missing_gt():
+    """Checks MAP for each class when there are 4 detections, each for a different class.
+
+    But there are targets for only 2 classes. Hence, MAP should be lower than 1. MAP for classes with targets should be
+    1 and 0 for the others.
+    """
+    # Example source: Issue https://github.com/Lightning-AI/metrics/issues/1184
+    preds = [
+        {
+            "boxes": torch.Tensor(
+                [
+                    [0, 0, 20, 20],
+                    [30, 30, 50, 50],
+                    [70, 70, 90, 90],  # FP
+                    [100, 100, 120, 120],  # FP
+                ]
+            ),
+            "scores": torch.Tensor([0.6, 0.6, 0.6, 0.6]),
+            "labels": torch.IntTensor([0, 1, 2, 3]),
+        }
+    ]
+
+    targets = [
+        {
+            "boxes": torch.Tensor([[0, 0, 20, 20], [30, 30, 50, 50]]),
+            "labels": torch.IntTensor([0, 1]),
+        }
+    ]
+
+    metric = MeanAveragePrecision(class_metrics=True)
+    metric.update(preds, targets)
+    result = metric.compute()
+
+    assert result["map"] < 1, "MAP cannot be 1, as for class 2 and 3, there are some predictions, but not targets."
+
+    result_map_per_class = result.get("map_per_class", None)
+    assert result_map_per_class is not None, "map_per_class must be present in results."
+    assert isinstance(result_map_per_class, Tensor), "map_per_class must be a tensor"
+    assert len(result_map_per_class) == 4, "map_per_class must be of length 4, same as the number of classes."
+    assert result_map_per_class[0].item() == 1.0, "map for class 0 must be 1."
+    assert result_map_per_class[1].item() == 1.0, "map for class 1 must be 1."
+    assert result_map_per_class[2].item() == 0.0, "map for class 2 must be 0."
+    assert result_map_per_class[3].item() == 0.0, "map for class 3 must be 0."
+
+
 @pytest.mark.skipif(_pytest_condition, reason="test requires that torchvision=>0.8.0 is installed")
 def test_segm_iou_empty_gt_mask():
     """Test empty ground truths."""
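To see the behavior change end to end, here is a minimal usage sketch distilled from the new `test_class_metrics_with_missing_gt` above. It is a hypothetical repro, not part of the patch: it assumes a torchmetrics build with this fix applied, and the expected values in the comments mirror the test's assertions (per-class AP of 1.0 where targets exist, 0.0 for a class that has predictions but no ground truth, instead of the previous -1 placeholder that excluded the class from the mean).

```python
# Hypothetical repro distilled from test_class_metrics_with_missing_gt;
# assumes a torchmetrics build that includes this patch.
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# One image: class 0 is predicted correctly; class 1 is a false positive
# for a class that has no ground truth at all.
preds = [
    {
        "boxes": torch.Tensor([[0, 0, 20, 20], [70, 70, 90, 90]]),
        "scores": torch.Tensor([0.6, 0.6]),
        "labels": torch.IntTensor([0, 1]),
    }
]
targets = [
    {
        "boxes": torch.Tensor([[0, 0, 20, 20]]),
        "labels": torch.IntTensor([0]),
    }
]

metric = MeanAveragePrecision(class_metrics=True)
metric.update(preds, targets)
result = metric.compute()

# With the fix, precision for class 1 is set to 0.0 rather than left at the
# -1 placeholder, so its AP is 0 and it drags the mean down as expected.
print(result["map_per_class"])  # expected: tensor([1., 0.])
print(result["map"] < 1)        # expected: True
```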