## mAP на примере одного батча

In [1]:
import torch

In [2]:
def find_intersection(set_1, set_2):
    """
    Compute the area shared by every pair of boxes drawn from two sets of boxes in boundary coordinates.

    :param set_1: first set of boxes, a tensor of dimensions (n1, 4)
    :param set_2: second set of boxes, a tensor of dimensions (n2, 4)
    :return: intersection area of each box in set 1 with each box in set 2, a tensor of dimensions (n1, n2)
    """

    # Split every box into its first coordinate pair and its second coordinate pair
    mins_1, mins_2 = set_1[:, :2], set_2[:, :2]  # (n1, 2), (n2, 2)
    maxes_1, maxes_2 = set_1[:, 2:], set_2[:, 2:]  # (n1, 2), (n2, 2)

    # The overlapping region starts at the larger of the two minima and ends at
    # the smaller of the two maxima; the inserted singleton axes make the two
    # sets broadcast against each other
    overlap_min = torch.max(mins_1[:, None, :], mins_2[None, :, :])  # (n1, n2, 2)
    overlap_max = torch.min(maxes_1[:, None, :], maxes_2[None, :, :])  # (n1, n2, 2)

    # Disjoint boxes produce a negative extent, which is clipped to zero
    extent = (overlap_max - overlap_min).clamp(min=0)  # (n1, n2, 2)
    return extent[..., 0] * extent[..., 1]  # (n1, n2)


def find_jaccard_overlap(set_1, set_2):
    """
    Compute the Jaccard Overlap (IoU) for every pair of boxes drawn from two sets of boxes in boundary coordinates.

    :param set_1: first set of boxes, a tensor of dimensions (n1, 4)
    :param set_2: second set of boxes, a tensor of dimensions (n2, 4)
    :return: IoU of each box in set 1 with each box in set 2, a tensor of dimensions (n1, n2)
    """

    def box_areas(boxes):
        # Area of each box: product of its extents along the two axes
        return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # Area shared by each pair of boxes
    intersection = find_intersection(set_1, set_2)  # (n1, n2)

    # Union of a pair = area of the first + area of the second - shared area;
    # the inserted singleton axes broadcast the per-box areas to (n1, n2)
    union = box_areas(set_1)[:, None] + box_areas(set_2)[None, :] - intersection  # (n1, n2)

    return intersection / union  # (n1, n2)

### Init some data

In [3]:
# Predicted boxes for a batch of 8 images: one tensor per image, one row of
# 4 coordinates per detection (the layout the box helpers above refer to as
# "boundary coordinates"). The list position is the image index.
det_boxes = [
    torch.tensor([[0.5181, 0.1481, 0.9734, 0.9495],
        [0.4061, 0.0868, 0.9967, 1.0121]]),
    torch.tensor([[0.5112, 0.2552, 0.9611, 0.6476],
        [0.3756, 0.2327, 0.9368, 0.5786],
        [0.1407, 0.2285, 0.7031, 0.6167],
        [0.2269, 0.2269, 0.8302, 0.5679]]),
    torch.tensor([[0.0262, 0.0626, 0.9048, 1.0039]]),
    torch.tensor([]),  # image 3 has no detections at all
    torch.tensor([[0.1899, 0.2915, 0.8136, 0.9690],
         [0.2999, 0.2476, 0.8813, 0.9457],
         [0.0913, 0.3507, 0.6562, 0.9572],
         [0.0658, 0.2665, 0.7552, 0.9305]]),
    torch.tensor([[0.0594, 0.0101, 0.7279, 1.0358],
         [0.1577, 0.1561, 0.8410, 0.9924],
         [0.0226, 0.1019, 0.5832, 0.9948]]),
    torch.tensor([[0.2565, 0.3249, 0.8049, 0.8538],
         [0.1724, 0.3774, 0.7303, 0.8684]]),
    torch.tensor([[ 0.0885,  0.0042,  0.7078,  0.9169],
         [ 0.1837, -0.0027,  0.8143,  0.9832],
         [ 0.0447,  0.0715,  0.5369,  0.8338]])
]

In [4]:
# Confidence score of every detection, one tensor per image. Entry i lines
# up element-for-element with the rows of det_boxes[i].
det_scores = [
    torch.tensor([0.7783, 0.5865]),
    torch.tensor([0.8726, 0.8634, 0.7082, 0.6611]),
    torch.tensor([0.9984]),
    torch.tensor([]),  # image 3 has no detections, hence no scores
    torch.tensor([0.9895, 0.9589, 0.9230, 0.7901]),
    torch.tensor([0.9995, 0.6241, 0.5843]),
    torch.tensor([0.9849, 0.6808]),
    torch.tensor([0.9874, 0.8269, 0.5664])
]

In [5]:
# Ground-truth boxes, one tensor per image and one row of 4 coordinates per
# object, in the same layout as det_boxes. Every image in this batch has at
# least one ground-truth object.
true_boxes = [
    torch.tensor([[0.5580, 0.0693, 0.9980, 0.9920]]),
    torch.tensor([[0.0000, 0.2480, 0.4640, 0.6853],
        [0.5280, 0.2533, 0.9960, 0.6987]]),
    torch.tensor([[0.0080, 0.0675, 0.9640, 0.9775]]),
    torch.tensor([[0.5300, 0.4933, 0.6680, 0.5413]]),
    torch.tensor([[0.0000, 0.1680, 0.8260, 0.8853]]),
    torch.tensor([[0.0080, 0.0150, 0.8160, 0.9970]]),
    torch.tensor([[0.2643, 0.4604, 0.5375, 0.8562],
        [0.3754, 0.3250, 0.7267, 0.8188]]),
    torch.tensor([[0.0000, 0.0000, 0.8053, 0.9620]])
]

In [6]:
# Integer label of every ground-truth object, one tensor per image, aligned
# with the rows of true_boxes. In the cells shown here only the *number* of
# labels per image is used (to build true_images); the label values
# themselves are never compared against anything.
true_labels = [
    torch.tensor([9]),
    torch.tensor([20, 20]),
    torch.tensor([12]),
    torch.tensor([4]),
    torch.tensor([15]),
    torch.tensor([15]),
    torch.tensor([13, 13]),
    torch.tensor([14])
]

In [7]:
len(det_boxes), len(det_scores), len(true_boxes), len(true_labels)

(8, 8, 8, 8)

In [8]:
# different numbers of predictions and true objects
det_boxes[1].shape, true_boxes[1].shape

(torch.Size([4, 4]), torch.Size([2, 4]))

### true

In [9]:
# Flatten the per-image ground truth into batch-wide tensors. true_images
# records, for every object, the index of the image it belongs to, so the
# objects of one image can later be selected with a boolean mask.
# It has to be built first: it relies on true_labels still being the
# per-image list, which the last line below replaces with a single tensor.
true_images = torch.LongTensor(
    [i for i, labels in enumerate(true_labels) for _ in range(labels.size(0))]
)  # (n_objects), n_objects is the total no. of objects across all images
true_boxes = torch.cat(true_boxes, dim=0)  # (n_objects, 4)
true_labels = torch.cat(true_labels, dim=0)  # (n_objects)

In [10]:
true_images

tensor([0, 1, 1, 2, 3, 4, 5, 6, 6, 7])

In [11]:
true_labels

tensor([ 9, 20, 20, 12,  4, 15, 15, 13, 13, 14])

In [12]:
true_images.shape, true_boxes.shape, true_labels.shape

(torch.Size([10]), torch.Size([10, 4]), torch.Size([10]))

### det

In [13]:
# Flatten the per-image detections into batch-wide tensors. det_images
# records, for every detection, the index of the image it was predicted on;
# an image without detections simply contributes no entries.
# It has to be built first: it relies on det_scores still being the
# per-image list, which the last line below replaces with a single tensor.
det_images = torch.LongTensor(
    [i for i, scores in enumerate(det_scores) for _ in range(scores.size(0))]
)  # (n_detections)
det_boxes = torch.cat(det_boxes, dim=0)  # (n_detections, 4)
det_scores = torch.cat(det_scores, dim=0)  # (n_detections)

In [14]:
det_images

tensor([0, 0, 1, 1, 1, 1, 2, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7])

In [15]:
det_scores

tensor([0.7783, 0.5865, 0.8726, 0.8634, 0.7082, 0.6611, 0.9984, 0.9895, 0.9589,
        0.9230, 0.7901, 0.9995, 0.6241, 0.5843, 0.9849, 0.6808, 0.9874, 0.8269,
        0.5664])

In [16]:
det_images.shape, det_boxes.shape, det_scores.shape

(torch.Size([19]), torch.Size([19, 4]), torch.Size([19]))

### steps

In [17]:
n_objects = true_boxes.size(0)
n_objects

10

In [18]:
# One flag per ground-truth object: 0 = not matched by any detection yet,
# 1 = already matched. The matching loop further down flips these flags and
# uses `1 - true_boxes_detected` to count the objects still undetected.
true_boxes_detected = torch.zeros(n_objects, dtype=torch.uint8)
true_boxes_detected, true_boxes_detected.shape

(tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.uint8), torch.Size([10]))

In [19]:
n_detections = det_boxes.size(0)
n_detections

19

In [20]:
# Rank all detections from most to least confident, then apply the same
# permutation to the boxes and image indices so the three tensors stay aligned
det_scores_sorted, sort_ind = det_scores.sort(dim=0, descending=True)  # (n_detections)
det_boxes_sorted = det_boxes.index_select(0, sort_ind)  # (n_detections, 4)
det_images_sorted = det_images.index_select(0, sort_ind)  # (n_detections)

In [21]:
det_scores_sorted

tensor([0.9995, 0.9984, 0.9895, 0.9874, 0.9849, 0.9589, 0.9230, 0.8726, 0.8634,
        0.8269, 0.7901, 0.7783, 0.7082, 0.6808, 0.6611, 0.6241, 0.5865, 0.5843,
        0.5664])

In [22]:
det_images_sorted

tensor([5, 2, 4, 7, 6, 4, 4, 1, 1, 7, 4, 0, 1, 6, 1, 5, 0, 5, 7])

In [23]:
# In the order of decreasing scores, check if true or false positive
# Both tensors are indexed by a detection's rank in the sorted order; the
# matching loop below writes 1 into exactly one of them for every detection.
true_positives = torch.zeros((n_detections), dtype=torch.float) # (n_detections)
false_positives = torch.zeros((n_detections), dtype=torch.float) # (n_detections)

In [24]:
# false_negatives[d] will hold the number of ground-truth objects that are
# still undetected once the d-th ranked detection has been processed.
false_negatives = torch.zeros((n_detections), dtype=torch.float)  # (n_detections)

In [37]:
# A detection matches a ground-truth box only if their IoU exceeds this value
threshold = 0.5

# This cell mutates accumulators created in earlier cells. Clear them in place
# first so that re-running the cell reproduces the same result instead of
# treating every previously matched object as a duplicate.
true_boxes_detected.zero_()
true_positives.zero_()
false_positives.zero_()
false_negatives.zero_()

# Position of every ground-truth object in the flattened tensors; it does not
# depend on the detection being processed, so it is built once
object_indices = torch.arange(true_boxes.size(0))  # (n_objects)

# Walk through the detections from most to least confident
for d in range(n_detections):
    this_detection_box = det_boxes_sorted[d].unsqueeze(0)  # (1, 4)
    this_image = det_images_sorted[d]  # (), scalar

    # Ground-truth objects that live in the same image as this detection
    in_this_image = true_images == this_image  # (n_objects), boolean mask
    this_image_boxes = true_boxes[in_this_image]  # (n_objects_in_img, 4)

    if this_image_boxes.size(0) == 0:
        # Nothing to match against (torch.max over an empty dimension would
        # raise), so the detection can only be a false positive
        false_positives[d] = 1
    else:
        # Find the ground-truth box in this image that overlaps the detection most
        overlaps = find_jaccard_overlap(this_detection_box, this_image_boxes)  # (1, n_objects_in_img)
        max_overlap, ind = torch.max(overlaps.squeeze(0), dim=0)  # (), () - scalars

        # Translate the within-image index into an index into true_boxes /
        # true_boxes_detected, so duplicate detections can be recognised
        original_ind = object_indices[in_this_image][ind]

        # A true positive needs enough overlap with an object nobody has
        # claimed yet; a weak overlap or a duplicate is a false positive
        if max_overlap > threshold and true_boxes_detected[original_ind] == 0:
            true_positives[d] = 1
            true_boxes_detected[original_ind] = 1
        else:
            false_positives[d] = 1

    # Number of ground-truth objects still undetected after this detection
    false_negatives[d] = (1 - true_boxes_detected).sum()

In [38]:
true_positives

tensor([1., 1., 1., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0.])

In [39]:
false_positives

tensor([0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1.,
        1.])

In [40]:
false_negatives

tensor([9., 8., 7., 6., 5., 5., 5., 4., 4., 4., 4., 3., 3., 3., 3., 3., 3., 3.,
        3.])

In [41]:
true_boxes_detected

tensor([1, 0, 1, 1, 0, 1, 1, 0, 1, 1], dtype=torch.uint8)

In [42]:
# Running totals of true and false positives over the confidence-ranked detections
cumul_true_positives = true_positives.cumsum(dim=0)  # (n_detections)
cumul_false_positives = false_positives.cumsum(dim=0)  # (n_detections)

# Precision after each detection: TP / (TP + FP); the tiny constant guards
# against a zero denominator
cumul_precision = cumul_true_positives.div(
    cumul_true_positives + cumul_false_positives + 1e-10)  # (n_detections)

# Recall after each detection: TP / (TP + FN). false_negatives already holds
# the count of still-undetected objects at every step, so it is used as-is
# rather than accumulated
cumul_recall = cumul_true_positives.div(
    cumul_true_positives + false_negatives + 1e-10)  # (n_detections)

In [43]:
cumul_true_positives, cumul_false_positives, cumul_precision, cumul_recall

(tensor([1., 2., 3., 4., 5., 5., 5., 6., 6., 6., 6., 7., 7., 7., 7., 7., 7., 7.,
         7.]),
 tensor([ 0.,  0.,  0.,  0.,  0.,  1.,  2.,  2.,  3.,  4.,  5.,  5.,  6.,  7.,
          8.,  9., 10., 11., 12.]),
 tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.8333, 0.7143, 0.7500, 0.6667,
         0.6000, 0.5455, 0.5833, 0.5385, 0.5000, 0.4667, 0.4375, 0.4118, 0.3889,
         0.3684]),
 tensor([0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.5000, 0.5000, 0.6000, 0.6000,
         0.6000, 0.6000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000,
         0.7000]))

In [44]:
# The 11 recall levels 0.0, 0.1, ..., 1.0 at which precision is sampled,
# converted to a plain Python list. The tensor is created in torch's default
# float dtype, so the listed values carry its rounding (0.1 is displayed as
# 0.10000000149011612 in the output below).
recall_thresholds = torch.arange(start=0, end=1.1, step=.1).tolist()  # (11)
recall_thresholds

[0.0,
 0.10000000149011612,
 0.20000000298023224,
 0.30000001192092896,
 0.4000000059604645,
 0.5,
 0.6000000238418579,
 0.699999988079071,
 0.800000011920929,
 0.8999999761581421,
 1.0]

In [46]:
# Interpolated precision at every recall level: the best precision reached at
# any point whose recall is at least that level, or 0 if the level is never
# reached
precisions = torch.zeros(len(recall_thresholds), dtype=torch.float)  # (11)
for i, t in enumerate(recall_thresholds):
    recalls_above_t = cumul_recall >= t
    precisions[i] = cumul_precision[recalls_above_t].max() if recalls_above_t.any() else 0.

In [47]:
precisions

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.7500, 0.5833, 0.0000,
        0.0000, 0.0000])

In [48]:
average_precision = precisions.mean()
average_precision

tensor(0.6667)