# Intersection over Union & Average Precision

## 1. Import packages.

In [None]:
import os
import numpy as np
import scipy.io as sio
import pickle
import torch
import torch.utils.data as data
import xml.etree.ElementTree as ET
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision import transforms


### Function for visualization
def draw_bb(img, boxes, color='r'):
    """Show ``img`` with one unfilled rectangle outlined per entry of ``boxes``.

    Each box is read as ``(x, y, width, height)``: the first two values are
    handed to ``patches.Rectangle`` as its anchor point and the next two as
    its width and height. ``color`` is used as the outline colour.
    """
    _, axis = plt.subplots(1)
    outlines = [
        patches.Rectangle(
            (b[0], b[1]), b[2], b[3],
            linewidth=3, edgecolor=color, facecolor='none')
        for b in boxes
    ]
    for outline in outlines:
        axis.add_patch(outline)
    axis.imshow(img)
    
### Function for visualizing bounding boxes with two different colors
def draw_bb2(img, boxes1, boxes2, color1='r', color2='g'):
    """Show ``img`` with two sets of boxes outlined in different colours.

    Boxes in ``boxes1`` are outlined with ``color1`` and boxes in ``boxes2``
    with ``color2``. Every box is read as ``(x, y, width, height)``, matching
    the arguments of ``patches.Rectangle``. The figure is displayed with
    ``plt.show()`` before returning.
    """
    _, axis = plt.subplots(1)
    # First group is added before the second, so patch order is unchanged.
    for group, outline_color in ((boxes1, color1), (boxes2, color2)):
        for b in group:
            axis.add_patch(patches.Rectangle(
                (b[0], b[1]), b[2], b[3],
                linewidth=3, edgecolor=outline_color, facecolor='none'))
    axis.imshow(img)
    plt.show()

## 2. PASCAL VOC2007 Dataset in COCO format

In [None]:
import os
import json
import torch
from PIL import Image

# Class names in label order; position 0 is the background class.
class_list = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
# Lookup from 1-based id to class name (1 -> 'background', ..., 21 -> 'tvmonitor').
# NOTE(review): this is used to decode `category_id` from the annotation file;
# confirm that the file's ids really follow this numbering.
class_dict = dict(enumerate(class_list, start=1))

class PASCAL_DATASET(torch.utils.data.Dataset):
    """Images plus bounding boxes read from a COCO-style annotation JSON file.

    Images are the ``.jpg`` files found in ``<data_root>/<image_dir>``; their
    file names (without extension) must parse as the integer ``image_id`` used
    in the annotation file. Annotations are read from
    ``<data_root>/annotations/instances_<split>2017.json``.

    Indexing returns ``(img, boxes, gt_classes_str)``: the opened PIL image,
    an array with one annotated ``bbox`` per row, and the matching list of
    class names.
    """

    def __init__(self, data_root, image_dir, split='train'):
        self.data_root = data_root
        self.image_dir = os.path.join(data_root, image_dir)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> image mapping stable between runs and machines.
        self.img_list = sorted(
            name for name in os.listdir(self.image_dir) if name.endswith('.jpg'))
        annotation_path = os.path.join(
            data_root, 'annotations', f'instances_{split}2017.json')
        # Mapping from annotation `category_id` to class name (module level).
        self.class_dict = class_dict
        self.boxes, self.gt_classes_str = self._load_annotation(annotation_path)

    def _load_annotation(self, annotation_dir):
        """Group the annotations of a COCO-style JSON file by image id.

        Args:
            annotation_dir: path of the annotation JSON file (a file path,
                despite the parameter name).

        Returns:
            ``(boxes, gt_classes_str)``: two dicts keyed by ``image_id``. The
            first holds the list of ``bbox`` entries of that image, the second
            the class names looked up through ``self.class_dict``, in the same
            order.

        Raises:
            KeyError: if a ``category_id`` is missing from ``self.class_dict``.
        """
        boxes = {}
        gt_classes_str = {}

        with open(annotation_dir) as f:
            annotation = json.load(f)

        for box_dict in annotation['annotations']:
            image_id = box_dict['image_id']
            # Resolve the class name first so an unknown category id fails
            # before anything is recorded for this annotation.
            category = self.class_dict[box_dict['category_id']]
            boxes.setdefault(image_id, []).append(box_dict['bbox'])
            gt_classes_str.setdefault(image_id, []).append(category)

        return boxes, gt_classes_str

    def _get_annotation(self, img_idx):
        """Return ``(boxes, class names)`` for one image id.

        An image id without any annotation yields an empty ``(0, 4)`` array
        and an empty list instead of raising ``KeyError``.
        """
        boxes = self.boxes.get(img_idx, [])
        gt_classes_str = self.gt_classes_str.get(img_idx, [])
        boxes = np.array(boxes) if boxes else np.zeros((0, 4))
        return boxes, gt_classes_str

    def __len__(self,):
        return len(self.img_list)

    def __getitem__(self, index):
        img_path = self.img_list[index]
        img = Image.open(os.path.join(self.image_dir, img_path))
        # The file name without extension is the image id used in the annotations.
        img_idx = int(os.path.splitext(img_path)[0])
        boxes, gt_classes_str = self._get_annotation(img_idx)
        return img, boxes, gt_classes_str

In [None]:
# Build the dataset from the images in ./VOC2COCO/train2017; `split` keeps its
# default 'train', so annotations come from annotations/instances_train2017.json.
pascal_data = PASCAL_DATASET('./VOC2COCO', 'train2017')

In [None]:
# Iterator over the dataset; each next() call yields one
# (image, boxes, class names) sample.
pascal_it = iter(pascal_data)

In [None]:
# Fetch the next sample and draw its ground-truth boxes (red by default).
img, gt_boxes, gt_box_classes = next(pascal_it)
draw_bb(img, gt_boxes)

In [None]:
# Advance to the following sample and draw its ground-truth boxes.
img, gt_boxes, gt_box_classes = next(pascal_it)
draw_bb(img, gt_boxes)

In [None]:
# Advance once more; `img` and `gt_boxes` from this sample are the ones the
# IoU section below works with.
img, gt_boxes, gt_box_classes = next(pascal_it)
draw_bb(img, gt_boxes)

## 3. Calculate IoU!

In [None]:
### Ground-truth boxes of the most recently loaded sample, and the array's shape
print(gt_boxes)
print(gt_boxes.shape)

In [None]:
### Pretend the network produced these four boxes (one per row) ...
predicted_boxes = np.array(
    [
        [110, 80, 100, 120],
        [50, 40, 110, 80],
        [130, 120, 100, 180],
        [200, 50, 150, 300],
    ],
    dtype=np.int32,
)
### ... with one confidence score per box, in the same order
predicted_scores = np.array([0.9, 0.8, 0.7, 0.6])
print(predicted_boxes)
print(predicted_boxes.shape)

In [None]:
# Overlay ground-truth boxes (red, the default color1) and predicted boxes
# (green, the default color2) on the image.
draw_bb2(img, gt_boxes, predicted_boxes)

In [None]:
## Convert to float32 pytorch tensors
# torch.as_tensor accepts NumPy arrays as well as tensors, so this cell can be
# re-run after the names already hold tensors (torch.from_numpy would raise a
# TypeError on the second run).
gt_boxes = torch.as_tensor(gt_boxes, dtype=torch.float32)
# Keep a single ground-truth box with a leading dimension of size 1.
one_box = torch.unsqueeze(gt_boxes[0], 0)
predicted_boxes = torch.as_tensor(predicted_boxes, dtype=torch.float32)
predicted_scores = torch.as_tensor(predicted_scores, dtype=torch.float32)
print(predicted_boxes.shape)

In [None]:
def bbox_iou(box1, box2):
    """
    Return the IoU (intersection over union) between bounding boxes.

    Boxes are given as (x, y, width, height) rows; they are converted to
    corner coordinates internally.

    box1 : (1, 4) shaped pytorch tensor - single GT bounding box
    box2 : (N, 4) shaped pytorch tensor - multiple predictions from network

    Returns an (N,) shaped tensor with the IoU of box1 against every row of
    box2. A pair whose union has zero area (degenerate boxes) gets an IoU of 0
    instead of NaN.
    """
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,0]+box1[:,2], box1[:,1]+box1[:,3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,0]+box2[:,2], box2[:,1]+box2[:,3]

    ## intersection rectangle coordinates
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    ## intersection area; clamping makes non-overlapping boxes contribute 0
    inter_area = torch.clamp(inter_rect_x2-inter_rect_x1, min=0.)\
            * torch.clamp(inter_rect_y2-inter_rect_y1, min=0.)

    ## union = both areas minus the part counted twice
    area_1 = (b1_x2-b1_x1) * (b1_y2-b1_y1)
    area_2 = (b2_x2-b2_x1) * (b2_y2-b2_y1)
    union_area = area_1+area_2-inter_area

    ## calculate iou, guarding the 0/0 case of an empty union
    iou = inter_area/union_area
    iou = torch.where(union_area > 0, iou, torch.zeros_like(iou))

    return iou

In [None]:
### IoU between the single ground-truth box and every predicted box
ious = bbox_iou(one_box, predicted_boxes)

In [None]:
# Print the IoU scores next to the boxes they belong to (same row order).
print(ious)
print(predicted_boxes)

## 4. Threshold bounding boxes based on IoU scores

In [None]:
threshold = 0.2
# Indices of the predictions whose IoU with the ground-truth box exceeds the threshold.
th = (ious.numpy() > threshold).nonzero()
th_boxes = predicted_boxes[th]
# Draw the ground-truth box together with the predictions that survived.
draw_bb2(img, one_box, th_boxes)

In [None]:
threshold = 0.3
# Same filtering as before with a stricter IoU cut-off.
th = (ious.numpy() > threshold).nonzero()
th_boxes = predicted_boxes[th]
# Draw the ground-truth box together with the predictions that survived.
draw_bb2(img, one_box, th_boxes)

## 5. Average Precision

In [None]:
import random
class_list = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]


def make_test_observations(pascal_it, num_image = 10, seed=None):
    """Build ground truths and synthetic noisy detections for AP experiments.

    For each of the next ``num_image`` samples taken from ``pascal_it`` every
    ground-truth box is recorded, and one fake detection is derived from it:
    the box is shifted by a random jitter (each coordinate moves by less than
    10), the class is replaced by a random entry of ``class_list`` with
    probability 0.33, and the confidence is a random number in [0, 1).

    Args:
        pascal_it: iterator yielding ``(img, gt_boxes, gt_box_classes)``.
        num_image: number of samples to consume from the iterator.
        seed: optional seed; when given, a private ``random.Random(seed)`` is
            used so the observations are reproducible. When ``None`` the
            module-level ``random`` generator is used.

    Returns:
        ``(images, groundtruths, detections)``. Each ground truth is
        ``[image index, class name, 1.0, box]`` and each detection is
        ``[image index, class name, confidence, box]`` with boxes as lists.

    Raises:
        StopIteration: if ``pascal_it`` yields fewer than ``num_image`` samples.
    """
    rng = random if seed is None else random.Random(seed)
    images = []
    groundtruths = []
    detections = []
    for i in range(num_image):
        img, gt_boxes, gt_box_classes = next(pascal_it)
        images.append(img)
        for gt_box, gt_class in zip(gt_boxes, gt_box_classes):
            groundtruths.append([i, gt_class, 1.0, list(gt_box)])

            # With probability 0.33 the detection gets a random class label.
            misclassify_draw = rng.random()
            pred_cls = class_list.index(gt_class)
            if misclassify_draw < 0.33:
                pred_cls = rng.randrange(0, len(class_list))

            # Jitter every coordinate by direction in [-1, 1) times scale in [0, 10).
            box_jitter = []
            for _ in range(4):
                direction = rng.random()*2-1.0
                scale = rng.random()*10
                box_jitter.append(direction*scale)
            pred_box = np.asarray(box_jitter) + gt_box

            detections.append([i, class_list[pred_cls], rng.random(), list(pred_box)])

    return images, groundtruths, detections

In [None]:
# Consume the next 10 samples (the default num_image) from the iterator and
# build ground truths plus randomly perturbed detections for them.
images, groundtruths, detections = make_test_observations(pascal_it)

In [None]:
# Display the ground truths; each entry is [image index, class name, 1.0, box].
groundtruths

In [None]:
# Display the detections; each entry is [image index, class name, confidence, box].
detections

In [None]:
def calculateAveragePrecision(rec, prec):
    """Compute average precision as the area under the precision envelope.

    ``rec`` and ``prec`` are equally long sequences of recall and precision
    values. Recall is padded with 0 and 1, precision with 0 on both ends; the
    precision curve is then made non-increasing from right to left, and the
    area is summed over the points where recall changes.

    Returns ``[ap, mpre, mrec, ii]``: the average precision, the padded
    precision envelope and padded recall (both without their last element),
    and the indices at which recall changes.
    """
    mrec = [0, *rec, 1]
    mpre = [0, *prec, 0]

    # Walk backwards so every entry becomes the maximum of everything to its right.
    for right in reversed(range(1, len(mpre))):
        mpre[right - 1] = max(mpre[right - 1], mpre[right])

    # Positions (in the padded list) whose recall differs from the previous one.
    ii = [
        pos + 1
        for pos, (previous, current) in enumerate(zip(mrec, mrec[1:]))
        if current != previous
    ]

    ap = 0
    for pos in ii:
        recall_step = mrec[pos] - mrec[pos - 1]
        ap = ap + np.sum(recall_step * mpre[pos])

    last = len(mpre) - 1
    return [ap, mpre[0:last], mrec[0:last], ii]

In [None]:
from collections import Counter
class_list = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]


def AP(detections, groundtruths, classes=class_list, IOUThreshold=0.5):
    """Compute per-class precision, recall and average precision.

    Args:
        detections: list of ``[image index, class name, confidence, box]``.
        groundtruths: list of ``[image index, class name, 1.0, box]``.
        classes: class names to evaluate; one result is produced per name.
        IOUThreshold: minimum IoU for a detection to match a ground truth.

    For every class, detections are visited in order of decreasing confidence.
    A detection is a true positive when its best-overlapping ground truth in
    the same image reaches ``IOUThreshold`` and has not been matched by an
    earlier detection; otherwise it is a false positive.

    Returns:
        A list with one dict per class holding the keys 'class', 'precision',
        'recall', 'AP', 'total positives', 'total TP' and 'total FP'.
    """
    result = []

    for c in classes:

        dects = [d for d in detections if d[1] == c]
        gts = [g for g in groundtruths if g[1] == c]

        npos = len(gts)

        # Highest confidence first (stable for equal confidences).
        dects = sorted(dects, key=lambda dect: dect[2], reverse=True)

        TP = np.zeros(len(dects))
        FP = np.zeros(len(dects))

        # Group this class's ground truths by image once, instead of
        # re-filtering the whole list for every detection.
        gts_by_image = {}
        for g in gts:
            gts_by_image.setdefault(g[0], []).append(g)

        # One "already matched" flag per ground-truth box of each image,
        # e.g. {99: [0, 0], 380: [0, 0, 0, 0], ...}
        matched = {
            image_id: np.zeros(len(image_gts))
            for image_id, image_gts in gts_by_image.items()
        }

        for d, dect in enumerate(dects):
            image_id = dect[0]
            candidates = gts_by_image.get(image_id, [])
            dect_box = torch.unsqueeze(torch.tensor(dect[3]), 0)

            iouMax = 0
            # -1 means "no overlapping ground truth"; it is reset for every
            # detection so a stale index from a previous one is never reused.
            jmax = -1

            for j, candidate in enumerate(candidates):
                gt_box = torch.unsqueeze(torch.tensor(candidate[3]), 0)
                iou1 = bbox_iou(dect_box, gt_box).item()
                if iou1 > iouMax:
                    iouMax = iou1
                    jmax = j

            if jmax >= 0 and iouMax >= IOUThreshold and matched[image_id][jmax] == 0:
                TP[d] = 1
                matched[image_id][jmax] = 1
            else:
                # Too little overlap, or the ground truth was already claimed
                # by a more confident detection.
                FP[d] = 1

        acc_FP = np.cumsum(FP)
        acc_TP = np.cumsum(TP)
        if npos == 0:
            # No ground truth for this class: recall stays at zero.
            rec = acc_TP
        else:
            rec = acc_TP / npos
        prec = np.divide(acc_TP, (acc_FP + acc_TP))

        [ap, mpre, mrec, ii] = calculateAveragePrecision(rec, prec)

        r = {
            'class' : c,
            'precision' : prec,
            'recall' : rec,
            'AP' : ap,
            'total positives' : npos,
            'total TP' : np.sum(TP),
            'total FP' : np.sum(FP)
        }

        result.append(r)

    return result

In [None]:
# Evaluate every class with the default IoU threshold (0.5) and print the
# per-class result dictionaries.
result = AP(detections, groundtruths)
for dic in result:
    print(dic)

## 6. mean Average Precision

In [None]:
def mAP(result, ignore_empty=False):
    """Return the mean of the 'AP' values of the per-class results.

    Args:
        result: list of dicts, each with an 'AP' entry.
        ignore_empty: when True, entries whose 'total positives' is 0 (classes
            without any ground truth) are left out of the mean. Entries that
            have no 'total positives' key are always kept. Defaults to False,
            which averages over every entry.

    Returns:
        The mean AP, or 0.0 when there is nothing to average.
    """
    if ignore_empty:
        result = [r for r in result if r.get('total positives', 1) > 0]
    if not result:
        # Avoid dividing by zero on an empty result list.
        return 0.0

    return sum(r['AP'] for r in result) / len(result)

In [None]:
# Print the AP of every class followed by the mean over all of them.
for r in result:
    print(f"{r['class']} AP : {r['AP']}")
print("---------------------------")
print(f"mAP : {mAP(result)}")