# Refine bounding boxes
Clean up edge cases where bounding boxes overlap, cover the painting entirely, and so on.

### 0. Import libraries and load data

In [None]:
%load_ext autoreload
%autoreload 2

import sys
import copy
import json 

# Ratio (bounding box area / image area) at or above which a box is treated as
# covering almost the entire painting.
MAX_COVERAGE_AREA = 0.95
# IoU at or above which two bounding boxes are treated as overlapping almost completely.
MIN_OVERLAP_IOU_THRESHOLD = 0.95
# Directory prefix of the annotation JSON files loaded below.
ANNOTATIONS_PATH = "../../data/annotations/"

sys.path.append("../annotate_dataset/")

from ground_objects import *
from annotate_paintings_utils import *

In [None]:
# Annotation batches to merge; each one is read from "<ANNOTATIONS_PATH><name>.json".
filenames = [
    "annotations_10000_10009",
    "annotations_30_39",
    "annotations_100_109",
    "annotations_1000_1009",
    "annotations_2400_2409",
    "annotations_2500_2509",
    "annotations_7000_7009",
]
all_annotations = []

# Flatten the "annotations" entries of every batch into a single list.
for filename in filenames:
    annotations_file = f"{ANNOTATIONS_PATH}{filename}.json"
    with open(annotations_file) as f:
        x = json.load(f)
    all_annotations += x["annotations"]

In [None]:
# Index the bounding boxes of every annotated painting by its painting id.
paintings_bboxes = {}
for painting_annotation in all_annotations:
    painting_id = painting_annotation["painting_id"]
    paintings_bboxes[painting_id] = painting_annotation["bounding_boxes"]


In [None]:
# Sanity check: report every bounding box whose label (bbox[0]) is not among
# the keys of the painting's "objects" mapping.
for painting in all_annotations:
    extracted_objects = list(painting["objects"])
    bbox_labels = [bbox[0] for bbox in painting["bounding_boxes"]]

    unmatched_labels = (label for label in bbox_labels if label not in extracted_objects)
    for bbox_label in unmatched_labels:
        print(painting["painting_id"], bbox_label, bbox_labels, extracted_objects)

### 1. Define bounding box comparison functions

In [None]:
def compute_bbox_area(bbox):
    """Return the area of a box given as (x_left, y_top, x_right, y_bottom)."""
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    return width * height

In [None]:
def compute_iou(box_a, box_b):
    """Return the intersection over union (IoU) of two boxes.

    Both boxes are indexed as (x_left, y_top, x_right, y_bottom).

    Returns:
        A float in [0, 1] for well-formed boxes. Boxes that do not overlap,
        only touch along an edge or corner, or are degenerate (zero width or
        height) yield 0.0.
    """
    # determine the coordinates of the intersection rectangle
    x_left = max(box_a[0], box_b[0])
    y_top = max(box_a[1], box_b[1])
    x_right = min(box_a[2], box_b[2])
    y_bottom = min(box_a[3], box_b[3])

    # An empty (or zero-area) intersection means an IoU of 0. Using `<=` also
    # covers two degenerate boxes at the same location, whose union is 0 and
    # would otherwise trigger a division by zero below.
    if x_right <= x_left or y_bottom <= y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both bounding boxes
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])

    # A positive intersection implies both areas are positive, so the union is > 0.
    union_area = area_a + area_b - intersection_area
    return intersection_area / float(union_area)


In [None]:
def is_box_inside(box_a, box_b):
    """Tell whether box_a lies inside box_b, allowing a small slack.

    Boxes are indexed as (x_left, y_top, x_right, y_bottom). The slack comes
    from scaling box_b's top-left coordinates by 0.99 and its bottom-right
    coordinates by 1.01 before comparing.
    """
    top_left_ok = box_a[0] >= box_b[0] * 0.99 and box_a[1] >= box_b[1] * 0.99
    if not top_left_ok:
        return False

    return box_a[2] <= box_b[2] * 1.01 and box_a[3] <= box_b[3] * 1.01

In [None]:
def compute_bbox_coverage_area(box, image):
    """Return the ratio between the bounding box area and the image area.

    Args:
        box: box indexed as (x_left, y_top, x_right, y_bottom).
        image: object whose ``size`` attribute holds the two image dimensions
            (presumably a PIL image -- confirm against ``load_image``).
    """
    box_width = box[2] - box[0]
    box_height = box[3] - box[1]
    image_area = image.size[0] * image.size[1]

    return (box_width * box_height) / image_area

### 2. Refine all bounding boxes

In [None]:
def refine_bboxes(bboxes, image):
    """Run one refinement pass over the bounding boxes of a painting.

    Each entry of ``bboxes`` is indexed as ``bbox[0]`` (label), ``bbox[1]``
    (probability used to decide which box survives) and ``bbox[2]`` (box
    coordinates).

    At most one box is deleted per call, and ``bboxes`` is modified in place.
    The caller is expected to call this function again until it reports that
    nothing was removed.

    Args:
        bboxes: list of bounding box entries of a single painting.
        image: image of the painting, used to compute coverage ratios.

    Returns:
        A ``(bboxes, refined)`` tuple; ``refined`` is False when a box was
        removed during this pass and True when no removal rule matched.
    """
    # 1. report any bounding box that covers almost the entire painting.
    # NOTE: such boxes are only reported here, not removed; deciding whether
    # the label is generic (painting, portrait) is left to the reader.
    for bbox in bboxes:
        bbox_coverage_area = compute_bbox_coverage_area(bbox[2], image)
        if bbox_coverage_area >= MAX_COVERAGE_AREA:
            # the coverage is a ratio, so scale it before printing it as a percentage
            print(f"Box {bbox[0]} has a coverage area of {round(bbox_coverage_area * 100, 2)}%")

    for index_a, bbox_a in enumerate(bboxes):
        # depends only on box a, so compute it once per outer iteration
        box_a_coverage_area = compute_bbox_coverage_area(bbox_a[2], image)

        for index_b, bbox_b in enumerate(bboxes):
            if index_a == index_b:
                continue

            # index of the box with the lower probability (box b on ties)
            lower_probability_index = index_a if bbox_a[1] < bbox_b[1] else index_b

            # 2. if a box is inside of another, have the same label and the outer one doesn't
            # cover the entire painting, remove the box with lower probability
            is_inside = is_box_inside(bbox_b[2], bbox_a[2])
            if is_inside and bbox_a[0] == bbox_b[0] and box_a_coverage_area < MAX_COVERAGE_AREA:
                print(f"Box {bbox_b[0]} is inside box {bbox_a[0]}")
                # return right away: the list changed, so the indices are stale
                del bboxes[lower_probability_index]
                return bboxes, False

            # 3. if two bounding boxes overlap almost completely, remove the one with the lower probability
            iou = compute_iou(bbox_a[2], bbox_b[2])
            if iou >= MIN_OVERLAP_IOU_THRESHOLD:
                print(f"Box {bbox_a[0]} and {bbox_b[0]} have an iou of {round(iou, 4)}")
                del bboxes[lower_probability_index]
                return bboxes, False

    return bboxes, True


In [None]:
def refine_all_bboxes(paintings_bboxes, verbose=True):
    """Refine the bounding boxes of every painting.

    Args:
        paintings_bboxes: mapping from painting id to its list of bounding
            box entries (coordinates at index 2 of each entry).
        verbose: when true, display the annotated image before and after
            the refinement of each painting.

    Returns:
        A new mapping from painting id to its refined list of bounding boxes.
    """
    refined_paintings_bboxes = {}

    for painting_id, original_bboxes in paintings_bboxes.items():
        print(painting_id)
        print(original_bboxes)
        _, image = load_image(painting_id)

        # largest boxes first, so that boxes are traversed from outside towards inside
        remaining_bboxes = sorted(
            original_bboxes,
            key=lambda entry: compute_bbox_area(entry[2]),
            reverse=True,
        )

        if verbose:
            display_annotated_image(copy.deepcopy(image), remaining_bboxes, True)

        # each pass removes at most one box; repeat until a pass removes nothing
        is_stable = False
        while remaining_bboxes and not is_stable:
            remaining_bboxes, is_stable = refine_bboxes(remaining_bboxes, image)

        if verbose:
            display_annotated_image(image, remaining_bboxes, True)

        refined_paintings_bboxes[painting_id] = remaining_bboxes

    assert len(paintings_bboxes) == len(refined_paintings_bboxes), "There are missing paintings."
    bboxes_no_before = sum(len(bboxes) for bboxes in paintings_bboxes.values())
    bboxes_no_after = sum(len(bboxes) for bboxes in refined_paintings_bboxes.values())
    print(f"\nNumber of bounding boxes before and after refinement: {bboxes_no_before} -> {bboxes_no_after}")

    return refined_paintings_bboxes

In [None]:
# replace the bounding boxes of every annotation with their refined version
refined_paintings_bboxes = refine_all_bboxes(paintings_bboxes, verbose=True)
for annotations in all_annotations:
    painting_id = annotations["painting_id"]
    annotations["bounding_boxes"] = refined_paintings_bboxes[painting_id]