# Ensemble of Object Detection Models based on Detectron2

## Prepare Development Environment
1. Installation
    ```bash
    sudo apt-get install -y python3-dev python3-venv
    python3 -m venv env
    source env/bin/activate
    python -m pip install pip -U
    python -m pip install -r requirements.txt
    python -m ipykernel install --user --name env --display-name ensemble_detectron2
    ```

## Unpack Detection Results and Download COCO Annotations

In [1]:
!unzip results.zip
!wget -c http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip annotations_trainval2017.zip
!rm annotations_trainval2017.zip

## Import Modules

In [1]:
import torch
from tqdm import tqdm

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

import ensemble_boxes

## Evaluate Baseline Models
### Load Detection Results and Ground Truth

In [2]:
# Ground-truth annotations, wrapped in a pycocotools COCO index.
gt_path = "./annotations/instances_val2017.json"
coco_gt = COCO(gt_path)
img_ids = coco_gt.getImgIds()

# One detection-results file per baseline model.
dt_paths = [
    "./results/faster_rcnn_R_50_C4_1x/coco_instances_results.json",
    "./results/faster_rcnn_R_50_DC5_1x/coco_instances_results.json",
    "./results/retinanet_R_50_FPN_1x/coco_instances_results.json",
]

# Load each results file against the ground truth, keeping the order of dt_paths.
coco_dts = list(map(coco_gt.loadRes, dt_paths))

loading annotations into memory...
Done (t=0.46s)
creating index...
index created!
Loading and preparing results...
DONE (t=1.46s)
creating index...
index created!
Loading and preparing results...
DONE (t=1.22s)
creating index...
index created!
Loading and preparing results...
DONE (t=2.73s)
creating index...
index created!


### Compute Box AP for Each Baseline

In [5]:
# Print the COCO bbox metrics of every baseline, framed by its results path.
for dt_path, coco_dt in zip(dt_paths, coco_dts):
    print("\n================================================================\n")
    print(dt_path)
    print("\n================================================================\n")

    # COCOeval sequence: per-image evaluation, accumulation, printed summary.
    coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    print("\n================================================================\n")




./results/faster_rcnn_R_50_C4_1x/coco_instances_results.json


Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=21.60s).
Accumulating evaluation results...
DONE (t=3.47s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.357
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.561
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.380
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.192
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.409
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.487
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.311
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.485
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.506
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.310
 Av

## Box mAP of Baseline Models

| Baselines                   | Box AP @(IoU=0.50:0.95, area=all, maxDets=100) |
|-----------------------------|------|
|faster_rcnn_R_50_C4_1x.yaml  | 0.357|
|faster_rcnn_R_50_DC5_1x.yaml | 0.373|
|retinanet_R_50_FPN_1x.yaml   | 0.374|

## Ensemble using Weighted Boxes Fusion Method

In [6]:
# parameters (forwarded unchanged to ensemble_boxes.weighted_boxes_fusion)
iou_thr = 0.7          # IoU threshold used by the fusion to match boxes
skip_box_thr = 0.0001  # score threshold below which the fusion skips boxes


def _clamp_unit(value):
    """Clamp ``value`` to the closed interval [0, 1]."""
    return min(1.000, max(0.000, value))


def _to_normalized_xyxy(bbox, width, height):
    """Convert a ``[x, y, w, h]`` pixel box to a clamped ``[x1, y1, x2, y2]`` box.

    Args:
        bbox: Sequence of four numbers ``[x, y, w, h]`` in pixels.
        width: Image width in pixels, used to scale the x coordinates.
        height: Image height in pixels, used to scale the y coordinates.

    Returns:
        ``[x1, y1, x2, y2]`` with every coordinate divided by the image size
        and clamped to [0, 1].
    """
    x, y, w, h = bbox
    return [
        _clamp_unit(x / width),
        _clamp_unit(y / height),
        _clamp_unit((x + w) / width),
        _clamp_unit((y + h) / height),
    ]


print("\n================================================================\n")
print("Ensemble using Weighted Boxes Fusion")
print(f"iou thr: {iou_thr}, skip_box_thr: {skip_box_thr}")
print("\n================================================================\n")

ensemble = []
cnt_id = 0
for img_id in tqdm(img_ids, desc="Weighted Boxes Fusion"):
    # Look the image record up once; both dimensions come from the same entry.
    img_info = coco_gt.loadImgs(img_id)[0]
    height = float(img_info["height"])
    width = float(img_info["width"])

    # One inner list per model, in the order of coco_dts.
    boxes_list = []
    scores_list = []
    labels_list = []
    for coco_dt in coco_dts:
        anns = coco_dt.imgToAnns[img_id]
        boxes_list.append(
            [_to_normalized_xyxy(ann["bbox"], width, height) for ann in anns]
        )
        scores_list.append([ann["score"] for ann in anns])
        labels_list.append([ann["category_id"] for ann in anns])

    boxes, scores, labels = ensemble_boxes.weighted_boxes_fusion(
        boxes_list,
        scores_list,
        labels_list,
        weights=None,
        iou_thr=iou_thr,
        skip_box_thr=skip_box_thr,
    )

    for box, score, label in zip(boxes, scores, labels):
        # Scale the fused box back to pixels. Casting to built-in numbers keeps
        # the annotation free of NumPy scalars, so category_id is a plain int
        # and the list can be dumped to JSON.
        x1, y1, x2, y2 = (float(coord) for coord in box)
        x1 *= width
        x2 *= width
        y1 *= height
        y2 *= height

        ensemble.append(
            dict(
                image_id=img_id,
                category_id=int(label),
                bbox=[x1, y1, x2 - x1, y2 - y1],  # back to [x, y, w, h]
                score=float(score),
                id=cnt_id,
            )
        )
        cnt_id += 1

# Evaluate the fused detections with the same COCOeval sequence as the baselines.
coco_ensemble = coco_gt.loadRes(ensemble)
coco_eval = COCOeval(coco_gt, coco_ensemble, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
print("\n================================================================\n")



Ensemble using Weighted Boxes Fusion
iou thr: 0.7, skip_box_thr: 0.0001


Loading and preparing results...
DONE (t=3.40s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=40.61s).
Accumulating evaluation results...
DONE (t=7.61s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.601
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.443
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.243
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.453
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.334
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.545
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ]

## Results

| Models                   | Box AP @(IoU=0.50:0.95, area=all, maxDets=100) |
|-----------------------------|------|
|faster_rcnn_R_50_C4_1x.yaml  | 0.357|
|faster_rcnn_R_50_DC5_1x.yaml | 0.373|
|retinanet_R_50_FPN_1x.yaml   | 0.374|
|__Ensemble Model (Weighted Boxes Fusion)__ | __*0.403 (+0.029 vs. best baseline)*__|