# Ensemble of Object Detection Models based on Detectron2



## Import Modules

In [1]:
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.evaluation.coco_evaluation import COCOEvaluator
from detectron2.data.build import build_detection_test_loader
from detectron2.evaluation.evaluator import inference_on_dataset
import torch
from detectron2.evaluation.evaluator import inference_context
from tqdm import tqdm
from detectron2.structures import Instances, Boxes
import numpy as np
import ensemble_boxes


## Prepare Models, Data and Evaluation

In [2]:
register_coco_instances("dataset_val", {}, "./data/coco/annotations/instances_val2017.json", "./data/coco/images/val2017")

In [3]:
model_configs = [
    "faster_rcnn_R_50_C4_1x.yaml",
    "faster_rcnn_R_50_DC5_1x.yaml",
    "retinanet_R_50_FPN_1x.yaml",
]

models = dict()
for config in model_configs:
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(f"COCO-Detection/{config}"))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"COCO-Detection/{config}")
    cfg.DATASETS.VAL = ("dataset_val",)

    models[config] = DefaultPredictor(cfg).model

cfg = get_cfg()
cfg.DATASETS.VAL = ("dataset_val",)
val_loader = build_detection_test_loader(cfg, "dataset_val")

Loading config /home/kyungpyo/git/Ensemble-Object-Detection-using-Detectron2/env/lib/python3.8/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.
The checkpoint state_dict contains keys that are not used by the model:
  [35mpixel_mean[0m
  [35mpixel_std[0m

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.



## Evaluate Baseline Models

In [4]:
for config, model in models.items():
    evaluator = COCOEvaluator("dataset_val", False, output_dir=config)
    evaluator.reset()
    with inference_context(model), torch.no_grad():
        iter = tqdm(val_loader, total=len(val_loader))
        for idx, inputs in enumerate(iter):
            outputs = model(inputs)
            torch.cuda.synchronize()
            evaluator.process(inputs, outputs)

    print("\n================================================================\n")
    print(config)
    print("\n================================================================\n")
    results = evaluator.evaluate()
    print("\n================================================================\n")


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|██████████| 5000/5000 [10:25<00:00,  8.00it/s]




faster_rcnn_R_50_C4_1x.yaml


Loading and preparing results...
DONE (t=0.53s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.357
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.561
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.380
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.192
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.409
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.487
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.311
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.485
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.506
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.310
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.563
 Averag

100%|██████████| 5000/5000 [05:56<00:00, 14.02it/s]




faster_rcnn_R_50_DC5_1x.yaml


Loading and preparing results...
DONE (t=0.69s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.373
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.587
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.397
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.201
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.417
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.500
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.313
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.488
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.511
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.299
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.565
 Avera

  max_size = (max_size + (stride - 1)) // stride * stride
100%|██████████| 5000/5000 [05:03<00:00, 16.50it/s]




retinanet_R_50_FPN_1x.yaml


Loading and preparing results...
DONE (t=1.08s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.592
 Average

| Baselines                   | Box AP @(IoU=0.50:0.95, area=all, maxDets=100) |
|-----------------------------|------|
|faster_rcnn_R_50_C4_1x.yaml  | 0.357|
|faster_rcnn_R_50_DC5_1x.yaml | 0.373|
|retinanet_R_50_FPN_1x.yaml   | 0.374|

## Ensemble using Non-Maximum Suppression

In [47]:
def build_ensemble_inputs(candidates):
    boxes_list = []
    scores_list = []
    classes_list = []
    
    for candiate in candidates:
        assert len(candiate) == 1
        assert len(candiate[0]) == 1

        instances = candidates[0][0]["instances"]
        image_size = instances.image_size
        pred_boxes = instances.pred_boxes
        scores = instances.scores
        pred_classes = instances.pred_classes

        # normalize
        boxes = pred_boxes.tensor.tolist()
        for box in boxes:
            box[0] = box[0] / image_size[1]
            box[1] = box[1] / image_size[0]
            box[2] = box[2] / image_size[1]
            box[3] = box[3] / image_size[0]

        boxes_list.append(boxes)
        scores_list.append(scores.tolist())
        classes_list.append(pred_classes.tolist())

    return boxes_list, scores_list, classes_list, image_size

def build_instances(boxes, scores, labels, image_size):
    for box in boxes:
        box[0] = box[0] * image_size[1]
        box[1] = box[1] * image_size[0]
        box[2] = box[2] * image_size[1]
        box[3] = box[3] * image_size[0]

    ensemble_instances = Instances(image_size)
    ensemble_instances.pred_boxes = Boxes(torch.as_tensor(boxes))
    ensemble_instances.scores = torch.as_tensor(scores)
    ensemble_instances.pred_classes = torch.as_tensor(labels)

    return [dict(instances=ensemble_instances)]


def predict_ensemble_nms(models, inputs, iou_thr):
    candidates = []
    for config, model in models.items():
        with inference_context(model):
            candidates.append(model(inputs))

    boxes_list, scores_list, classes_list, image_size = build_ensemble_inputs(candidates)

    weights = [1, 1, 1]

    boxes, scores, labels = ensemble_boxes.nms(boxes_list, scores_list, classes_list, weights=weights, iou_thr=iou_thr)
    
    return build_instances(boxes, scores, labels, image_size)

for i in range(4, 10):
    iou_thr = i * 0.1
    with torch.no_grad():
        evaluator.reset()
        iter = tqdm(val_loader, total=len(val_loader))

        for idx, inputs in enumerate(iter):
            outputs = predict_ensemble_nms(models, inputs, iou_thr)
            evaluator.process(inputs, outputs)

    print("\n================================================================\n")
    print("Ensemble using Non-Maximum Suppression")
    print(f"iou_thr: {iou_thr}")
    print("\n================================================================\n")
    results = evaluator.evaluate()
    print("\n================================================================\n")

  max_size = (max_size + (stride - 1)) // stride * stride
100%|██████████| 5000/5000 [20:47<00:00,  4.01it/s]




Ensemble using Non-Maximum Suppression
iou_thr: 0.4


Loading and preparing results...
DONE (t=0.51s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.367
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.559
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.397
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.223
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.408
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.477
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.494
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.512
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.342
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDe

100%|██████████| 5000/5000 [20:45<00:00,  4.01it/s]




Ensemble using Non-Maximum Suppression
iou_thr: 0.5


Loading and preparing results...
DONE (t=1.06s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.517
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.550
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDe

100%|██████████| 5000/5000 [20:48<00:00,  4.01it/s]




Ensemble using Non-Maximum Suppression
iou_thr: 0.6000000000000001


Loading and preparing results...
DONE (t=1.14s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area

100%|██████████| 5000/5000 [20:49<00:00,  4.00it/s]




Ensemble using Non-Maximum Suppression
iou_thr: 0.7000000000000001


Loading and preparing results...
DONE (t=0.70s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area

100%|██████████| 5000/5000 [20:50<00:00,  4.00it/s]




Ensemble using Non-Maximum Suppression
iou_thr: 0.8


Loading and preparing results...
DONE (t=1.07s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDe

100%|██████████| 5000/5000 [20:50<00:00,  4.00it/s]




Ensemble using Non-Maximum Suppression
iou_thr: 0.9


Loading and preparing results...
DONE (t=1.05s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDe

In [38]:
def predict_ensemble_softnms(models, inputs):
    candidates = []
    for _, model in models.items():
        with inference_context(model):
            candidates.append(model(inputs))

    boxes_list, scores_list, classes_list, image_size = build_ensemble_inputs(candidates)
        
    weights = [1, 1, 1]
    iou_thr = 0.5
    skip_box_thr = 0.0001
    sigma = 0.1

    boxes, scores, labels = ensemble_boxes.soft_nms(boxes_list, scores_list, classes_list, weights=weights, iou_thr=iou_thr, sigma=sigma, thresh=skip_box_thr)
    
    return build_instances(boxes, scores, labels, image_size)


with torch.no_grad():
    evaluator.reset()
    iter = tqdm(val_loader, total=len(val_loader))

    for idx, inputs in enumerate(iter):
        outputs = predict_ensemble_softnms(models, inputs)
        evaluator.process(inputs, outputs)

print("\n================================================================\n")
print("Ensemble using Soft Non-Maximum Suppression")
print("\n================================================================\n")
results = evaluator.evaluate()
print("\n================================================================\n")

100%|██████████| 5000/5000 [21:44<00:00,  3.83it/s]




Ensemble using Soft Non-Maximum Suppression


Loading and preparing results...
DONE (t=0.94s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.372
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.565
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.401
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.228
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.412
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.482
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.510
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.538
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.364
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]

In [39]:
def predict_ensemble_non_maximum_weighted(models, inputs):
    candidates = []
    for _, model in models.items():
        with inference_context(model):
            candidates.append(model(inputs))

    boxes_list, scores_list, classes_list, image_size = build_ensemble_inputs(candidates)
        
    weights = [1, 1, 1]
    iou_thr = 0.5
    skip_box_thr = 0.0001

    boxes, scores, labels = ensemble_boxes.non_maximum_weighted(boxes_list, scores_list, classes_list, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    
    return build_instances(boxes, scores, labels, image_size)


with torch.no_grad():
    evaluator.reset()
    iter = tqdm(val_loader, total=len(val_loader))

    for idx, inputs in enumerate(iter):
        outputs = predict_ensemble_non_maximum_weighted(models, inputs)
        evaluator.process(inputs, outputs)

print("\n================================================================\n")
print("Ensemble using Soft Non-Maximum Weighted")
print("\n================================================================\n")
results = evaluator.evaluate()
print("\n================================================================\n")

  max_size = (max_size + (stride - 1)) // stride * stride
100%|██████████| 5000/5000 [21:38<00:00,  3.85it/s]




Ensemble using Soft Non-Maximum Weighted


Loading and preparing results...
DONE (t=1.05s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.373
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.402
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.481
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.318
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.516
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.550
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 

In [46]:
def predict_ensemble_weighted_boxes_fusion(models, inputs, param):
    candidates = []
    for _, model in models.items():
        with inference_context(model):
            candidates.append(model(inputs))

    boxes_list, scores_list, classes_list, image_size = build_ensemble_inputs(candidates)


    iou_thr, skip_box_thr = param

    boxes, scores, labels = ensemble_boxes.weighted_boxes_fusion(boxes_list, scores_list, classes_list, weights=None, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    
    return build_instances(boxes, scores, labels, image_size)


for i in range(4,10):
    iou_thr = i * 0.1
    skip_box_thr = 0.0001
    with torch.no_grad():
        evaluator.reset()
        iter = tqdm(val_loader, total=len(val_loader))

        for idx, inputs in enumerate(iter):
            outputs = predict_ensemble_weighted_boxes_fusion(models, inputs, (iou_thr, skip_box_thr))
            evaluator.process(inputs, outputs)
    print("\n================================================================\n")
    print("Ensemble using Weighted Boxes Fusion")
    print(f"iou thr: {iou_thr}, skip_box_thr: {skip_box_thr}")
    print("\n================================================================\n")
    results = evaluator.evaluate()
    print("\n================================================================\n")

  max_size = (max_size + (stride - 1)) // stride * stride
100%|██████████| 5000/5000 [21:23<00:00,  3.89it/s]




Ensemble using Weighted Boxes Fusion
iou thr: 0.4, skip_box_thr: 0.0001


Loading and preparing results...
DONE (t=0.88s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.246
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.461
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.224
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.169
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.287
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.269
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.257
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.418
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.438
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.313
 Average Recall     (AR) @[ IoU=0.50:0.95 |

100%|██████████| 5000/5000 [21:21<00:00,  3.90it/s]




Ensemble using Weighted Boxes Fusion
iou thr: 0.5, skip_box_thr: 0.0001


Loading and preparing results...
DONE (t=1.14s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.363
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.556
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.390
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.230
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.409
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.454
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.314
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.515
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.549
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 |

100%|██████████| 5000/5000 [21:22<00:00,  3.90it/s]




Ensemble using Weighted Boxes Fusion
iou thr: 0.6000000000000001, skip_box_thr: 0.0001


Loading and preparing results...
DONE (t=1.12s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.369
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.562
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.397
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.413
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.468
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.318
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ 

100%|██████████| 5000/5000 [21:18<00:00,  3.91it/s]




Ensemble using Weighted Boxes Fusion
iou thr: 0.7000000000000001, skip_box_thr: 0.0001


Loading and preparing results...
DONE (t=1.18s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.371
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.564
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.399
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.415
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.474
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.318
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ 

100%|██████████| 5000/5000 [21:22<00:00,  3.90it/s]




Ensemble using Weighted Boxes Fusion
iou thr: 0.8, skip_box_thr: 0.0001


Loading and preparing results...
DONE (t=1.13s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.373
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.566
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.402
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.481
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 |

100%|██████████| 5000/5000 [21:23<00:00,  3.90it/s]




Ensemble using Weighted Boxes Fusion
iou thr: 0.9, skip_box_thr: 0.0001


Loading and preparing results...
DONE (t=1.12s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.374
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.319
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.551
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372
 Average Recall     (AR) @[ IoU=0.50:0.95 |

In [None]:
with torch.no_grad():
    torch.cuda.empty_cache()

    model_config = [
        "retinanet_R_50_FPN_1x.yaml",
        "faster_rcnn_R_50_DC5_1x.yaml",
        "faster_rcnn_R_50_C4_1x.yaml",
    ]

    for config in model_config:
        print("\n================================================================\n")
        print(config )
        print("\n================================================================\n")

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file(f"COCO-Detection/{config}"))
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"COCO-Detection/{config}")
        cfg.DATASETS.VAL = ("dataset_val",)

        predictor = DefaultPredictor(cfg)

        evaluator = COCOEvaluator("dataset_val", False, output_dir="output")
        val_loader = build_detection_test_loader(cfg, "dataset_val")
        inference_on_dataset(predictor.model, val_loader, evaluator)

        print("\n================================================================\n")
