In [1]:
import json
import numpy as np
import os
import torch
import tqdm
import time
from shutil import copyfile

# Detectron imports
from detectron2.engine import launch
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model
from detectron2.data import build_detection_test_loader, MetadataCatalog

from detectron2.utils import comm
# Project imports
import core.datasets.metadata as metadata

from core.setup import setup_config, setup_arg_parser
from offline_evaluation import compute_average_precision, compute_probabilistic_metrics, compute_calibration_errors
from probabilistic_inference.probabilistic_inference import build_predictor
from probabilistic_inference.inference_utils import instances_to_json

from train_utils import ActiveTrainer, compute_cls_entropy, compute_cls_max_conf
from mp_utils import parallel_predict

from torch.nn.parallel import DistributedDataParallel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
import json
import shutil
import os
import tqdm
import datetime
import logging
import time
from collections import OrderedDict
from contextlib import contextmanager
import torch

import multiprocessing as mp
import numpy as np

from detectron2.utils.comm import get_world_size, is_main_process
from detectron2.utils.logger import log_every_n_seconds

from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model
from detectron2.data import build_detection_test_loader, MetadataCatalog

from detectron2.data.build import DatasetMapper, get_detection_dataset_dicts
from detectron2.data.common import MapDataset, DatasetFromList
from detectron2.data.samplers import TrainingSampler, InferenceSampler

import core.datasets.metadata as metadata

from probabilistic_inference.probabilistic_inference import build_predictor
from train_utils import ActiveTrainer, compute_cls_entropy, compute_cls_max_conf

import concurrent.futures
import time

from probabilistic_inference.inference_utils import instances_to_json

In [3]:
arg_parser = setup_arg_parser()
args = arg_parser.parse_args("")
# Support single gpu inference only.
args.num_gpus = 4
#args.dataset_dir = '/public-dataset/BDD/bdd100k'
#args.test_dataset = 'bdd_val'
args.dataset_dir = '~/datasets/VOC2012'
args.test_dataset = 'cocovoc2012_val'
#args.config_file = '/home/richard.tanai/cvpr2/pod_compare/src/configs/BDD-Detection/retinanet/active_retinanet_R_50_FPN_1x_reg_cls_var_dropout.yaml'
args.config_file = '/home/richard.tanai/cvpr2/pod_compare/src/configs/VOC-Detection/retinanet/ent_10.yaml'
args.inference_config = '/home/richard.tanai/cvpr2/pod_compare/src/configs/Inference/bayes_od_mc_dropout.yaml'
args.random_seed = 2000
args.resume=False
print("Command Line Args:", args)

Command Line Args: Namespace(config_file='/home/richard.tanai/cvpr2/pod_compare/src/configs/VOC-Detection/retinanet/ent_10.yaml', dataset_dir='~/datasets/VOC2012', dist_url='tcp://127.0.0.1:50162', eval_only=False, inference_config='/home/richard.tanai/cvpr2/pod_compare/src/configs/Inference/bayes_od_mc_dropout.yaml', iou_correct=0.7, iou_min=0.1, machine_rank=0, min_allowed_score=0.0, num_gpus=4, num_machines=1, opts=[], random_seed=2000, resume=False, test_dataset='cocovoc2012_val')


In [4]:
cfg = setup_config(args, random_seed=args.random_seed, is_testing=False)

#cfg = setup_config(args, random_seed=args.random_seed, is_testing=True)

# Make sure only 1 data point is processed at a time. This simulates
# deployment.
cfg.defrost()
cfg.DATALOADER.NUM_WORKERS = 32
#cfg.SOLVER.IMS_PER_BATCH = 1

cfg.MODEL.DEVICE = device.type

# Set up number of cpu threads
torch.set_num_threads(cfg.DATALOADER.NUM_WORKERS)

# Create inference output directory and copy inference config file to keep
# track of experimental settings
inference_output_dir = os.path.join(
    cfg['OUTPUT_DIR'],
    'inference',
    args.test_dataset,
    os.path.split(args.inference_config)[-1][:-5])
os.makedirs(inference_output_dir, exist_ok=True)
copyfile(args.inference_config, os.path.join(
    inference_output_dir, os.path.split(args.inference_config)[-1]))

# Get category mapping dictionary:
train_thing_dataset_id_to_contiguous_id = MetadataCatalog.get(
    cfg.DATASETS.TRAIN[0]).thing_dataset_id_to_contiguous_id
test_thing_dataset_id_to_contiguous_id = MetadataCatalog.get(
    args.test_dataset).thing_dataset_id_to_contiguous_id

# If both dicts are equal or if we are performing out of distribution
# detection, just flip the test dict.
if (train_thing_dataset_id_to_contiguous_id == test_thing_dataset_id_to_contiguous_id) or (
        cfg.DATASETS.TRAIN[0] == 'coco_not_in_voc_2017_train'):
    cat_mapping_dict = dict(
        (v, k) for k, v in test_thing_dataset_id_to_contiguous_id.items())
else:
    # If not equal, two situations: 1) BDD to KITTI and 2) COCO to PASCAL
    cat_mapping_dict = dict(
        (v, k) for k, v in test_thing_dataset_id_to_contiguous_id.items())
    if 'voc' in args.test_dataset and 'coco' in cfg.DATASETS.TRAIN[0]:
        dataset_mapping_dict = dict(
            (v, k) for k, v in metadata.COCO_TO_VOC_CONTIGUOUS_ID.items())
    elif 'kitti' in args.test_dataset and 'bdd' in cfg.DATASETS.TRAIN[0]:
        dataset_mapping_dict = dict(
            (v, k) for k, v in metadata.BDD_TO_KITTI_CONTIGUOUS_ID.items())
    else:
        ValueError(
            'Cannot generate category mapping dictionary. Please check if training and inference datasets are compatible.')
    cat_mapping_dict = dict(
        (dataset_mapping_dict[k], v) for k, v in cat_mapping_dict.items())

# Build predictor
model = build_model(cfg)



if comm.get_world_size() > 1:
    print(f"Using GPUs {comm.get_local_rank()}")
    model = DistributedDataParallel(model, device_ids=[comm.get_local_rank()], broadcast_buffers=False)
#
DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(cfg.MODEL.WEIGHTS, resume=False)
#model = DistributedDataParallel(model, device_ids=[1,2,3,4], broadcast_buffers=False)

trainer = ActiveTrainer(cfg, model)

train_step = 1
max_step = cfg.ACTIVE_LEARNING.MAX_STEP
label_per_step = cfg.ACTIVE_LEARNING.STEP_N
out_dir = cfg.ACTIVE_LEARNING.OUT_DIR
det_cls_score = cfg.ACTIVE_LEARNING.DET_CLS_SCORE
det_cls_merge_mode = cfg.ACTIVE_LEARNING.DET_CLS_MERGE_MODE
w_cls_score = cfg.ACTIVE_LEARNING.W_CLS_SCORE

os.makedirs(out_dir, exist_ok=True)

process_gpu_list = [0, 1, 2, 3]

start = time.perf_counter()

while(1):
    print(f"performing train step {train_step}")
    trainer.train()
    model_full_path = f"{out_dir}/checkpoint_step{train_step}.pth"
    torch.save(model.state_dict(), model_full_path)

    if len(trainer.dataset.pool) <= 0 or train_step > max_step:
        print("training completed")
        break
    if comm.is_main_process():
        if det_cls_score == 'random':
            if len(trainer.dataset.pool) >= label_per_step:
                trainer.dataset.label_randomly(label_per_step)
            elif len(trainer.dataset.pool) > 0:
                trainer.dataset.label_randomly(len(trainer.dataset.pool))
            else:
                break

        else:
            pool_dataset = trainer.dataset.pool

            final_output_list, cls_score_list, box_score_list = parallel_predict(cfg, model_full_path, cat_mapping_dict, pool_dataset, process_gpu_list)

            cls_score_rank = np.array(cls_score_list).argsort().argsort()
            box_score_rank = (-np.array(box_score_list)).argsort().argsort()

            #possible weighted fusion can be added here
            total_sort = np.argsort((w_cls_score)*cls_score_rank + (1-w_cls_score)*box_score_rank)

            if len(trainer.dataset.pool) >= label_per_step:
                idx_to_label = total_sort[:label_per_step].tolist()
                trainer.dataset.label(idx_to_label)
            elif len(trainer.dataset.pool) > 0:
                trainer.dataset.label_randomly(len(trainer.dataset.pool))
            else:
                break

    trainer.rebuild_trainer()
    train_step += 1

finish = time.perf_counter()
print(f'Finished Active Learning Loop in {round(finish-start, 2)} second(s)')

Loading config /home/richard.tanai/cvpr2/pod_compare/src/configs/VOC-Detection/retinanet/../../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.
Config '/home/richard.tanai/cvpr2/pod_compare/src/configs/Inference/bayes_od_mc_dropout.yaml' has no VERSION. Assuming it to be compatible with latest v2.


[32m[02/24 13:06:14 detectron2]: [0mRank of current process: 0. World size: 1
[32m[02/24 13:06:14 detectron2]: [0mEnvironment info:
----------------------  ----------------------------------------------------------------------
sys.platform            linux
Python                  3.8.5 (default, Sep  4 2020, 07:30:14) [GCC 7.3.0]
numpy                   1.19.5
detectron2              0.3 @/opt/anaconda3/envs/pod/lib/python3.8/site-packages/detectron2
Compiler                GCC 7.3
CUDA compiler           CUDA 10.2
detectron2 arch flags   3.7, 5.0, 5.2, 6.0, 6.1, 7.0, 7.5
DETECTRON2_ENV_MODULE   <not set>
PyTorch                 1.7.1 @/opt/anaconda3/envs/pod/lib/python3.8/site-packages/torch
PyTorch debug build     False
GPU available           True
GPU 0,1,2,3             Tesla V100-SXM2-32GB (arch=7.0)
CUDA_HOME               /usr/local/cuda
Pillow                  8.1.0
torchvision             0.8.2 @/opt/anaconda3/envs/pod/lib/python3.8/site-packages/torchvision
torchvision ar

[32m[02/24 13:06:14 detectron2]: [0mFull config saved to /home/richard.tanai/cvpr2/pod_compare/data/VOC-Detection/retinanet/ent_10/random_seed_2000/config.yaml
[32m[02/24 13:06:18 fvcore.common.checkpoint]: [0mLoading checkpoint from detectron2://ImageNetPretrained/MSRA/R-50.pkl
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mRemapping C2 weights ......
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.0.conv1.norm.bias            loaded from res2_0_branch2a_bn_beta           of shape (64,)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.0.conv1.norm.running_mean    loaded from res2_0_branch2a_bn_running_mean   of shape (64,)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.0.conv1.norm.running_var     loaded from res2_0_branch2a_bn_running_var    of shape (64,)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.0.conv1.norm.weight          loa

[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.2.conv3.norm.bias            loaded from res2_2_branch2c_bn_beta           of shape (256,)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.2.conv3.norm.running_mean    loaded from res2_2_branch2c_bn_running_mean   of shape (256,)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.2.conv3.norm.running_var     loaded from res2_2_branch2c_bn_running_var    of shape (256,)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.2.conv3.norm.weight          loaded from res2_2_branch2c_bn_gamma          of shape (256,)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res2.2.conv3.weight               loaded from res2_2_branch2c_w                 of shape (256, 64, 1, 1)
[32m[02/24 13:06:18 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res3.0.conv1.norm.bias            loaded from res

[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res3.2.conv2.norm.running_var     loaded from res3_2_branch2b_bn_running_var    of shape (128,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res3.2.conv2.norm.weight          loaded from res3_2_branch2b_bn_gamma          of shape (128,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res3.2.conv2.weight               loaded from res3_2_branch2b_w                 of shape (128, 128, 3, 3)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res3.2.conv3.norm.bias            loaded from res3_2_branch2c_bn_beta           of shape (512,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res3.2.conv3.norm.running_mean    loaded from res3_2_branch2c_bn_running_mean   of shape (512,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res3.2.conv3.norm.running_var     loaded from re

[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.1.conv1.weight               loaded from res4_1_branch2a_w                 of shape (256, 1024, 1, 1)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.1.conv2.norm.bias            loaded from res4_1_branch2b_bn_beta           of shape (256,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.1.conv2.norm.running_mean    loaded from res4_1_branch2b_bn_running_mean   of shape (256,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.1.conv2.norm.running_var     loaded from res4_1_branch2b_bn_running_var    of shape (256,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.1.conv2.norm.weight          loaded from res4_1_branch2b_bn_gamma          of shape (256,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.1.conv2.weight               loaded from r

[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.4.conv2.norm.running_mean    loaded from res4_4_branch2b_bn_running_mean   of shape (256,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.4.conv2.norm.running_var     loaded from res4_4_branch2b_bn_running_var    of shape (256,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.4.conv2.norm.weight          loaded from res4_4_branch2b_bn_gamma          of shape (256,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.4.conv2.weight               loaded from res4_4_branch2b_w                 of shape (256, 256, 3, 3)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.4.conv3.norm.bias            loaded from res4_4_branch2c_bn_beta           of shape (1024,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res4.4.conv3.norm.running_mean    loaded from r

[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res5.1.conv1.norm.weight          loaded from res5_1_branch2a_bn_gamma          of shape (512,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res5.1.conv1.weight               loaded from res5_1_branch2a_w                 of shape (512, 2048, 1, 1)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res5.1.conv2.norm.bias            loaded from res5_1_branch2b_bn_beta           of shape (512,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res5.1.conv2.norm.running_mean    loaded from res5_1_branch2b_bn_running_mean   of shape (512,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res5.1.conv2.norm.running_var     loaded from res5_1_branch2b_bn_running_var    of shape (512,)
[32m[02/24 13:06:19 d2.checkpoint.c2_model_loading]: [0mbackbone.bottom_up.res5.1.conv2.norm.weight          loaded from r

performing train step 1
[32m[02/24 13:06:19 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/24 13:07:07 d2.utils.events]: [0m eta: 0:01:08  iter: 99  total_loss: 1.511  loss_cls: 0.959  loss_box_reg: 0.5706  time: 0.4544  data_time: 0.0087  lr: 0.00099001  max_mem: 16795M
[32m[02/24 13:07:51 d2.utils.events]: [0m eta: 0:00:22  iter: 199  total_loss: 1.758  loss_cls: 1.119  loss_box_reg: 0.6228  time: 0.4501  data_time: 0.0089  lr: 0.001  max_mem: 16795M
[32m[02/24 13:08:14 fvcore.common.checkpoint]: [0mSaving checkpoint to /home/richard.tanai/cvpr2/pod_compare/data/VOC-Detection/retinanet/ent_10/random_seed_2000/model_final.pth
[32m[02/24 13:08:14 d2.utils.events]: [0m eta: 0:00:00  iter: 249  total_loss: 1.587  loss_cls: 1.023  loss_box_reg: 0.5535  time: 0.4496  data_time: 0.0086  lr: 0.001  max_mem: 16795M
[32m[02/24 13:08:14 d2.engine.hooks]: [0mOverall training speed: 248 iterations in 0:01:51 (0.4497 s / it)
[32m[02/24 13:08:14 d2.engine.hooks]: 

KeyError: "No object named 'ProbabilisticRetinaNet' found in 'META_ARCH' registry!"