In [1]:
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg

In [2]:
from detectron2.utils.logger import setup_logger
# Configure detectron2's logging up front, before the detector is built in a later cell.
setup_logger()
import numpy as np

In [13]:
class Detectron2:
    """Wrapper around a detectron2 ``DefaultPredictor`` that returns NumPy arrays.

    The predictor is built once in ``__init__``; ``detect`` runs it on a single
    image and converts the resulting instances into plain arrays.
    """

    def __init__(self, cfg_path, weights_path, score_thresh=0.5, device="cuda"):
        """Build the predictor from a config file and a weights checkpoint.

        Args:
            cfg_path: Path of the config file merged on top of ``get_cfg()``.
            weights_path: Value assigned to ``MODEL.WEIGHTS``.
            score_thresh: Value assigned to
                ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``. Defaults to 0.5, the
                value that used to be hard-coded.
            device: Value assigned to ``MODEL.DEVICE``. Defaults to ``"cuda"``,
                the value that used to be hard-coded; pass ``"cpu"`` on
                machines without a GPU.
        """
        self.cfg = get_cfg()
        self.cfg.merge_from_file(cfg_path)
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
        self.cfg.MODEL.WEIGHTS = weights_path
        self.cfg.MODEL.DEVICE = device
        self.predictor = DefaultPredictor(self.cfg)

    def bbox(self, img):
        """Return the tight bounding box of the non-zero entries of ``img``.

        Args:
            img: Array-like mask; any truthy element counts as foreground.
                Presumably a 2-D (rows, cols) mask -- confirm against callers.

        Returns:
            ``(cmin, rmin, cmax, rmax)``: first/last column and first/last row
            that contain a truthy element (inclusive indices).

        Raises:
            IndexError: If ``img`` has no truthy element at all, because there
                is no first/last index to pick.
        """
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        # Indexing with [[0, -1]] picks the first and last hit in one go.
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return cmin, rmin, cmax, rmax

    def detect(self, im):
        """Run the predictor on one image and return its detections as arrays.

        Args:
            im: Image passed straight to the predictor.

        Returns:
            A tuple ``(bbox_xyxy, cls_conf, cls_ids)``:

            * ``bbox_xyxy`` -- ``float64`` array of shape ``(N, 4)`` holding
              ``x0, y0, x1, y1`` per detection. The shape is ``(0, 4)`` when
              nothing is detected, so column indexing stays valid.
            * ``cls_conf`` -- array of ``N`` scores, in the predictor's dtype.
            * ``cls_ids`` -- array of ``N`` class indices, in the predictor's
              dtype.
        """
        instances = self.predictor(im)["instances"]
        boxes = instances.pred_boxes.tensor.cpu().numpy()
        classes = instances.pred_classes.cpu().numpy()
        scores = instances.scores.cpu().numpy()

        # Same filter as before (class index >= 0), applied as one mask instead
        # of a Python loop. NOTE(review): class indices look non-negative, so
        # this presumably keeps everything; it is the hook for class filtering.
        keep = classes >= 0

        # reshape(-1, 4) pins the second dimension even for zero detections.
        bbox_xyxy = boxes[keep].astype(np.float64).reshape(-1, 4)
        return bbox_xyxy, scores[keep], classes[keep]

In [4]:
import sys

In [5]:
# Put the parent directory first on the import path; presumably this is what
# makes the `util` module imported in the next cell resolvable.
sys.path.insert(0, "..")

In [6]:
from util import draw_bboxes

In [14]:
# Config file and weights checkpoint handed to the detector.
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a single configurable base directory.
cf_p = "/data/drone_experiments/exp1/weights/config.yaml"
w_p = "/data/drone_experiments/exp1/weights/model_0111599.pth"
# Constructing the wrapper builds the underlying DefaultPredictor immediately.
det = Detectron2(cf_p, w_p)

In [15]:
import cv2

In [16]:
# Image to run the detector on.
im_path = "/nfs/gpu14_datasets/drone_datasets/visdrone/Task-1/VisDrone2019-DET-train/images/0000002_00005_d_0000014.jpg"
im = cv2.imread(im_path)
# cv2.imread reports a missing or unreadable file by returning None rather than
# raising, so fail here with a clear message instead of inside the predictor.
if im is None:
    raise FileNotFoundError(f"Could not read image: {im_path}")
bbox, cls_conf, cls_id = det.detect(im)

In [17]:
# Show the detected boxes: one row per detection, columns x0, y0, x1, y1.
bbox

array([[ 55.42174149, 338.27487183,  66.65877533, 357.55639648],
       [ 46.96202087, 389.12811279,  58.28924179, 409.92724609],
       [898.3336792 , 223.84062195, 906.63964844, 240.69818115],
       [193.8644104 , 233.23147583, 201.37887573, 246.21234131],
       [ 53.53175735, 311.24551392,  63.49562836, 329.12167358],
       [221.47994995, 344.52731323, 232.79364014, 360.77288818],
       [881.04504395, 255.82118225, 891.04034424, 272.59277344],
       [117.27410126, 199.95082092, 125.26366425, 213.59176636],
       [111.02787018,  98.33027649, 116.03022766, 108.52578735],
       [137.26908875, 113.86061859, 141.86730957, 121.92194366],
       [177.96107483, 232.46159363, 185.67141724, 244.75863647],
       [220.83100891, 116.60059357, 225.6178894 , 126.64091492],
       [ 85.69707489, 192.99736023,  94.08559418, 210.08642578],
       [158.18556213,  59.32807541, 162.15684509,  66.1604538 ],
       [110.20477295,  96.55470276, 115.3157959 , 104.83091736],
       [233.103302  , 230

In [18]:
# Show the predicted class index of each detection.
cls_id

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [19]:
# NOTE(review): `im` is rebound to the result, so re-running this cell feeds the
# previous output back in; re-run the image-loading cell first for a clean image.
im = draw_bboxes(im, bbox)

In [21]:
from matplotlib import pyplot as plt

In [24]:
from IPython.display import Video