In [9]:
import yaml
import os
from mmdet.apis import init_detector, inference_detector
from jetnet.utils import download, make_parent_dir
from torch2trt import torch2trt
import torch

def find_mm_models(root):
    """Collect every model entry reachable from ``<root>/model-index.yml``.

    The root index lists further index files under its ``Import`` key (paths
    relative to ``root``); each of those files contributes the entries found
    under its ``Models`` key.

    Args:
        root: Directory that contains ``model-index.yml``.

    Returns:
        list: Model entries in the order the index files list them.

    Raises:
        OSError: If an index file cannot be opened.
        KeyError: If an index file lacks the ``Import`` / ``Models`` key.
    """
    root_index_path = os.path.join(root, 'model-index.yml')
    # safe_load instead of a bare yaml.load(f): calling yaml.load without a
    # Loader warns on PyYAML 5.x and is rejected by PyYAML >= 6, and the index
    # files are plain data that need no arbitrary object construction.
    with open(root_index_path, 'r') as f:
        root_index = yaml.safe_load(f)

    models = []
    for relative_path in root_index['Import']:
        with open(os.path.join(root, relative_path), 'r') as f:
            model_index = yaml.safe_load(f)
        models.extend(model_index['Models'])
    return models


def find_mmdet_models():
    """Return the model entries indexed under the directory named by ``MMDET_DIR``.

    Raises:
        KeyError: If the ``MMDET_DIR`` environment variable is not set.
    """
    mmdet_root = os.environ['MMDET_DIR']
    return find_mm_models(mmdet_root)


def find_mmocr_models():
    """Return the model entries indexed under the directory named by ``MMOCR_DIR``.

    Raises:
        KeyError: If the ``MMOCR_DIR`` environment variable is not set.
    """
    mmocr_root = os.environ['MMOCR_DIR']
    return find_mm_models(mmocr_root)

def is_task(model, task):
    """Tell whether ``model`` reports a result for ``task``.

    Args:
        model: Model entry with a ``Results`` list; each result has a ``Task`` field.
        task: Task label to look for.

    Returns:
        bool: True if any result's ``Task`` equals ``task``, otherwise False.
    """
    # The original had a second, unreachable return statement after
    # `return False`; it has been dropped.
    return any(result['Task'] == task for result in model['Results'])


# Task labels matched against each result's 'Task' field when filtering models.
INSTANCE_SEGMENTATION = 'Instance Segmentation'
OBJECT_DETECTION = 'Object Detection'


def find_mmdet_instance_segmentation_models():
    """Return the mmdet model entries that report an instance-segmentation result."""
    matches = []
    for entry in find_mmdet_models():
        if is_task(entry, INSTANCE_SEGMENTATION):
            matches.append(entry)
    return matches


def find_mmdet_object_detection_models():
    """Return the mmdet model entries that report an object-detection result."""
    matches = []
    for entry in find_mmdet_models():
        if is_task(entry, OBJECT_DETECTION):
            matches.append(entry)
    return matches
    
def init_detector_by_name(name, mmdet_dir=None, weights_dir="data/mmdet"):
    """Build a detector from the model-index entry called ``name``.

    Args:
        name: Value of the ``Name`` field of the wanted model entry.
        mmdet_dir: Directory holding ``model-index.yml`` and the config files.
            Defaults to the ``MMDET_DIR`` environment variable.
        weights_dir: Directory in which the downloaded checkpoint is cached.

    Returns:
        Whatever ``init_detector(config_path, weights_path)`` returns.

    Raises:
        AssertionError: If ``mmdet_dir`` is not given and ``MMDET_DIR`` is unset.
        StopIteration: If no indexed model is called ``name``.
    """
    if mmdet_dir is None:
        mmdet_dir = os.environ.get('MMDET_DIR')
    assert mmdet_dir is not None, "pass mmdet_dir or set the MMDET_DIR environment variable"

    # Search the index and resolve the config under the directory that was
    # actually selected; previously an explicit mmdet_dir argument was ignored
    # in favour of os.environ['MMDET_DIR'].
    model = next(m for m in find_mm_models(mmdet_dir) if m['Name'] == name)
    config = os.path.join(mmdet_dir, model['Config'])

    # Cache the checkpoint under weights_dir, keyed by the URL's file name.
    weights_url = model['Weights']
    weights_path = os.path.join(weights_dir, os.path.basename(weights_url))
    if not os.path.exists(weights_path):
        make_parent_dir(weights_path)
        download(weights_url, weights_path)

    return init_detector(config, weights_path)

def get_feat_strides(cfg):
    """Return ``featmap_strides`` from the bbox RoI extractor section of ``cfg``."""
    bbox_roi_extractor = cfg['model']['roi_head']['bbox_roi_extractor']
    return bbox_roi_extractor['featmap_strides']

def get_neck_input_shapes(cfg, shape):
    """Compute one ``[1, C, H, W]`` shape per neck input for an image of ``shape``.

    Args:
        cfg: Detector config; the neck's ``in_channels`` are paired with the
            strides returned by ``get_feat_strides``.
        shape: ``(height, width)`` of the image.

    Returns:
        list: ``[1, channels, height // stride, width // stride]`` for each pair.
    """
    height, width = shape
    strides = get_feat_strides(cfg)
    neck_in_channels = cfg['model']['neck']['in_channels']
    shapes = []
    for channels, stride in zip(neck_in_channels, strides):
        shapes.append([1, channels, height // stride, width // stride])
    return shapes

def get_bbox_max_input_shapes(cfg):
    """Return the largest bbox-head input shape described by ``cfg``.

    Args:
        cfg: Detector config providing the bbox head's ``roi_feat_size`` and
            ``in_channels`` and the RPN test setting ``max_per_img``.

    Returns:
        list: A single-element list ``[[max_per_img, in_channels, roi, roi]]``.
    """
    # Read from the cfg argument; the original ignored it and reached for the
    # notebook-global `det.cfg`, so it only worked while `det` happened to exist.
    bbox_head = cfg['model']['roi_head']['bbox_head']
    roi_size = bbox_head['roi_feat_size']
    in_channels = bbox_head['in_channels']
    max_count = cfg['model']['test_cfg']['rpn']['max_per_img']
    return [[max_count, in_channels, roi_size, roi_size]]

def get_bbox_min_input_shapes(cfg):
    """Return the bbox-head input shapes with the leading dimension of the first shape set to 1."""
    bbox_shapes = get_bbox_max_input_shapes(cfg)
    first_shape = bbox_shapes[0]
    first_shape[0] = 1  # smallest case: a single entry along the first axis
    return bbox_shapes

def get_mask_max_input_shapes(cfg):
    """Return the largest mask-head input shape described by ``cfg``.

    Args:
        cfg: Detector config providing the R-CNN test setting ``max_per_img``,
            the mask RoI layer's ``output_size`` and the mask head's ``in_channels``.

    Returns:
        list: A single-element list ``[[max_per_img, in_channels, roi, roi]]``.
    """
    # Read from the cfg argument; the original ignored it and reached for the
    # notebook-global `det.cfg`.
    roi_head = cfg['model']['roi_head']
    max_count = cfg['model']['test_cfg']['rcnn']['max_per_img']
    roi_size = roi_head['mask_roi_extractor']['roi_layer']['output_size']
    in_channels = roi_head['mask_head']['in_channels']
    return [[max_count, in_channels, roi_size, roi_size]]

def get_mask_min_input_shapes(cfg):
    """Return the mask-head input shapes with the leading dimension of the first shape set to 1."""
    mask_shapes = get_mask_max_input_shapes(cfg)
    first_shape = mask_shapes[0]
    first_shape[0] = 1  # smallest case: a single entry along the first axis
    return mask_shapes

def get_shapes(cfg, min_shape, max_shape, opt_shape):
    """Assemble the min / max / opt input shapes for each converted sub-module.

    Args:
        cfg: Detector config forwarded to the per-module shape helpers.
        min_shape: Smallest image ``(height, width)``.
        max_shape: Largest image ``(height, width)``.
        opt_shape: Image ``(height, width)`` used as the ``opt`` shape.

    Returns:
        dict: Keys ``'bbox'``, ``'neck'``, ``'backbone'`` and ``'mask'``, each
        mapping ``'min'`` / ``'max'`` / ``'opt'`` to a list of shapes. For the
        bbox and mask entries the ``'opt'`` value is the minimum shape.
    """
    # Lists are required for the `[1, 3] + ...` concatenation below.
    min_hw = list(min_shape)
    max_hw = list(max_shape)
    opt_hw = list(opt_shape)

    bbox = dict(
        min=get_bbox_min_input_shapes(cfg),
        max=get_bbox_max_input_shapes(cfg),
        opt=get_bbox_min_input_shapes(cfg),
    )
    neck = dict(
        min=get_neck_input_shapes(cfg, min_hw),
        max=get_neck_input_shapes(cfg, max_hw),
        opt=get_neck_input_shapes(cfg, opt_hw),
    )
    backbone = {
        bound: [[1, 3] + hw]
        for bound, hw in (('min', min_hw), ('max', max_hw), ('opt', opt_hw))
    }
    mask = dict(
        min=get_mask_min_input_shapes(cfg),
        max=get_mask_max_input_shapes(cfg),
        opt=get_mask_min_input_shapes(cfg),
    )
    return {'bbox': bbox, 'neck': neck, 'backbone': backbone, 'mask': mask}

def make_inputs(desc):
    """Create one random CUDA tensor for each shape listed under ``desc['opt']``."""
    tensors = []
    for shape in desc['opt']:
        tensors.append(torch.randn(shape).cuda())
    return tensors

def mmdet_mask_rcnn_build_torch2trt_modules(det, min_shape, max_shape, opt_shape, fp16_mode=False):
    """Convert a detector's backbone, neck, bbox head and mask head with torch2trt.

    Args:
        det: Detector exposing ``cfg``, ``backbone``, ``neck``,
            ``roi_head.bbox_head`` and ``roi_head.mask_head``.
        min_shape: Smallest image ``(height, width)``.
        max_shape: Largest image ``(height, width)``.
        opt_shape: Image ``(height, width)`` used as the ``opt`` shape.
        fp16_mode: Forwarded to every ``torch2trt`` call.

    Returns:
        dict: Converted modules under ``"backbone"``, ``"neck"``, ``"bbox"``
        and ``"mask"``.
    """
    # (result key, module, expand) in conversion order. The neck is the only
    # module converted with expand=False.
    targets = (
        ("backbone", det.backbone, True),
        ("neck", det.neck, False),
        ("bbox", det.roi_head.bbox_head, True),
        ("mask", det.roi_head.mask_head, True),
    )

    shapes = get_shapes(det.cfg, min_shape, max_shape, opt_shape)

    def _convert(module, desc, fp16_mode, expand=True):
        print(f"Optimizing... {desc}")
        inputs = make_inputs(desc)
        profile = {bound: desc[bound] for bound in ('min', 'max', 'opt')}
        if not expand:
            # Hand the tensors (and their shape lists) over as one nested entry.
            inputs = [inputs]
            profile = {bound: [value] for bound, value in profile.items()}

        return torch2trt(
            module,
            inputs,
            fp16_mode=fp16_mode,
            use_onnx=True,
            min_shapes=profile['min'],
            max_shapes=profile['max'],
            opt_shapes=profile['opt'],
            onnx_opset=11
        )

    return {
        key: _convert(module, shapes[key], fp16_mode, expand=expand)
        for key, module, expand in targets
    }


def mmdet_mask_rcnn_inject_torch2trt_modules(det, modules):
    """Attach the converted modules to ``det`` in place and return ``det``.

    Sets ``det.backbone``, ``det.neck``, ``det.bbox`` and ``det.mask`` from the
    same-named keys of ``modules``.

    NOTE(review): the build step reads the heads from ``det.roi_head.bbox_head``
    and ``det.roi_head.mask_head``, whereas this assigns ``det.bbox`` and
    ``det.mask`` - confirm the detector actually uses those two attributes.
    """
    for attr in ('backbone', 'neck', 'bbox', 'mask'):
        setattr(det, attr, modules[attr])
    return det


# Load the Mask R-CNN detector registered under this name in the model index.
det = init_detector_by_name("mask_rcnn_r50_caffe_fpn_1x_coco")

# Convert backbone, neck, bbox head and mask head with torch2trt in FP16 for
# images from 256x320 up to 800x1344 (opt shape 800x1344).
torch2trt_modules = mmdet_mask_rcnn_build_torch2trt_modules(det, [256, 320], [800, 1344], [800, 1344], fp16_mode=True)


  root_model_index = yaml.load(f)
  model_index = yaml.load(f)


load checkpoint from local path: data/mmdet/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth
Optimizing... {'min': [[1, 3, 256, 320]], 'max': [[1, 3, 800, 1344]], 'opt': [[1, 3, 800, 1344]]}
Optimizing... {'min': [[1, 256, 64, 80], [1, 512, 32, 40], [1, 1024, 16, 20], [1, 2048, 8, 10]], 'max': [[1, 256, 200, 336], [1, 512, 100, 168], [1, 1024, 50, 84], [1, 2048, 25, 42]], 'opt': [[1, 256, 200, 336], [1, 512, 100, 168], [1, 1024, 50, 84], [1, 2048, 25, 42]]}
Optimizing... {'min': [[1, 256, 7, 7]], 'max': [[1000, 256, 7, 7]], 'opt': [[1, 256, 7, 7]]}
Optimizing... {'min': [[1, 256, 14, 14]], 'max': [[100, 256, 14, 14]], 'opt': [[1, 256, 14, 14]]}


In [10]:
def mmdet_mask_rcnn_inject_torch2trt_modules(det, modules):
    """Attach the converted modules to ``det`` in place and return ``det``.

    Sets ``det.backbone``, ``det.neck``, ``det.bbox`` and ``det.mask`` from the
    same-named keys of ``modules``.

    NOTE(review): the build step reads the heads from ``det.roi_head.bbox_head``
    and ``det.roi_head.mask_head``, whereas this assigns ``det.bbox`` and
    ``det.mask`` - confirm the detector actually uses those two attributes.
    """
    for attr in ('backbone', 'neck', 'bbox', 'mask'):
        setattr(det, attr, modules[attr])
    return det

In [11]:
# Inject the converted modules. The helper mutates and returns its argument,
# so `det_trt` and `det` are the same object after this cell.
det_trt = mmdet_mask_rcnn_inject_torch2trt_modules(det, torch2trt_modules)

In [12]:
# Rebind `det` to a newly initialised detector; `det_trt` keeps pointing at the
# earlier, patched object.
det = init_detector_by_name("mask_rcnn_r50_caffe_fpn_1x_coco")

  root_model_index = yaml.load(f)
  model_index = yaml.load(f)


load checkpoint from local path: data/mmdet/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth


In [158]:
# Inspect the `engine` attribute of the neck currently attached to `det_trt`.
det_trt.neck.engine

In [186]:
# Neck input shapes for an 800x1344 (height x width) image.
get_neck_input_shapes(det.cfg, (800, 1344))

[[1, 256, 200, 336], [1, 512, 100, 168], [1, 1024, 50, 84], [1, 2048, 25, 42]]

In [7]:
# Convert the neck on its own. The input tensors and each min/max/opt shape list
# are wrapped in one extra list, mirroring the expand=False path of the build helper.
# NOTE(review): unlike the build helper, no fp16_mode is passed here - confirm intended.
neck_trt = torch2trt(det.neck, [[torch.randn(t).cuda() for t in get_neck_input_shapes(det.cfg, (800, 1344))]],
    min_shapes=[[t for t in get_neck_input_shapes(det.cfg, (256, 320))]],
    max_shapes=[[t for t in get_neck_input_shapes(det.cfg, (800, 1344))]],
    opt_shapes=[[t for t in get_neck_input_shapes(det.cfg, (800, 1344))]],
    use_onnx=True,
    onnx_opset=11
)

In [8]:
# Inspect the `engine` attribute of the separately converted neck.
neck_trt.engine

<tensorrt.tensorrt.ICudaEngine at 0x7f7d49630fb0>

In [192]:
# Replace the neck on the patched detector with the separately converted one.
det_trt.neck = neck_trt

In [22]:
import cv2
# Test image used by the benchmark and inference cells.
image = cv2.imread('assets/dog.jpg')


In [24]:
import time
# Rough throughput benchmark: time 100 back-to-back inference calls on `det`.
t0 = time.perf_counter()
for i in range(100):
    out = inference_detector(det, image)
t1 = time.perf_counter()

# Calls per second over the whole loop.
print(100 / (t1 - t0))

18.108974122227274


In [19]:
# Run the patched detector on the test image, passed as a file path.
inference_detector(det_trt, 'assets/dog.jpg')

([array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([[8.1191345e+01, 1.1784702e+02, 6.4000000e+02, 4.2525021e+02,
          6.5555938e-02]], dtype=float32),
  array([[9.8559860e+01, 6.8717400e+01, 6.4000000e+02, 4.2486740e+02,
          9.1210753e-01],
         [1.5698442e+02, 6.7244896e+01, 4.3176617e+02, 2.8106924e+02,
          4.4710591e-01]], dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  arra

In [16]:
# Show the result kept from the last iteration of the benchmark loop.
out

([array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([[8.1191345e+01, 1.1784702e+02, 6.4000000e+02, 4.2525021e+02,
          6.5555938e-02]], dtype=float32),
  array([[9.8559860e+01, 6.8717400e+01, 6.4000000e+02, 4.2486740e+02,
          9.1210753e-01],
         [1.5698442e+02, 6.7244896e+01, 4.3176617e+02, 2.8106924e+02,
          4.4710591e-01]], dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  arra

In [142]:
# Trace what flows through the detector's neck during one real inference call.
mod = det.neck
mod_in = []
mod_out = []

def mod_track(m, input, output):
    """Forward hook: record the module's inputs and its output."""
    mod_in.append(input)
    mod_out.append(output)
    
    
hook = mod.register_forward_hook(mod_track)
image = cv2.imread("assets/dog.jpg")
# image = cv2.resize(image, (1280, 32))
# NOTE(review): width/height are only referenced by the commented-out code at
# the end of this cell.
width, height = 1216, 800
inference_detector(det, image)

# Detach the hook so later inference calls are not recorded.
hook.remove()

# Shapes of the inputs captured on the first recorded call.
print("IN")
for o in mod_in[0]:
    if isinstance(o, tuple):
        print([of.shape for of in o])
    else:
        print(o.shape)
        
# Shapes of the outputs captured on the first recorded call.
print("OUT")

for o in mod_out[0]:
    if isinstance(o, (tuple, list)):
        print([of.shape for of in o])
    else:
        print(o.shape)
        
print("FEAT_STRIDES")
print(get_feat_strides(det.cfg))

# print("NECK IN")
# print(get_neck_input_shapes(det.cfg, height, width))
# print("BBOX IN")
# print(get_bbox_max_input_shapes(det.cfg))
# print("MASK IN")
# print(get_mask_max_input_shapes(det.cfg))

IN
[torch.Size([1, 256, 200, 304]), torch.Size([1, 512, 100, 152]), torch.Size([1, 1024, 50, 76]), torch.Size([1, 2048, 25, 38])]
OUT
torch.Size([1, 256, 200, 304])
torch.Size([1, 256, 100, 152])
torch.Size([1, 256, 50, 76])
torch.Size([1, 256, 25, 38])
torch.Size([1, 256, 13, 19])
FEAT_STRIDES
[4, 8, 16, 32]


In [130]:
# Scratch expression: evaluates to the tuple (1344, 800).
1344, 800

(1344, 800)

In [137]:
# Full shape table for min 256x320, max 800x1344 and opt 800x1344 images.
get_shapes(det.cfg, (256, 320), (800, 1344), (800, 1344))

{'bbox': {'min': [[1, 256, 7, 7]],
  'max': [[1000, 256, 7, 7]],
  'opt': [[1, 256, 7, 7]]},
 'neck': {'min': [[1, 256, 64, 80],
   [1, 512, 32, 40],
   [1, 1024, 16, 20],
   [1, 2048, 8, 10]],
  'max': [[1, 256, 200, 336],
   [1, 512, 100, 168],
   [1, 1024, 50, 84],
   [1, 2048, 25, 42]],
  'opt': [[1, 256, 200, 336],
   [1, 512, 100, 168],
   [1, 1024, 50, 84],
   [1, 2048, 25, 42]]},
 'backbone': {'min': [[1, 3, 256, 320]],
  'max': [[1, 3, 800, 1344]],
  'opt': [[1, 3, 800, 1344]]},
 'mask': {'min': [[1, 256, 14, 14]],
  'max': [[100, 256, 14, 14]],
  'opt': [[1, 256, 14, 14]]}}

In [122]:
# Print the mask RoI output size and the mask head's input channels, then
# display the whole roi_head config section.
print(det.cfg['model']['roi_head']['mask_roi_extractor']['roi_layer']['output_size'])
print(det.cfg['model']['roi_head']['mask_head']['in_channels'])
det.cfg['model']['roi_head']

14
256


{'type': 'StandardRoIHead',
 'bbox_roi_extractor': {'type': 'SingleRoIExtractor',
  'roi_layer': {'type': 'RoIAlign', 'output_size': 7, 'sampling_ratio': 0},
  'out_channels': 256,
  'featmap_strides': [4, 8, 16, 32]},
 'bbox_head': {'type': 'Shared2FCBBoxHead',
  'in_channels': 256,
  'fc_out_channels': 1024,
  'roi_feat_size': 7,
  'num_classes': 80,
  'bbox_coder': {'type': 'DeltaXYWHBBoxCoder',
   'target_means': [0.0, 0.0, 0.0, 0.0],
   'target_stds': [0.1, 0.1, 0.2, 0.2]},
  'reg_class_agnostic': False,
  'loss_cls': {'type': 'CrossEntropyLoss',
   'use_sigmoid': False,
   'loss_weight': 1.0},
  'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0}},
 'mask_roi_extractor': {'type': 'SingleRoIExtractor',
  'roi_layer': {'type': 'RoIAlign', 'output_size': 14, 'sampling_ratio': 0},
  'out_channels': 256,
  'featmap_strides': [4, 8, 16, 32]},
 'mask_head': {'type': 'FCNMaskHead',
  'num_convs': 4,
  'in_channels': 256,
  'conv_out_channels': 256,
  'num_classes': 80,
  'loss_mask': {'t

In [119]:
# RPN test-time `max_per_img`: the leading dimension used for the largest bbox-head input shape.
det.cfg['model']['test_cfg']['rpn']['max_per_img']

1000

In [97]:
# Feature-map strides configured for the bbox RoI extractor.
get_feat_strides(det.cfg)

[4, 8, 16, 32]

In [99]:
# Neck section of the model config.
det.cfg['model']['neck']

{'type': 'FPN',
 'in_channels': [256, 512, 1024, 2048],
 'out_channels': 256,
 'num_outs': 5}

In [96]:
from dataclasses import dataclass
from typing import Sequence

# NOTE(review): this bare expression is not the last statement of the cell, so
# it is evaluated and discarded.
det.cfg['model']

# Module-level copies of the stride / RoI-size settings read from the config.
feat_strides = det.cfg['model']['roi_head']['bbox_roi_extractor']['featmap_strides']
bbox_roi_size = det.cfg['model']['roi_head']['bbox_head']['roi_feat_size']
mask_roi_size = det.cfg['model']['roi_head']['mask_roi_extractor']['roi_layer']['output_size']


@dataclass
class Torch2trtInput:
    """Shapes describing one torch2trt input: a trace shape plus min / max / opt bounds.

    NOTE(review): not referenced anywhere else in the visible notebook.
    """
    trace: Sequence[int]
    min: Sequence[int]
    max: Sequence[int]
    opt: Sequence[int]
        
        
def get_feat_strides(cfg):
    """Return ``featmap_strides`` from the bbox RoI extractor section of ``cfg``."""
    bbox_roi_extractor = cfg['model']['roi_head']['bbox_roi_extractor']
    return bbox_roi_extractor['featmap_strides']
    
def get_rpn_input_shapes(cfg, height, width):
    """Compute one ``[1, C, H, W]`` shape per RPN anchor-generator stride.

    Args:
        cfg: Detector config; reads the RPN head's ``in_channels`` and its
            anchor generator's ``strides``.
        height: Image height.
        width: Image width.

    Returns:
        list: ``[1, in_channels, height // stride, width // stride]`` per stride.

    NOTE(review): this uses floor division, so 800 with stride 64 gives 12,
    while a forward-hook trace of the neck in this notebook shows a last-level
    height of 13 for an 800x1216 input - confirm whether rounding up is needed.
    """
    rpn_head = cfg['model']['rpn_head']
    strides = rpn_head['anchor_generator']['strides']
    in_channels = rpn_head['in_channels']
    shapes = []
    for stride in strides:
        shapes.append([1, in_channels, height // stride, width // stride])
    return shapes

def get_neck_input_shapes(cfg, height, width):
    """Compute one ``[1, C, H, W]`` shape per neck input for a ``height`` x ``width`` image.

    Pairs the neck's ``in_channels`` with the bbox RoI extractor's
    ``featmap_strides`` and floor-divides the image size by each stride.
    """
    model_cfg = cfg['model']
    strides = model_cfg['roi_head']['bbox_roi_extractor']['featmap_strides']
    in_channels = model_cfg['neck']['in_channels']
    shapes = []
    for channels, stride in zip(in_channels, strides):
        shapes.append([1, channels, height // stride, width // stride])
    return shapes

def get_bbox_head_shapes(cfg, height, width):
    """Return the bbox-head shapes, currently the same as the RPN input shapes.

    Args:
        cfg: Detector config forwarded to ``get_rpn_input_shapes``.
        height: Image height.
        width: Image width.

    Returns:
        list: Whatever ``get_rpn_input_shapes(cfg, height, width)`` returns.
    """
    # The original called `get_rpn_shapes`, which this notebook never defines;
    # the helper defined alongside this function is `get_rpn_input_shapes`.
    return get_rpn_input_shapes(cfg, height, width)

def torch2trt_mmdet_instance_seg(detector, opt_shape, max_shape, min_shape):
    """Unfinished sketch of one entry point for converting an instance-segmentation detector.

    NOTE(review): this body cannot run as written. It evaluates the undefined
    name ``input_channels`` and later references ``rpn_in_channels`` and
    ``stride``, which are never assigned; the ``[...]`` / ``...`` values are
    placeholders. None of the three shape arguments is used yet, and nothing is
    returned.
    """
    
    
    feat_strides = detector.cfg['model']['roi_head']['bbox_roi_extractor']['featmap_strides']
    bbox_roi_size = detector.cfg['model']['roi_head']['bbox_head']['roi_feat_size']
    mask_roi_size = detector.cfg['model']['roi_head']['mask_roi_extractor']['roi_layer']['output_size']
    
    # get neck shape
    # NOTE(review): `neck_channels` is read from the `featmap_strides` key, the
    # same value as `feat_strides` above - presumably a copy-paste slip; confirm.
    neck_channels = detector.cfg['model']['roi_head']['bbox_roi_extractor']['featmap_strides']
    # get rpn shape
    rpn_strides = detector.cfg['model']['rpn_head']['anchor_generator']['strides']
    rpn_channels = detector.cfg['model']['rpn_head']['in_channels']
    rpn_shapes = None
    
    # NOTE(review): bare reference to an undefined name; raises NameError when reached.
    input_channels
    neck_input_channels = [...]
    bbox_input_channels = [...]
    rpn_shapes = [[1, rpn_in_channels, stride, stride], ...] # x5 anchor gen strides
    
    
    # Placeholders for the per-module input shapes still to be computed.
    backbone_input_shape = None
    neck_input_shape = None
    rpn_input_shape = None
    bbox_head_input_shape = None
    mask_head_input_shape = None

In [61]:
# NOTE(review): the first argument is missing, so this line is a syntax error
# as written; `closest_multiple_gt` is also not defined in the visible notebook.
closest_multiple_gt(, 32)

800

In [71]:
# NOTE(review): `get_neck_shapes` is not defined in the visible notebook; the
# closest definition is `get_neck_input_shapes(cfg, height, width)`.
get_neck_shapes(det.cfg, 800, 1216)

[[1, 256, 200, 304], [1, 512, 100, 152], [1, 1024, 50, 76], [1, 2048, 25, 38]]

In [74]:
# NOTE(review): `get_rpn_shapes` is not defined in the visible notebook; the
# closest definition is `get_rpn_input_shapes(cfg, height, width)`.
get_rpn_shapes(det.cfg, 800, 1216)

[[1, 256, 200, 304],
 [1, 256, 100, 152],
 [1, 256, 50, 76],
 [1, 256, 25, 38],
 [1, 256, 12, 19]]

In [49]:
import cv2

# Shape of the raw test image as loaded by OpenCV.
cv2.imread('assets/dog.jpg').shape

(426, 640, 3)

In [89]:
# Trace what flows through the mask head during one real inference call.
mod = det.roi_head.mask_head
mod_in = []
mod_out = []

def mod_track(m, input, output):
    """Forward hook: record the module's inputs and its output."""
    mod_in.append(input)
    mod_out.append(output)
    
    
hook = mod.register_forward_hook(mod_track)
image = cv2.imread("assets/dog.jpg")
# image = cv2.resize(image, (1280, 32))
inference_detector(det, image)

# Detach the hook so later inference calls are not recorded.
hook.remove()

# Shapes of the inputs captured on the first recorded call.
for o in mod_in[0]:
    if isinstance(o, tuple):
        print([of.shape for of in o])
    else:
        print(o.shape)
        

# Shapes of the items obtained by iterating the first recorded output.
for o in mod_out[0]:
    if isinstance(o, tuple):
        print([of.shape for of in o])
    else:
        print(o.shape)

torch.Size([7, 256, 14, 14])
torch.Size([80, 28, 28])
torch.Size([80, 28, 28])
torch.Size([80, 28, 28])
torch.Size([80, 28, 28])
torch.Size([80, 28, 28])
torch.Size([80, 28, 28])
torch.Size([80, 28, 28])




In [79]:
# Display the first output captured by the forward hook.
mod_out[0]

AttributeError: 'tuple' object has no attribute 'shape'

In [66]:
# Display the full model section of the detector config.
det.cfg['model']

{'type': 'MaskRCNN',
 'backbone': {'type': 'ResNet',
  'depth': 50,
  'num_stages': 4,
  'out_indices': (0, 1, 2, 3),
  'frozen_stages': 1,
  'norm_cfg': {'type': 'BN', 'requires_grad': False},
  'norm_eval': True,
  'style': 'caffe',
  'init_cfg': None},
 'neck': {'type': 'FPN',
  'in_channels': [256, 512, 1024, 2048],
  'out_channels': 256,
  'num_outs': 5},
 'rpn_head': {'type': 'RPNHead',
  'in_channels': 256,
  'feat_channels': 256,
  'anchor_generator': {'type': 'AnchorGenerator',
   'scales': [8],
   'ratios': [0.5, 1.0, 2.0],
   'strides': [4, 8, 16, 32, 64]},
  'bbox_coder': {'type': 'DeltaXYWHBBoxCoder',
   'target_means': [0.0, 0.0, 0.0, 0.0],
   'target_stds': [1.0, 1.0, 1.0, 1.0]},
  'loss_cls': {'type': 'CrossEntropyLoss',
   'use_sigmoid': True,
   'loss_weight': 1.0},
  'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0}},
 'roi_head': {'type': 'StandardRoIHead',
  'bbox_roi_extractor': {'type': 'SingleRoIExtractor',
   'roi_layer': {'type': 'RoIAlign', 'output_size': 7

In [52]:
# NOTE(review): this binds the function object `cv2.resize` itself to `image`;
# no resize is performed.
image = cv2.resize

1066.6666666666667

In [51]:
# Display the test-time preprocessing pipeline from the config.
det.cfg['test_pipeline']

[{'type': 'LoadImageFromFile'},
 {'type': 'MultiScaleFlipAug',
  'img_scale': (1333, 800),
  'flip': False,
  'transforms': [{'type': 'Resize', 'keep_ratio': True},
   {'type': 'RandomFlip'},
   {'type': 'Normalize',
    'mean': [103.53, 116.28, 123.675],
    'std': [1.0, 1.0, 1.0],
    'to_rgb': False},
   {'type': 'Pad', 'size_divisor': 32},
   {'type': 'ImageToTensor', 'keys': ['img']},
   {'type': 'Collect', 'keys': ['img']}]}]

In [18]:
import torch
# Run the backbone alone on a random 1x3x256x256 CUDA tensor.
out = det.backbone(torch.randn(1, 3, 256, 256).cuda())

In [22]:
# NOTE(review): incomplete attribute access (trailing dot) - a syntax error as written.
det.backbone.

(1, 2, 2, 2)

In [20]:
# Display the full model section of the detector config.
det.cfg['model']

{'type': 'MaskRCNN',
 'backbone': {'type': 'ResNet',
  'depth': 50,
  'num_stages': 4,
  'out_indices': (0, 1, 2, 3),
  'frozen_stages': 1,
  'norm_cfg': {'type': 'BN', 'requires_grad': False},
  'norm_eval': True,
  'style': 'caffe',
  'init_cfg': None},
 'neck': {'type': 'FPN',
  'in_channels': [256, 512, 1024, 2048],
  'out_channels': 256,
  'num_outs': 5},
 'rpn_head': {'type': 'RPNHead',
  'in_channels': 256,
  'feat_channels': 256,
  'anchor_generator': {'type': 'AnchorGenerator',
   'scales': [8],
   'ratios': [0.5, 1.0, 2.0],
   'strides': [4, 8, 16, 32, 64]},
  'bbox_coder': {'type': 'DeltaXYWHBBoxCoder',
   'target_means': [0.0, 0.0, 0.0, 0.0],
   'target_stds': [1.0, 1.0, 1.0, 1.0]},
  'loss_cls': {'type': 'CrossEntropyLoss',
   'use_sigmoid': True,
   'loss_weight': 1.0},
  'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0}},
 'roi_head': {'type': 'StandardRoIHead',
  'bbox_roi_extractor': {'type': 'SingleRoIExtractor',
   'roi_layer': {'type': 'RoIAlign', 'output_size': 7

In [19]:
# Shape of every item in `out`.
[o.shape for o in out]

[torch.Size([1, 256, 64, 64]),
 torch.Size([1, 512, 32, 32]),
 torch.Size([1, 1024, 16, 16]),
 torch.Size([1, 2048, 8, 8])]

In [8]:
# Inputs passed to the backbone, appended on every forward call while the hook is attached.
backbone_data = []

def cb_backbone(module, input, output):
    """Forward hook: keep the inputs passed to the backbone."""
    backbone_data.append(input)

# NOTE(review): the handle is kept in `hook`, but this cell never removes it.
hook = det.backbone.register_forward_hook(cb_backbone)

In [13]:
# Display the test-time preprocessing pipeline from the config.
det.cfg.test_pipeline

[{'type': 'LoadImageFromFile'},
 {'type': 'MultiScaleFlipAug',
  'img_scale': (1333, 800),
  'flip': False,
  'transforms': [{'type': 'Resize', 'keep_ratio': True},
   {'type': 'RandomFlip'},
   {'type': 'Normalize',
    'mean': [103.53, 116.28, 123.675],
    'std': [1.0, 1.0, 1.0],
    'to_rgb': False},
   {'type': 'Pad', 'size_divisor': 32},
   {'type': 'ImageToTensor', 'keys': ['img']},
   {'type': 'Collect', 'keys': ['img']}]}]

In [11]:
# Shape of the first input of the first recorded backbone call.
backbone_data[0][0].shape

torch.Size([1, 3, 800, 1216])

In [9]:
# Run the detector on the test image, passed as a file path.
inference_detector(det, "assets/dog.jpg")

([array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  array([[8.0929634e+01, 1.1799213e+02, 6.4000000e+02, 4.2523090e+02,
          6.4844467e-02]], dtype=float32),
  array([[9.8399818e+01, 6.8688713e+01, 6.4000000e+02, 4.2488736e+02,
          9.1171265e-01],
         [1.5696581e+02, 6.7235023e+01, 4.3174054e+02, 2.8104599e+02,
          4.5043728e-01]], dtype=float32),
  array([], shape=(0, 5), dtype=float32),
  arra