In [1]:
!git clone https://github.com/facebookresearch/detectron2

Cloning into 'detectron2'...
remote: Enumerating objects: 15671, done.[K
remote: Counting objects: 100% (394/394), done.[K
remote: Compressing objects: 100% (293/293), done.[K
remote: Total 15671 (delta 174), reused 274 (delta 101), pack-reused 15277[K
Receiving objects: 100% (15671/15671), 6.50 MiB | 10.97 MiB/s, done.
Resolving deltas: 100% (11290/11290), done.


In [None]:
!pip install -e ./detectron2

In [3]:
import detectron2
from detectron2.utils.logger import setup_logger

setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

register_coco_instances(
    "coco_pod_dataset/train",
    {},
    "dataset/Object_Detection/coco/train/train_annotations.json",
    "dataset/Object_Detection/coco/train",
)
register_coco_instances(
    "coco_pod_dataset/val",
    {},
    "dataset/Object_Detection/coco/valid/valid_annotations.json",
    "dataset/Object_Detection/coco/valid",
)


In [4]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator
import yaml
class CocoTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            os.makedirs("coco_eval", exist_ok=True)
            output_folder = "coco_eval"
        return COCOEvaluator(dataset_name, cfg, False, output_folder)


cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
)
cfg.DATASETS.TRAIN = ("coco_pod_dataset/train", )
cfg.DATASETS.TEST = ("coco_pod_dataset/val",)
# cfg.DATASETS.VALID = ("coco_pod_dataset/val",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"
)  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = (
    2  # This is the real "batch size" commonly known to deep learning people
)
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 300  # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []  # do not decay learning rate
cfg.MODEL.DEVICE = "cpu"
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
cfg.OUTPUT_DIR = "detectron2/output/faster_rcnn_R_50_FPN_1x"

with open("detectron2__faster_rcnn_R_50_FPN_1x_trainner.yaml", 'w' ) as outfile:
    outfile.write(cfg.dump())
    


In [5]:

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)  

trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[32m[05/09 12:19:30 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (6, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (6,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (20, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (20,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls

[32m[05/09 12:19:30 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[05/09 12:19:43 d2.engine.hooks]: [0mTotal training time: 0:00:00 (0:00:00 on hooks)
[32m[05/09 12:19:43 d2.utils.events]: [0m iter: 2  total_loss: 2.898  loss_cls: 1.769  loss_box_reg: 0.7479  loss_rpn_cls: 0.2468  loss_rpn_loc: 0.1345    data_time: 0.0697  last_data_time: 0.0029   lr: 1.0825e-06  


KeyboardInterrupt: 

In [6]:
# from the `tools/export_model.py` to reload model in executable model

import argparse
import os
from typing import Dict, List, Tuple
import torch
from torch import Tensor, nn

import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader, detection_utils
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import (
    STABLE_ONNX_OPSET_VERSION,
    TracingAdapter,
    dump_torchscript_IR,
    scripting_with_instances,
)
from detectron2.modeling import GeneralizedRCNN, RetinaNet, build_model
from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.projects.point_rend import add_pointrend_config
from detectron2.structures import Boxes
from detectron2.utils.env import TORCH_VERSION
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import setup_logger
from collections import namedtuple

def load_model():
    torch_model = build_model(cfg)
    DetectionCheckpointer(torch_model).resume_or_load(
        "detectron2/output/faster_rcnn_R_50_FPN_1x/model_final.pth"
    )
    torch_model.eval()
    return torch_model


# import from export_model.py


def get_sample_inputs(args):
    # get a first batch from dataset
    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    first_batch = next(iter(data_loader))
    return first_batch


# experimental. API not yet final
def export_tracing(torch_model, inputs , args):
    assert TORCH_VERSION >= (1, 8)
    image = inputs[0]["image"]
    inputs = [{"image": image}]  # remove other unused keys

    if isinstance(torch_model, GeneralizedRCNN):

        def inference(model, inputs):
            # use do_postprocess=False so it returns ROI mask
            inst = model.inference(inputs, do_postprocess=False)[0]
            return [{"instances": inst}]

    else:
        inference = None  # assume that we just call the model directly

    traceable_model = TracingAdapter(torch_model, inputs, inference)

    if args.format == "torchscript":
        ts_model = torch.jit.trace(traceable_model, (image,))
        with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
            torch.jit.save(ts_model, f)
        dump_torchscript_IR(ts_model, args.output)
    elif args.format == "onnx":
        with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as f:
            torch.onnx.export(
                traceable_model, (image,), f, opset_version=STABLE_ONNX_OPSET_VERSION
            )
    # logger.info("Inputs schema: " + str(traceable_model.inputs_schema))
    # logger.info("Outputs schema: " + str(traceable_model.outputs_schema))

    if args.format != "torchscript":
        return None
    if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)):
        return None

    def eval_wrapper(inputs):
        """
        The exported model does not contain the final resize step, which is typically
        unused in deployment but needed for evaluation. We add it manually here.
        """
        input = inputs[0]
        instances = traceable_model.outputs_schema(ts_model(input["image"]))[0][
            "instances"
        ]
        postprocessed = detector_postprocess(instances, input["height"], input["width"])
        return [{"instances": postprocessed}]

    return eval_wrapper


def export_onnx():
    args_dict = {
        "export_method": "tracing",
        "format": "onnx",
        "output": "detectron2/output/faster_rcnn_R_50_FPN_1x/onnx",
    }
    args = namedtuple("Struct", args_dict.keys())(*args_dict.values())
    torch_model = build_model(cfg)
    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)
    torch_model.eval()
    sample_inputs = get_sample_inputs(args)
    exported_model = export_tracing(torch_model, sample_inputs, args)


test_model = load_model()

[32m[05/09 12:19:48 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from detectron2/output/faster_rcnn_R_50_FPN_1x/model_final.pth ...


In [20]:
export_onnx()

[32m[05/09 06:41:40 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl ...


Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (6, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (6,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (20, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (20,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls

[32m[05/09 06:41:40 d2.data.datasets.coco]: [0mLoaded 226 images in COCO format from dataset/Object_Detection/coco/valid/valid_annotations.json
[32m[05/09 06:41:40 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[05/09 06:41:40 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[05/09 06:41:40 d2.data.common]: [0mSerializing 226 elements to byte tensors and concatenating them all ...
[32m[05/09 06:41:40 d2.data.common]: [0mSerialized dataset takes 0.21 MiB


  assert t.shape[:-2] == tensors[0].shape[:-2], t.shape
  if tensor.numel() == 0:
  assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size()
  if tensor.numel() == 0:
  assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size()
  if not valid_mask.all():
  assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!"
  h, w = box_size
  assert boxes.shape[-1] == 4
  if boxes.numel() > (4000 if boxes.device.type == "cpu" else 20000) and not torchvision._is_tracing():
  return self.item().__format__(format_spec)
  assert condition, message
  assert rois.dim() == 2 and rois.size(1) == 5
  if not valid_mask.all():
  if tensor.numel() == 0:
  assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size()
  assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!"
  h, w = box_size
  if num_bbox_reg_classes == 1:
  assert boxes.shape[-1] == 4
  if boxes.numel() > (4000 if boxes.device.type == "cpu" else 20000) and not torchvision._is_

In [10]:
# print the summary
from torchinfo import summary
summary(test_model)


Layer (type:depth-idx)                                  Param #
GeneralizedRCNN                                         --
├─FPN: 1-1                                              --
│    └─Conv2d: 2-1                                      65,792
│    └─Conv2d: 2-2                                      590,080
│    └─Conv2d: 2-3                                      131,328
│    └─Conv2d: 2-4                                      590,080
│    └─Conv2d: 2-5                                      262,400
│    └─Conv2d: 2-6                                      590,080
│    └─Conv2d: 2-7                                      524,544
│    └─Conv2d: 2-8                                      590,080
│    └─LastLevelMaxPool: 2-9                            --
│    └─ResNet: 2-10                                     --
│    │    └─BasicStem: 3-1                              (9,408)
│    │    └─Sequential: 3-2                             (212,992)
│    │    └─Sequential: 3-3                             1,2