In [6]:
# IMPORT REQUIRED LIBRARIES
import torch
import torchvision
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import os, json, cv2, random

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.modeling import build_model

## Load data
First we load the data from the `dataset` folder.

In [2]:
register_coco_instances("kaggle_dataset_train", {}, "dataset/annotations_train.json", "dataset")
register_coco_instances("kaggle_dataset_test", {}, "dataset/annotations_val.json", "dataset")

## Train Detectron2 Model
Now that the data is loaded we can train the Detectron2 model.

In [3]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.DATASETS.TRAIN = ("kaggle_dataset_train",)
cfg.DATASETS.TEST = ("kaggle_dataset_test",)
cfg.DATALOADER.NUM_WORKERS = 8
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0025  # pick a good LR
cfg.SOLVER.MAX_ITER = 1000    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 400   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=True)
trainer.train()

[32m[11/09 17:04:56 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[11/09 17:04:57 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from dataset/annotations_train.json
[32m[11/09 17:04:58 d2.data.build]: [0mRemoved 0 images with no usable annotations. 485 images left.
[32m[11/09 17:04:58 d2.data.build]: [0mDistribution of instances among all 3 categories:
[36m|  category  | #instances   |  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|:----------:|:-------------|
|   shsy5y   | 41615        |   astro    | 8122         |    cort    | 8492         |
|            |              |            |              |            |              |
|   total    | 58229        |            |              |            |              |[0m
[32m[11/09 17:04:58 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[11/09 17:04:58 d2.data

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[11/09 17:05:12 d2.utils.events]: [0m eta: 0:06:15  iter: 319  total_loss: 1.807  loss_cls: 0.4688  loss_box_reg: 0.581  loss_mask: 0.3118  loss_rpn_cls: 0.1572  loss_rpn_loc: 0.2737  time: 0.6072  data_time: 0.1575  lr: 0.007992  max_mem: 4553M
[32m[11/09 17:05:25 d2.utils.events]: [0m eta: 0:06:07  iter: 339  total_loss: 1.778  loss_cls: 0.4193  loss_box_reg: 0.5526  loss_mask: 0.3071  loss_rpn_cls: 0.18  loss_rpn_loc: 0.2637  time: 0.6112  data_time: 0.0393  lr: 0.0084915  max_mem: 5149M
[32m[11/09 17:05:36 d2.utils.events]: [0m eta: 0:05:51  iter: 359  total_loss: 1.644  loss_cls: 0.3852  loss_box_reg: 0.5621  loss_mask: 0.3125  loss_rpn_cls: 0.09831  loss_rpn_loc: 0.2538  time: 0.5918  data_time: 0.0199  lr: 0.008991  max_mem: 5149M
[32m[11/09 17:05:47 d2.utils.events]: [0m eta: 0:05:40  iter: 379  total_loss: 1.722  loss_cls: 0.4697  loss_box_reg: 0.5419  loss_mask: 0.3024  loss_rpn_cls: 0.1642  loss_rpn_loc: 0.2703  time: 0.5911  data_time: 0.0277  lr: 0.0094905  max

[32m[11/09 17:11:50 d2.utils.events]: [0m eta: 0:00:11  iter: 979  total_loss: 1.699  loss_cls: 0.4343  loss_box_reg: 0.5496  loss_mask: 0.2904  loss_rpn_cls: 0.1535  loss_rpn_loc: 0.3052  time: 0.5982  data_time: 0.0411  lr: 0.024476  max_mem: 6325M
[32m[11/09 17:12:06 d2.utils.events]: [0m eta: 0:00:00  iter: 999  total_loss: 1.741  loss_cls: 0.4183  loss_box_reg: 0.5419  loss_mask: 0.2974  loss_rpn_cls: 0.1901  loss_rpn_loc: 0.3154  time: 0.5991  data_time: 0.0430  lr: 0.024975  max_mem: 6325M
[32m[11/09 17:12:07 d2.engine.hooks]: [0mOverall training speed: 698 iterations in 0:06:58 (0.5991 s / it)
[32m[11/09 17:12:07 d2.engine.hooks]: [0mTotal training time: 0:07:05 (0:00:07 on hooks)
[32m[11/09 17:12:07 d2.data.datasets.coco]: [0mLoaded 121 images in COCO format from dataset/annotations_val.json
[32m[11/09 17:12:07 d2.data.build]: [0mDistribution of instances among all 3 categories:
[36m|  category  | #instances   |  category  | #instances   |  category  | #instances 

## Evaluate
As evaluation metric the `intersection over union objects` is computed.

In [10]:
model = build_model(cfg)
trainer.__attributes__

AttributeError: 'DefaultTrainer' object has no attribute '__attributes__'