In [1]:
import sys, os, distutils.core
sys.path.insert(0, os.path.abspath('./detectron2'))

In [2]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Mar_28_02:18:24_PDT_2024
Cuda compilation tools, release 12.4, V12.4.131
Build cuda_12.4.r12.4/compiler.34097967_0
torch:  2.5 ; cuda:  cu124
detectron2: 0.6


In [4]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [5]:
!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O input.jpg
im = cv2.imread("./input.jpg")

In [6]:
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
outputs = predictor(im)

[32m[12/12 15:20:07 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...


model_final_f10217.pkl: 178MB [00:05, 34.9MB/s]                            
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [7]:
# look at the outputs. See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format for specification
print(outputs["instances"].pred_classes)
print(outputs["instances"].pred_boxes)

tensor([17,  0,  0,  0,  0,  0,  0,  0, 25,  0, 25, 25,  0,  0, 24],
       device='cuda:0')
Boxes(tensor([[126.5985, 244.9039, 459.8283, 480.0000],
        [251.1051, 157.8163, 338.9748, 413.6253],
        [114.8519, 268.6908, 148.2369, 398.8166],
        [  0.8215, 281.0366,  78.6025, 478.4248],
        [ 49.3943, 274.1233,  80.1552, 342.9868],
        [561.2267, 271.5826, 596.2765, 385.2516],
        [385.9061, 270.3122, 413.7124, 304.0400],
        [515.9238, 278.3691, 562.2795, 389.3792],
        [335.2389, 251.9167, 414.7491, 275.9345],
        [350.9279, 269.2094, 386.0966, 297.9086],
        [331.6266, 231.0002, 393.2768, 257.2017],
        [510.7307, 263.2701, 570.9870, 295.9414],
        [409.0865, 271.8633, 460.5579, 356.8701],
        [506.8876, 283.3300, 529.9465, 324.0268],
        [594.5670, 283.4811, 609.0570, 311.4140]], device='cuda:0'))


In [9]:
# We can use `Visualizer` to draw the predictions on the image.
v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imwrite("output.jpg", out.get_image()[:, :, ::-1])

True

In [10]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("my_dataset_train", {}, "./detectron_datasets/1/train/_annotations.coco.json", "./detectron_datasets/1/train")
register_coco_instances("my_dataset_val", {}, "./detectron_datasets/1/valid/_annotations.coco.json", "./detectron_datasets/1/valid")

In [12]:
class_labels = ["coral", "Acanthastrea", "Acropora", "Coeloseris mayeri", "Diploastrea", "Favia", "Fungia", "Goniastrea",
 "Goniopora", "Isopora", "Leptastrea", "Lobophyllia", "Montipora", "Pavona", "Platygyra", "Pocillopora", "Porites", "Sarcophyton", "Stylophora pistillata", "Turbinaria"]
len(class_labels)

20

In [17]:
MetadataCatalog.get("my_dataset_train").thing_classes = class_labels
corals_metadata = MetadataCatalog.get("my_dataset_train")

In [20]:
dataset_dicts = DatasetCatalog.get("my_dataset_train")
for i, d in enumerate(random.sample(dataset_dicts, 3)):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=corals_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2.imwrite(f"corals{i}.jpg", out.get_image()[:, :, ::-1])

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[12/12 15:49:00 d2.data.datasets.coco]: [0mLoaded 194 images in COCO format from ./detectron_datasets/1/train/_annotations.coco.json


In [22]:
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.OUTPUT_DIR = "detectron_model"
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 2000    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 19  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

[32m[12/12 15:52:20 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

model_final_280758.pkl: 167MB [00:01, 101MB/s]                             
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (20, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (20,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (76, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (76,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mroi_hea

[32m[12/12 15:52:22 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[12/12 15:52:35 d2.utils.events]: [0m eta: 0:18:58  iter: 19  total_loss: 3.452  loss_cls: 2.958  loss_box_reg: 0.235  loss_rpn_cls: 0.2143  loss_rpn_loc: 0.03134    time: 0.5796  last_time: 0.5423  data_time: 0.0481  last_data_time: 0.0082   lr: 4.9953e-06  max_mem: 3472M
[32m[12/12 15:52:46 d2.utils.events]: [0m eta: 0:18:07  iter: 39  total_loss: 3.151  loss_cls: 2.749  loss_box_reg: 0.1525  loss_rpn_cls: 0.2195  loss_rpn_loc: 0.03151    time: 0.5632  last_time: 0.4512  data_time: 0.0079  last_data_time: 0.0091   lr: 9.9902e-06  max_mem: 3472M
[32m[12/12 15:52:57 d2.utils.events]: [0m eta: 0:18:06  iter: 59  total_loss: 2.557  loss_cls: 2.269  loss_box_reg: 0.2036  loss_rpn_cls: 0.08523  loss_rpn_loc: 0.02542    time: 0.5606  last_time: 0.6051  data_time: 0.0062  last_data_time: 0.0049   lr: 1.4985e-05  max_mem: 3472M
[32m[12/12 15:53:08 d2.utils.events]: [0m eta: 0:17:26  iter: 79  total_

In [31]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

[32m[12/12 16:17:17 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from detectron_model/model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


In [28]:
from detectron2.utils.visualizer import ColorMode
DatasetCatalog.get("my_dataset_val")
for i, d in enumerate(random.sample(dataset_dicts, 3)):    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(im[:, :, ::-1],
                   metadata=corals_metadata, 
                   scale=0.5, 
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imwrite(f"predictions/detectron/val{i}.jpg", out.get_image()[:, :, ::-1])

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[12/12 16:14:28 d2.data.datasets.coco]: [0mLoaded 16 images in COCO format from ./detectron_datasets/1/valid/_annotations.coco.json


In [33]:
from detectron2.utils.visualizer import ColorMode
dir_path = "./test_imgs"
for i, path in enumerate(random.sample(os.listdir(dir_path), 3)):    
    im = cv2.imread(os.path.join(dir_path, path))
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(im[:, :, ::-1],
                   metadata=corals_metadata, 
                   scale=0.5, 
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imwrite(f"predictions/detectron/{path}.jpg", out.get_image()[:, :, ::-1])