# Detectron2 Beginner's Tutorial -- AMMI 2023 CV Week 2 Lab 1

<img src="https://dl.fbaipublicfiles.com/detectron2/Detectron2-Logo-Horz.png" width="500">

Welcome to detectron2! This is the official colab tutorial of detectron2. Here, we will go through some basics usage of detectron2, including the following:
* Run inference on images or videos, with an existing detectron2 model
* Train a detectron2 model on a new dataset

You can make a copy of this tutorial by "File -> Open in playground mode" and make changes there. __DO NOT__ request access to this tutorial.


# Install detectron2

In [1]:
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.2/274.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyyaml
  Building wheel for pyyaml (setup.py) ... [?25l[?25hdone
  Created wheel for pyyaml: filename=PyYAML-5.1-cp310-cp310-linux_x86_64.whl size=44090 sha256=2b89aed869aa3446c1b6128c005206922203bf82a1a76b79cdc27203232ef5d7
  Stored in directory: /root/.cache/pip/wheels/70/83/31/975b737609aba39a4099d471d5684141c1fdc3404f97e7f68a
Successfully built pyyaml
Installing collected packages: pyyaml
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 6.0
    Uninstalling PyYAML-6.0:
      Successfully uninstalled PyYAML-6.0
[31mERROR: pip's dependency resolver does not currently take into accoun

In [2]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
torch:  2.0 ; cuda:  cu118
detectron2: 0.6


In [3]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# Run a pre-trained detectron2 model

We first download an image from the COCO dataset:

In [4]:
# download, decompress the data
!wget https://github.com/gkioxari/aims2020_visualrecognition/releases/download/v1.0/videoclip.zip
!unzip videoclip.zip > /dev/null

--2023-05-17 14:50:29--  https://github.com/gkioxari/aims2020_visualrecognition/releases/download/v1.0/videoclip.zip
Resolving github.com (github.com)... 192.30.255.113
Connecting to github.com (github.com)|192.30.255.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/255177940/09ad9d80-7f47-11ea-93bc-002a89d4791c?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230517%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230517T145029Z&X-Amz-Expires=300&X-Amz-Signature=a1a4e2880c1e149fb43e44bf71862aa4ad7ed016a3e20848846eca118d40ea4b&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=255177940&response-content-disposition=attachment%3B%20filename%3Dvideoclip.zip&response-content-type=application%2Foctet-stream [following]
--2023-05-17 14:50:29--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/255177940/09ad9d80-7f47-11ea-93bc-002a

Then, we create a detectron2 config and a detectron2 `DefaultPredictor` to run inference on this image.

In [5]:
files = os.listdir("/content/clip")
image = []
for filename in files:
    paths = os.path.join("/content/clip", filename)

    if os.path.isfile(paths):
      im = cv2.imread(paths)
      image.append(im)
      cv2_imshow(im)
      

In [6]:
output_list = []
for im in image:
  cfg = get_cfg()
  # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
  cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
  cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
  # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
  cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
  predictor = DefaultPredictor(cfg)
  outputs = predictor(im)
  output_list.append(outputs)

[05/16 22:46:58 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...


model_final_f10217.pkl: 178MB [00:00, 211MB/s]                           
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[05/16 22:47:07 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...
[05/16 22:47:08 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...
[05/16 22:47:09 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...
[05/16 22:47:10 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...
[05/16 22:47:11 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/

In [7]:
# look at the outputs. See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format for specification
classes = []
bounding_box = []
mask = []
for out in output_list:
  print(out["instances"].pred_classes)
  classes.append(out["instances"].pred_classes)
  print(out["instances"].pred_boxes)
  bounding_box.append(list(out["instances"].pred_boxes))
  print(out["instances"].pred_masks)
  mask.append(out["instances"].pred_masks)


tensor([2, 7, 5, 2, 2, 2, 7, 2, 2, 2, 7, 2, 2], device='cuda:0')
Boxes(tensor([[5.0553e+02, 5.7624e+02, 5.8682e+02, 6.4508e+02],
        [6.4810e+01, 4.8505e+02, 3.0883e+02, 6.9107e+02],
        [9.0792e+02, 4.9821e+02, 1.0788e+03, 6.8768e+02],
        [1.1707e+03, 5.7895e+02, 1.2841e+03, 6.7625e+02],
        [8.0222e-01, 6.1397e+02, 7.3711e+01, 6.7041e+02],
        [1.3029e+03, 5.3472e+02, 1.6503e+03, 8.7230e+02],
        [5.1276e+02, 4.9377e+02, 7.0175e+02, 6.2817e+02],
        [6.9422e+02, 5.8014e+02, 7.2879e+02, 6.1310e+02],
        [1.0809e+03, 5.7661e+02, 1.1415e+03, 6.2300e+02],
        [7.0855e+02, 5.7664e+02, 7.4843e+02, 6.1130e+02],
        [1.3012e+03, 5.2625e+02, 1.6454e+03, 8.7372e+02],
        [7.6188e+02, 5.7615e+02, 8.0534e+02, 6.0322e+02],
        [7.8476e+02, 5.8119e+02, 8.0482e+02, 6.0192e+02]], device='cuda:0'))
tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., F

In [None]:
# We can use `Visualizer` to draw the predictions on the image.
for im in range(len(image)):
  v = Visualizer(image[im][:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
  out = v.draw_instance_predictions(output_list[im]["instances"].to("cpu"))
  cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
def compute_iou(box_1, box_2):
    
    
    assert box_1[0] <= box_1[2]
    assert box_1[1] <= box_1[3]
    assert box_2[0] <= box_2[2]
    assert box_2[1] <= box_2[3]

    # determine the coordinates of the intersection rectangle
    x_left = max(box_1[0], box_2[0])
    y_top = max(box_1[2], box_2[2])
    x_right = min(box_1[1], box_2[1])
    y_bottom = min(box_1[3], box_2[3])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = max(0,x_right - x_left + 1) * max(0,y_bottom - y_top + 1) 

    # compute the area of both AABBs
    box_1_area = (box_1[1] - box_1[0] + 1) * (box_1[3] - box_1[2] + 1) 
    box_2_area = (box_2[1] - box_2[0] +1) * (box_2[3] - box_2[2] + 1) 

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = intersection_area / float(box_1_area + box_2_area - intersection_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [None]:
def matching_score_category(category1, category2, pi, qj):
    return int(category1[pi] == category2[qj])


def matching_score_category_overlap(category1, category2, bounding_box1,bounding_box2 ,pi, qj):
    return int(category1[pi] == category2[qj]) * compute_iou(bounding_box1[pi], bounding_box2[qj])


def find_best_match(P, Q, box1, box2,  matching_score):
    best_matches = {}  # Dictionary to store the best match for each prediction in P

    for pi in range(len(P)):
        best_match = None
        best_score =0 #-float('inf')

        for qj in range(len(Q)):
            score = matching_score(P, Q, box1, box2, pi, qj)
            if score > best_score:
                best_match = Q[qj]
                best_score = score

        best_matches[P[pi]] = best_match

    return best_matches


In [None]:
for i in range(len(classes)-1):
  P = classes[i]  # List of predictions in frame It
  Q = classes[i+1]  # List of predictions in frame It+1
  box1 = bounding_box[i]
  box2 = bounding_box[i+1]
  best_matches = find_best_match(P, Q, box1, box2, matching_score_category_overlap)
  # or
  # best_matches = find_best_match(P, Q, matching_score_category_overlap)

  # Now you can access the best match for each prediction in P using the dictionary
  for pi, best_match in best_matches.items():
      print(f"Best match for {pi}: {best_match}")

NameError: ignored

In [None]:
def find_best_matchs(P, Q,  matching_score):
    best_matches = {}  # Dictionary to store the best match for each prediction in P

    for pi in range(len(P)):
        best_match = None
        best_score =0 #-float('inf')

        for qj in range(len(Q)):
            score = matching_score(P, Q, pi, qj)
            if score > best_score:
                best_match = Q[qj]
                best_score = score

        best_matches[P[pi]] = best_match

    return best_matches


In [None]:
for i in range(len(classes)-1):
  P = classes[i]  # List of predictions in frame It
  Q = classes[i+1]  # List of predictions in frame It+1

  best_matches = find_best_matchs(P, Q, matching_score_category)
  # or
  # best_matches = find_best_match(P, Q, matching_score_category_overlap)

  # Now you can access the best match for each prediction in P using the dictionary
  for pi, best_match in best_matches.items():
      print(f"Best match for {pi}: {best_match}")

Best match for 2: 2
Best match for 7: 7
Best match for 5: 5
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 7: 7
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 7: 7
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 7: 7
Best match for 7: 7
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 5: None
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 7: 7
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 7: 7
Best match for 7: 7
Best match for 2: 2
Best match for 5: None
Best match for 2: 2
Best match for 7: 7
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 2: 2
Best match for 5: None
Best match for 7: 7
Best match for 2: 2
Best match for 7: 7
Best match for 2: 2
Best match 

In [None]:
for out in output_list:
  print(out["instances"].pred_classes)
  classes.append(out["instances"].pred_classes)
  print(out["instances"].pred_boxes)
  bounding_box.append(list(out["instances"].pred_boxes))
  print(out["instances"].pred_masks)
  mask.append(out["instances"].pred_masks)

tensor([2, 7, 5, 2, 2, 2, 7, 2, 2, 2, 7, 2, 2], device='cuda:0')
Boxes(tensor([[5.0553e+02, 5.7624e+02, 5.8682e+02, 6.4508e+02],
        [6.4810e+01, 4.8505e+02, 3.0883e+02, 6.9107e+02],
        [9.0792e+02, 4.9821e+02, 1.0788e+03, 6.8768e+02],
        [1.1707e+03, 5.7895e+02, 1.2841e+03, 6.7625e+02],
        [8.0222e-01, 6.1397e+02, 7.3711e+01, 6.7041e+02],
        [1.3029e+03, 5.3472e+02, 1.6503e+03, 8.7230e+02],
        [5.1276e+02, 4.9377e+02, 7.0175e+02, 6.2817e+02],
        [6.9422e+02, 5.8014e+02, 7.2879e+02, 6.1310e+02],
        [1.0809e+03, 5.7661e+02, 1.1415e+03, 6.2300e+02],
        [7.0855e+02, 5.7664e+02, 7.4843e+02, 6.1130e+02],
        [1.3012e+03, 5.2625e+02, 1.6454e+03, 8.7372e+02],
        [7.6188e+02, 5.7615e+02, 8.0534e+02, 6.0322e+02],
        [7.8476e+02, 5.8119e+02, 8.0482e+02, 6.0192e+02]], device='cuda:0'))
tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., F

In [None]:
for out in range(3):#len(output_list)-1):
  for j in range(len(classes[out])):
    for k in range(len(classes[out +1])):
      if (output_list[out]["instances"].pred_classes)[j] == (output_list[out+1]["instances"].pred_classes)[k]:
         (output_list[out]["instances"].pred_masks)[j][k] = (output_list[out+1]["instances"].pred_masks)[k][k]


#output_list

In [None]:
# We can use `Visualizer` to draw the predictions on the image.
for im in range(len(image)):
  v = Visualizer(image[im][:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
  out = v.draw_instance_predictions(output_list[im]["instances"].to("cpu"))
  cv2_imshow(out.get_image()[:, :, ::-1])