<a href="https://colab.research.google.com/github/Kayjayi/computer-vision/blob/main/Object%2BCustom%2BDataset_k1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Set the Colab GPU**

Go to Edit > Notebook settings as the following:

Click on “Notebook settings” and select “GPU” from Hardware *accelerator*. That's it.

**Connect Colab with Google Drive**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!python -m pip install pyyaml==5.1

# Properly install detectron2.
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities
#(e.g. compiled operators).
#(Please do not install twice in both ways)
#!git clone 'https://github.com/facebookresearch/detectron2'
#dist = distutils.core.run_setup("./detectron2/setup.py")
#!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
#sys.path.insert(0, os.path.abspath('./detectron2'))

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/274.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/274.2 kB[0m [31m991.8 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m194.6/274.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.2/274.2 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyyaml
  Building wheel for pyyaml (setup.py) ... [?25l[?25hdone
  Created wheel for pyyaml: filename=PyYAML-5.1-cp310-cp310-linux_x86_64.whl size=44089 sha256=36fdd2a2875229e6a3e53a5166af35dfab3bfe5a125b76d8b0d5f886cf13c21c
  Stored in directory: /root/.cache/pip/wheels/70/83/31/975b737609aba39a4099d471d5684141c1fdc340

In [3]:
import detectron2

# Setup detectron2 logger
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
#https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# import some common libraries
import numpy as np
import torch, os, json, cv2, random
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow




**Balloon Dataset**

In [4]:
#https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip

balloonData_path = '/content/drive/MyDrive/Colab Notebooks/balloon'

In [None]:
# if your dataset is in COCO format, then write the following three lines:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("my_dataset_train", {}, "json_annotation_train.json", "path/to/image/dir")
# register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")

In [5]:
jjson_file = os.path.join(balloonData_path, "via_region_data.json")
print(jjson_file)

/content/drive/MyDrive/Colab Notebooks/balloon/via_region_data.json


In [6]:
#function to parse and prepare bolloon dataset into detectron2's standard format

from detectron2.structures import BoxMode
# boxmode provides the details on the boc coordinates
def get_balloon_dicts(balloonData_path):
    json_file = os.path.join(balloonData_path, "via_region_data.json")
    #json_file= '/content/drive/MyDrive/Colab Notebooks/balloon/train/via_region_data.json'
    with open(json_file) as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        record = {}

        filename = os.path.join(balloonData_path, v["filename"])
        height, width = cv2.imread(filename).shape[:2]

        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width

        annos = v["regions"]
        objs = []
        for _, anno in annos.items():
            assert not anno["region_attributes"]
            anno = anno["shape_attributes"]
            px = anno["all_points_x"]
            py = anno["all_points_y"]
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            obj = {
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
            }
            objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

In [7]:
#Register the balloon dataset to detectron2

from detectron2.data import DatasetCatalog

for d in ["train", "val"]:
    DatasetCatalog.register("balloon_" + d, lambda d=d: get_balloon_dicts(balloonData_path + '/'+ d))
    MetadataCatalog.get("balloon_" + d).set(thing_classes=["balloon"])

balloon_metadata = MetadataCatalog.get("balloon_train")

In [8]:
print(balloon_metadata)

Metadata(name='balloon_train', thing_classes=['balloon'])


In [None]:
dataset_dicts = get_balloon_dicts(balloonData_path + "/train")
for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2_imshow(out.get_image()[:, :, ::-1])

# Object Detection with Custom Dataset using Faster R-CNN

In [10]:
#https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-Detection

from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")  # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 500
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (ballon)

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[09/05 23:39:25 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

model_final_f6e8b1.pkl: 243MB [00:12, 19.8MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}


[09/05 23:39:40 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[09/05 23:39:56 d2.utils.events]:  eta: 0:04:39  iter: 19  total_loss: 1.509  loss_cls: 0.7539  loss_box_reg: 0.5977  loss_rpn_cls: 0.02805  loss_rpn_loc: 0.007388    time: 0.5894  last_time: 0.8317  data_time: 0.0232  last_data_time: 0.0223   lr: 9.7405e-06  max_mem: 3328M
[09/05 23:40:12 d2.utils.events]:  eta: 0:04:30  iter: 39  total_loss: 1.332  loss_cls: 0.6602  loss_box_reg: 0.6057  loss_rpn_cls: 0.03748  loss_rpn_loc: 0.00758    time: 0.5902  last_time: 0.7308  data_time: 0.0111  last_data_time: 0.0280   lr: 1.9731e-05  max_mem: 3493M
[09/05 23:40:24 d2.utils.events]:  eta: 0:04:16  iter: 59  total_loss: 1.197  loss_cls: 0.5645  loss_box_reg: 0.6259  loss_rpn_cls: 0.03624  loss_rpn_loc: 0.007148    time: 0.5841  last_time: 0.6256  data_time: 0.0158  last_data_time: 0.0199   lr: 2.972e-05  max_mem: 3493M
[09/05 23:40:36 d2.utils.events]:  eta: 0:04:04  iter: 79  total_loss: 1.144  loss_cls: 0.4664  loss_box_reg: 0.6253  loss_rpn_cls: 0.02387  loss_rpn_loc: 0.005215    time: 0.58

In [None]:
# Look at training curves in tensorboard:
%load_ext tensorboard
%tensorboard --logdir output

# Inference & evaluation using the trained model

In [12]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set the testing threshold for this model
cfg.DATASETS.TEST = ("balloon_val", )
predictor = DefaultPredictor(cfg)

[09/05 23:48:22 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output/model_final.pth ...


In [None]:
dataset_dicts = get_balloon_dicts(balloonData_path + "/val")
for d in random.sample(dataset_dicts, 3):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1], metadata=balloon_metadata, scale=0.8)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize = (14, 10))
    plt.imshow(cv2.cvtColor(v.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB))
    plt.show()

 **Performance Metric**

 Evaluate the performance of Trained Faster RCNN using Average Precision (AP) metric implemented in COCO API.

In [14]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

evaluator = COCOEvaluator("balloon_val", ("bbox",), False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "balloon_val")

print(inference_on_dataset(trainer.model, val_loader, evaluator))

results_val = inference_on_dataset(trainer.model, val_loader, evaluator)
category_met = results_val["bbox"]
#average_prec = results_val['AP']

[09/05 23:51:51 d2.evaluation.coco_evaluation]: Trying to convert 'balloon_val' to COCO format ...
[09/05 23:51:51 d2.data.datasets.coco]: Converting annotations of dataset 'balloon_val' to COCO format ...)
[09/05 23:51:51 d2.data.datasets.coco]: Converting dataset dicts into COCO format
[09/05 23:51:51 d2.data.datasets.coco]: Conversion finished, #images: 13, #annotations: 50
[09/05 23:51:51 d2.data.datasets.coco]: Caching COCO format annotations at './output/balloon_val_coco_format.json' ...
[09/05 23:51:52 d2.data.build]: Distribution of instances among all 1 categories:
|  category  | #instances   |
|:----------:|:-------------|
|  balloon   | 50           |
|            |              |
[09/05 23:51:52 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[09/05 23:51:52 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[0

'AP' stands for "Average Precision," which is a common metric for evaluating object detection performance. It measures the precision and recall trade-off across different confidence score thresholds.

'AP50' represents the Average Precision with IoU (Intersection over Union) threshold of 0.5. It measures the precision and recall at IoU = 0.5, which is a commonly used threshold for detection evaluation.

'AP75' is the Average Precision at IoU threshold of 0.75. It measures the precision and recall at IoU = 0.75, which is a stricter evaluation threshold.

'APs', 'APm', and 'APl' represent the Average Precision for small, medium, and large objects, respectively. These metrics evaluate the detection performance based on the size of the objects in the dataset.

In [19]:
import json

# Load the JSON metrics file generated by COCOEvaluator
json_file_path = "/content/output/metrics.json"

with open(json_file_path, "r") as f:
  coco_metrics = json.load(f)

# Access and process specific metrics as needed
bbox_metrics = coco_metrics["bbox"]  # Replace "bbox" with "segm" for instance segmentation

# Access metrics for different IoU thresholds
print("Metrics for IoU threshold 0.5:")
iou_05_metrics = bbox_metrics["IoU=0.50:0.95"]
print("Precision:", iou_05_metrics["precision"])
print("Recall:", iou_05_metrics["recall"])
print("AP:", iou_05_metrics["AP"])

# Access metrics for each category
category_metrics = bbox_metrics["per-category"]
for category_id, category_data in category_metrics.items():
    category_name = category_data["name"]
    category_precision = category_data["precision"]
    category_recall = category_data["recall"]
    category_ap = category_data["AP"]
    print(f"Category: {category_name}")
    print(f"Precision: {category_precision}")
    print(f"Recall: {category_recall}")
    print(f"AP: {category_ap}")
    print()


JSONDecodeError: ignored