In [1]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
import matplotlib.pyplot as plt
#from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

In [2]:
dataset = 'kitti'
annot_dir = './kitti/training_coco'
imgs_dir = './kitti/training_voc/VOC2012/JPEGImages'

for split in ['train', 'val']: 
    if split == 'train':
        annot_path = os.path.join(annot_dir, f'{split}_temp.json')
    else:
        annot_path = os.path.join(annot_dir, f'{split}.json')
    d_name = dataset + f'_{split}'
    register_coco_instances(d_name, {}, annot_path, imgs_dir)

# Load dataset
dataset_dicts = DatasetCatalog.get('kitti_train')
metadata = MetadataCatalog.get('kitti_train')

[32m[03/15 00:01:26 d2.data.datasets.coco]: [0mLoaded 1 images in COCO format from ./kitti/training_coco/train_temp.json


In [3]:
import argparse

class Args(argparse.Namespace):
    config_file='khang_net/configs/yolof/yolof_resnet_50_1x.py'
    eval_only=False
    num_gpus=1
    num_machines=1
    resume=False

args = Args()

In [4]:
from detectron2.model_zoo import get_config
from detectron2.config import LazyConfig
from detectron2.config.instantiate import instantiate


cfg = LazyConfig.load("khang_net/configs/yolof/yolof_resnet_50_1x.py")
cfg.train.device = 'mps'
cfg.dataloader.evaluator.dataset_name = 'kitti_val'
cfg.dataloader.train.dataset.names = 'kitti_train'
cfg.dataloader.train.total_batch_size = 1
cfg.optimizer.lr = 0.0001

In [5]:
from detectron2.engine import default_setup

default_setup(cfg, args)

[32m[03/14 23:59:07 detectron2]: [0mRank of current process: 0. World size: 1
[32m[03/14 23:59:07 detectron2]: [0mRank of current process: 0. World size: 1
[32m[03/14 23:59:08 detectron2]: [0mEnvironment info:
-------------------------------  -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [6]:
dataset_dicts

[{'file_name': './kitti/training_voc/VOC2012/JPEGImages/004914.png',
  'height': 375,
  'width': 1242,
  'image_id': '004914',
  'annotations': [{'iscrowd': 0,
    'bbox': [586, 179, 17, 14],
    'category_id': 7,
    'bbox_mode': <BoxMode.XYWH_ABS: 1>},
   {'iscrowd': 0,
    'bbox': [490, 179, 30, 22],
    'category_id': 7,
    'bbox_mode': <BoxMode.XYWH_ABS: 1>},
   {'iscrowd': 0,
    'bbox': [524, 173, 57, 18],
    'category_id': 1,
    'bbox_mode': <BoxMode.XYWH_ABS: 1>}]}]

In [7]:
from tools.lazyconfig_train_net import do_train

do_train(args, cfg)

[32m[03/14 23:59:11 detectron2]: [0mModel:
YOLOF(
  (backbone): ResNet(
    (stem): BasicStem(
      (conv1): Conv2d(
        3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
        (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
      )
    )
    (res2): Sequential(
      (0): BottleneckBlock(
        (shortcut): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
        )
        (conv1): Conv2d(
          64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv2): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv3): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
        )


KeyboardInterrupt: 

In [5]:
model = instantiate(cfg.model)
dataloader = instantiate(cfg.dataloader.train)

[32m[03/15 00:01:56 d2.data.datasets.coco]: [0mLoaded 1 images in COCO format from ./kitti/training_coco/train_temp.json
[32m[03/15 00:01:56 d2.data.build]: [0mRemoved 0 images with no usable annotations. 1 images left.
[32m[03/15 00:01:56 d2.data.build]: [0mDistribution of instances among all 9 categories:
[36m|   category    | #instances   |  category  | #instances   |  category  | #instances   |
|:-------------:|:-------------|:----------:|:-------------|:----------:|:-------------|
|    Cyclist    | 0            |  DontCare  | 1            |    Misc    | 0            |
| Person_sitt.. | 0            |    Tram    | 0            |   Truck    | 0            |
|      Van      | 0            |    car     | 2            |   person   | 0            |
|               |              |            |              |            |              |
|     total     | 3            |            |              |            |              |[0m
[32m[03/15 00:01:56 d2.data.dataset_mapper]: [0m[Da

In [6]:
batch = next(iter(dataloader))

In [8]:
img = batch[0]['image']


In [None]:
img = np.ascontiguousarray(img.permute(1, 2, 0), dtype=np.uint8)

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(12, 12))
plt.imshow(img)

In [None]:
import torch

gt_boxes = batch[0]['instances'].gt_boxes.tensor.to(torch.int64).numpy()

In [None]:
gt_boxes

In [None]:
import cv2

for gt_box in gt_boxes:
    img = cv2.rectangle(img, (gt_box[0], gt_box[1]),
                        (gt_box[2], gt_box[3]),
                        (0, 0, 255), 2, 1)



In [None]:
fig = plt.figure(figsize=(12, 12))
plt.imshow(img)

In [None]:
batch_size = 1
h, w = 13, 42
channels = 3
features = torch.rand((batch_size, channels, h, w))
features = [features]

anchors = model.anchor_generator(features)
pred_anchor_deltas = [torch.zeros((batch_size, h * w * 5, 4))]

In [None]:
indices = model.get_ground_truth(anchors, pred_anchor_deltas, [batch[0]['instances']])

In [None]:
indices

In [None]:
nearest_boxes = anchors[0][0].tensor[indices[0][0]].to(torch.int64)

In [None]:
for box in nearest_boxes:
    box = box.numpy()
    img = cv2.rectangle(img, (box[0], box[1]),
                        (box[2], box[3]),
                        (0, 255, 0), 2, 1)

In [None]:
fig = plt.figure(figsize=(12, 12))
plt.imshow(img)

In [7]:
model.training = False
img = model(batch)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [8]:
img

[{'instances': Instances(num_instances=0, image_height=375, image_width=1242, fields=[pred_boxes: Boxes(tensor([], size=(0, 4), grad_fn=<ViewBackward0>)), scores: tensor([], grad_fn=<IndexBackward0>), pred_classes: tensor([], dtype=torch.int64)])}]