<a href="https://colab.research.google.com/github/CanKeles5/ObjectDetection/blob/main/Detectron2_Faster_R_CNN_VisDrone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install dependencies: 
!pip install pyyaml==5.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

In [None]:
# install detectron2: (Colab has CUDA 10.1 + torch 1.7)
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
import torch
assert torch.__version__.startswith("1.7")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html
exit(0)  # After installation, you need to "restart runtime" in Colab. This line can also restart runtime

In [None]:
# Mount Google Drive under /content/drive; the dataset archives unzipped in
# later cells are read from /content/drive/MyDrive.
from google.colab import drive
drive.mount(mountpoint="/content/drive")

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import random
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow
from pathlib import Path
from PIL import Image as PILImage
import IPython
from math import trunc
import base64
from io import BytesIO

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
from google.colab.patches import cv2_imshow
from detectron2.utils.visualizer import ColorMode
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

In [None]:
!unzip /content/drive/MyDrive/VisDrone2019-DET-train.zip -d /content/

In [None]:
!unzip /content/drive/MyDrive/VisDrone2019-DET-val.zip -d /content/

In [5]:
#/content/VisDrone2019-DET-train/annotations
def _visdrone_category_to_class(category):
    """Collapse a raw annotation category id into one of three class indices.

    Ids 0 and 11 map to 0, ids 1 and 2 map to 1, and every other id maps to 2.
    """
    if category in (0, 11):
        return 0
    if category in (1, 2):
        return 1
    return 2


def get_visdrone_dicts(img_path="/content/VisDrone2019-DET-train/images",
                       annot_path="/content/VisDrone2019-DET-train/annotations"):
    """Build a list of detectron2-style dataset records from a VisDrone split.

    Walks ``img_path`` recursively. For every readable image it opens the
    annotation file with the same base name (``<name>.txt``) in ``annot_path``
    and converts each comma-separated line into one annotation: the first four
    fields are read as ``left, top, width, height`` and the sixth field
    (index 5) as the raw category id.

    Args:
        img_path: Directory containing the images (searched recursively).
        annot_path: Directory containing one ``.txt`` annotation file per image.

    Returns:
        A list of dicts with keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``. Each annotation holds ``bbox``
        (``[left, top, width, height]`` as floats), ``bbox_mode``
        (``BoxMode.XYWH_ABS``), an empty ``segmentation`` list and
        ``category_id`` (0, 1 or 2).

    Raises:
        FileNotFoundError: If a readable image has no matching annotation file.
        ValueError / IndexError: If a non-blank annotation line is malformed.
    """
    dataset_dicts = []
    for path, _subdirs, files in os.walk(img_path):
        for filename in files:
            img_p = os.path.join(path, filename)

            # cv2.imread returns None (it does not raise) for files it cannot
            # decode, e.g. stray non-image files; skip those instead of
            # crashing on `.shape`.
            image = cv2.imread(img_p)
            if image is None:
                continue
            h, w = image.shape[:2]

            # splitext handles extensions of any length (".jpg", ".jpeg", ...).
            stem = os.path.splitext(filename)[0]
            anot_p = os.path.join(annot_path, stem + ".txt")

            objs = []
            with open(anot_p) as fp:
                for line in fp:
                    line = line.strip()
                    if not line:
                        # Tolerate blank lines (including a trailing one).
                        continue
                    vals = line.split(",")
                    b_left, b_top, b_width, b_height = map(float, vals[:4])
                    objs.append({
                        "bbox": [b_left, b_top, b_width, b_height],
                        "bbox_mode": BoxMode.XYWH_ABS,
                        "segmentation": [],
                        "category_id": _visdrone_category_to_class(int(vals[5])),
                    })

            dataset_dicts.append({
                "file_name": img_p,
                "image_id": filename,
                "height": h,
                "width": w,
                "annotations": objs,
            })

    return dataset_dicts

In [6]:
# Register the train and validation splits with detectron2.
# The loader passed to DatasetCatalog.register is called with no arguments, so
# a plain zero-argument lambda is used (the previous `d=_` default captured
# IPython's last-output variable, which is hidden state and unused).
# The order of thing_classes matches the category ids 0/1/2 produced by
# get_visdrone_dicts; thing_colors gives one color per class.
DatasetCatalog.register(
    "train_set",
    lambda: get_visdrone_dicts(
        "/content/VisDrone2019-DET-train/images",
        "/content/VisDrone2019-DET-train/annotations",
    ),
)
MetadataCatalog.get("train_set").set(
    thing_classes=["Others", "Person", "Vehicle"],
    thing_colors=[(0,255,0), (255,0,0), (0,0,255)],
)

DatasetCatalog.register(
    "val_set",
    lambda: get_visdrone_dicts(
        "/content/VisDrone2019-DET-val/images",
        "/content/VisDrone2019-DET-val/annotations",
    ),
)
MetadataCatalog.get("val_set").set(
    thing_classes=["Others", "Person", "Vehicle"],
    thing_colors=[(0,255,0), (255,0,0), (0,0,255)],
)

In [7]:
# Look up the metadata objects (class names and colors) of both registered splits.
train_metadata, val_metadata = (
    MetadataCatalog.get(split) for split in ("train_set", "val_set")
)

In [8]:
# Display the validation metadata to check the registered classes and colors.
val_metadata

Metadata(name='val_set', thing_classes=['Others', 'Person', 'Vehicle'], thing_colors=[(0, 255, 0), (255, 0, 0), (0, 0, 255)])

In [9]:
# Eagerly build the record lists for both splits. This reads every image once
# to obtain its height and width.
train_set_dicts = get_visdrone_dicts(
    img_path="/content/VisDrone2019-DET-train/images",
    annot_path="/content/VisDrone2019-DET-train/annotations",
)
val_set_dicts = get_visdrone_dicts(
    img_path="/content/VisDrone2019-DET-val/images",
    annot_path="/content/VisDrone2019-DET-val/annotations",
)

KeyboardInterrupt: ignored

In [None]:
# Fine-tune the model-zoo Faster R-CNN (X_101_32x8d FPN, 3x schedule) on the
# registered "train_set", starting from its COCO-Detection checkpoint.
model_config = "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(model_config))

# Model: start from the pretrained checkpoint; three output classes to match
# the three entries of thing_classes registered for the datasets.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_config)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 1024

# Data
cfg.DATASETS.TRAIN = ("train_set",)
cfg.DATASETS.TEST = ("val_set",)
cfg.DATALOADER.NUM_WORKERS = 2

# Optimization schedule
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.0001
cfg.SOLVER.MAX_ITER = 8000

# Checkpoints and logs are written to cfg.OUTPUT_DIR.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # resume=False: do not resume from a previous run
trainer.train()

CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 2
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ('val_set',)
  TRAIN: ('train_set',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: ['res2', 'res3', 'res

model_final_68b088.pkl: 421MB [00:05, 83.3MB/s]                           
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.


[32m[03/03 10:45:37 d2.engine.train_loop]: [0mStarting training from iteration 0


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  num_fg = fg_inds.nonzero().numel()


[32m[03/03 10:46:14 d2.utils.events]: [0m eta: 4:12:13  iter: 19  total_loss: 3.275  loss_cls: 1.388  loss_box_reg: 0.4778  loss_rpn_cls: 0.931  loss_rpn_loc: 0.4006  time: 1.8395  data_time: 0.0426  lr: 1.9981e-06  max_mem: 11246M
[32m[03/03 10:46:52 d2.utils.events]: [0m eta: 4:11:56  iter: 39  total_loss: 2.666  loss_cls: 1.367  loss_box_reg: 0.4979  loss_rpn_cls: 0.4276  loss_rpn_loc: 0.3679  time: 1.8727  data_time: 0.0194  lr: 3.9961e-06  max_mem: 11246M
[32m[03/03 10:47:30 d2.utils.events]: [0m eta: 4:10:58  iter: 59  total_loss: 2.569  loss_cls: 1.308  loss_box_reg: 0.5313  loss_rpn_cls: 0.3552  loss_rpn_loc: 0.3511  time: 1.8731  data_time: 0.0223  lr: 5.9941e-06  max_mem: 11246M
[32m[03/03 10:48:08 d2.utils.events]: [0m eta: 4:11:25  iter: 79  total_loss: 2.461  loss_cls: 1.234  loss_box_reg: 0.5266  loss_rpn_cls: 0.3139  loss_rpn_loc: 0.3316  time: 1.8837  data_time: 0.0188  lr: 7.9921e-06  max_mem: 11246M
[32m[03/03 10:48:46 d2.utils.events]: [0m eta: 4:10:47  ite

In [None]:
# Look at training curves in tensorboard.
# The log directory is the relative path "output", resolved against the
# notebook's current working directory.
%load_ext tensorboard
%tensorboard --logdir output

In [None]:
!unzip /content/drive/MyDrive/VisDrone2019-DET-test-dev.zip -d /content/

In [None]:
# Register the test split (images and annotations under /content/).
# The loader is called with no arguments, so a plain zero-argument lambda is
# used (the previous `d=_` default captured IPython's last-output variable,
# which is hidden state and unused).
DatasetCatalog.register(
    "test_set",
    lambda: get_visdrone_dicts("/content/images", "/content/annotations"),
)
# Same class names and colors as the train/val splits.
MetadataCatalog.get("test_set").set(
    thing_classes=["Others", "Person", "Vehicle"],
    thing_colors=[(0,255,0), (255,0,0), (0,0,255)],
)

In [None]:
# Metadata (class names and colors) of the registered test split, named
# consistently with train_metadata / val_metadata.
test_metadata = MetadataCatalog.get("test_set")
# Backward-compatible alias: the visualization cell still uses the old name.
balloon_metadata = test_metadata

In [None]:
# Build the record list for the test split; the visualization cell samples from it.
dataset_dicts = get_visdrone_dicts(
    img_path="/content/images",
    annot_path="/content/annotations",
)

In [None]:
# Display the full config currently in effect (long output).
cfg

In [None]:
# Show the directory the trainer wrote to; the inference cell builds the
# weights path from it.
print(cfg.OUTPUT_DIR)

In [None]:
# Inference should use the config with parameters that are used in training.
# cfg already contains everything set for training; only the fields below change.

# Path to the model we just trained. The second component must be relative:
# os.path.join discards cfg.OUTPUT_DIR when a later component is absolute.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# Custom testing threshold. The trained model is a Faster R-CNN, so the
# threshold belongs on ROI_HEADS; the RETINANET setting is not read by this
# architecture. NOTE: the predictor built here is also the model used by the
# COCO evaluation cell, so this threshold applies there too.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.45
cfg.DATASETS.TEST = ( )
predictor = DefaultPredictor(cfg)

In [None]:
# Draw predictions on up to 10 randomly chosen test images.
# random.sample raises ValueError when asked for more items than exist, so the
# sample size is capped at the dataset size.
num_samples = min(10, len(dataset_dicts))
for d in random.sample(dataset_dicts, num_samples):
    im = cv2.imread(d["file_name"])
    if im is None:
        # cv2.imread returns None for unreadable files; skip instead of
        # passing None to the predictor.
        continue
    outputs = predictor(im)
    # `[:, :, ::-1]` reverses the channel axis (BGR <-> RGB) going into the
    # Visualizer and again coming out, before display with cv2_imshow.
    v = Visualizer(im[:, :, ::-1],
                   metadata=balloon_metadata,
                   scale=1.5,
                   instance_mode=ColorMode.SEGMENTATION   # color instances per class using metadata.thing_colors
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(v.get_image()[:, :, ::-1])


In [None]:
# COCO-style evaluation of the trained model on the registered test split.
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

eval_split = "test_set"

# Evaluator for the split (third argument False: not distributed); its files
# are written to /output2/.
evaluator = COCOEvaluator(eval_split, cfg, False, output_dir="/output2/")
# Data loader that iterates over the same split.
val_loader = build_detection_test_loader(cfg, eval_split)

# Run the predictor's underlying model over the loader and display the metrics.
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
eval_results