# PDSM Stereo Faster R-CNN
@author: Moritz Bednorz

## 1 - Setup

### 1.1 - Install packages

In [None]:
# Print the version of the CUDA compiler (nvcc) available in this runtime
!nvcc -V
# Print the version of the GCC compiler available in this runtime
!gcc --version

In [None]:
# Install pinned PyTorch 1.9.0 / torchvision 0.10.0 wheels built for CUDA 11.1
# (cu111 is used because Colab has CUDA 11.1); the wheels are looked up on the
# PyTorch wheel index passed via -f.
!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# install dependencies: (use cu111 because colab has CUDA 11.1)
!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html

# install mmcv-full thus we could use CUDA operators
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html

# Install mmdetection
!rm -rf mmdetection
!git clone https://github.com/open-mmlab/mmdetection.git
%cd mmdetection
!pip install -e .

### 1.2 - Import packages

In [None]:
# Show the environment information gathered by mmcv (as the cell output)
from mmcv import collect_env
collect_env()

In [None]:
# Verify the PyTorch installation and whether a CUDA device is usable
import torch
import torchvision
print(torch.__version__, torch.cuda.is_available())

# Verify the MMDetection installation
import mmdet
print(mmdet.__version__)

# Verify the mmcv installation: report the CUDA version and the compiler
# that the mmcv ops were built with
from mmcv.ops import get_compiler_version, get_compiling_cuda_version
print(get_compiling_cuda_version())
print(get_compiler_version())

In [None]:
import mmcv
import matplotlib.pyplot as plt

### 1.3 - Download pretrained Faster R-CNN

The high-level architecture of Faster R-CNN is shown in the following picture. More details can be found in the [paper](https://arxiv.org/abs/1506.01497).

![faster rcnn](https://pic1.zhimg.com/80/v2-c0172be282021a1029f7b72b51079ffe_1440w.jpg) ![mmdet](https://pic2.zhimg.com/v2-e49ebcf931b5cf424ed311338f9ff35d_b.jpg)

Briefly, it uses a convolutional neural network (CNN) as a backbone to extract features from an image. Then, it uses a region proposal network (RPN) to predict proposals, i.e., potential objects. After that, it uses a feature extractor to crop the features for each region of interest (RoI), and an RoI head to perform classification and bounding box prediction.

In [None]:
# We download the pre-trained checkpoints for inference and finetuning.
!mkdir checkpoints
!wget -c https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth \
      -O checkpoints/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth

## 2 - Data

### 2.1 - Connect Google Drive to import data

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below
# (/content/drive/MyDrive/...) can be read from this runtime
from google.colab import drive
drive.mount('/content/drive')

### 2.2 - First sneak peek at the dataset

In [None]:
# Let's take a look at the dataset: load one sample frame and display it.
# NOTE(review): this path points to 'PDSM_test/data_coco_bb', whereas the
# dataset configuration further down uses 'PDSM_Nils/data_coco_bb_complete'
# - confirm that both folders exist on the mounted drive.
img = mmcv.imread('/content/drive/MyDrive/PDSM_test/data_coco_bb/aicm01/VID000_0/image_02/000010.png')
plt.figure(figsize=(15, 10))
# Convert BGR -> RGB before handing the image to matplotlib
plt.imshow(mmcv.bgr2rgb(img))
plt.show()

## 3 - Building the Faster R-CNN
Set the configuration of the dataset, the model and the evaluation.

### 3.1 - Import configuration of pretrained detector

[INFO:](https://mmdetection.readthedocs.io/en/v2.21.0/tutorials/config.html)<br>
For easy understanding, we recommend that contributors inherit from existing methods. For example, if some modification is made based on Faster R-CNN, users may first inherit the basic Faster R-CNN structure by specifying `_base_ = ../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py` and then modify the necessary fields in the config file.

In [None]:
from mmcv import Config

# Load the ResNet-50 (Caffe-style, FPN, multi-scale 3x) Faster R-CNN config.
# It has to describe the same architecture as the pretrained checkpoint that
# is downloaded in section 1.3 and assigned to `cfg.load_from`
# (faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_*.pth). Loading the R-101 config
# instead would leave the backbone layers that have no counterpart in the
# R-50 checkpoint randomly initialised.
cfg = Config.fromfile('/content/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py')

Default image size: 1920 x 540 <br>
Resized images --> Ratio: 3.556 : 1 <br>

### 3.2 - Data augmentation

![faster rcnn](https://mmdetection.readthedocs.io/en/v1.2.0/_images/data_pipeline.png)
<br>
[**SOURCE**](https://mmdetection.readthedocs.io/en/latest/tutorials/data_pipeline.html#design-of-data-pipelines)

#### 3.2.1 - Configure train pipeline

In [None]:
# Normalisation statistics for the input images.
# The mean and std values are decided by the pretrained model: when finetuning
# a pretrained model you need to follow the values used for pretraining.
# NOTE: the means are listed in BGR channel order and `to_rgb=False` keeps the
# images in BGR, as expected by the Caffe-style backbone - they are not RGB values.
cfg.img_norm_cfg = dict(mean=[103.53, 116.28, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)

cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),  # Read the image from disk
    dict(type='LoadAnnotations', with_bbox=True),  # Load the bounding-box annotations
    dict(
        type='Resize',  # Augmentation pipeline that resizes the images and their annotations
        # Half of the original 1920x540 resolution; integer division keeps the
        # scale an (int, int) tuple instead of (960.0, 270.0)
        img_scale=(1920 // 2, 540 // 2),
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),  # The ratio or probability to flip
    dict(type='Normalize', **cfg.img_norm_cfg),
    dict(type='Pad', size_divisor=32),  # The number the padded image sizes should be divisible by
    dict(type='DefaultFormatBundle'),  # Default format bundle to gather data in the pipeline
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])  # Decides which keys in the data are passed to the detector
]

#### 3.2.2 - Configure test pipeline

In [None]:
# Test/validation pipeline: no annotations are loaded and no random
# augmentation is applied (flip=False); normalisation and padding mirror
# the training pipeline.
cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        # Decides the largest scale for testing, used for the Resize pipeline.
        # Integer division keeps the scale an (int, int) tuple instead of floats.
        img_scale=(1920 // 2, 540 // 2),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **cfg.img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

If a concatenated dataset (`ConcatDataset`) is used for testing or evaluation, each sub-dataset is evaluated separately by default. To evaluate the concatenated datasets as a whole, set `separate_eval=False` in the `ConcatDataset` config.

### 3.3 - Create datasets

In [None]:
import mmcv
import numpy as np

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset
from mmdet.datasets.coco import CocoDataset

# A notebook cell is not part of a package, so a relative import such as
# `from .api_wrappers import COCO` raises ImportError here; import the COCO
# API wrapper through its absolute module path instead.
from mmdet.datasets.api_wrappers import COCO


# force=True lets this notebook cell be re-run without the registry raising
# "bbdataset is already registered".
@DATASETS.register_module(force=True)
class bbdataset(CocoDataset):
    """COCO-style bounding-box dataset for the surgical instrument labels.

    The class is registered under the name ``'bbdataset'`` so it can be
    referenced via ``type='bbdataset'`` in the dataset configs.
    """

    CLASSES = ('Nadelhalter', 'Knotenschieber', 'Atraum. Pinzette', 'Nervhaken', 'Klappenschere', 'None')

    PALETTE = None

    def load_annotations(self, ann_files):
        """Load annotations from one or more COCO style annotation files.

        Args:
            ann_files (str | list[str]): Path of a single annotation file,
                or a list of such paths.

        Returns:
            list[dict]: Image infos from the COCO api, each extended with a
            ``filename`` key.

        Note:
            ``self.coco``, ``self.cat_ids``, ``self.cat2label`` and
            ``self.img_ids`` are overwritten for every file, so after loading
            they refer to the LAST file only. ``get_ann_info`` and
            ``get_cat_ids`` therefore only resolve annotations of that file.
        """
        # The dataset configs pass a single path string. Iterating over a
        # string would yield its characters, so wrap it in a list first.
        if isinstance(ann_files, str):
            ann_files = [ann_files]

        data_infos = []
        total_ann_ids = []
        for ann_file in ann_files:
            self.coco = COCO(ann_file)
            # The order of returned `cat_ids` will not
            # change with the order of the CLASSES
            self.cat_ids = self.coco.get_cat_ids(cat_names=self.CLASSES)

            self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
            self.img_ids = self.coco.get_img_ids()
            for i in self.img_ids:
                info = self.coco.load_imgs([i])[0]
                info['filename'] = info['file_name']
                data_infos.append(info)
                ann_ids = self.coco.get_ann_ids(img_ids=[i])
                total_ann_ids.extend(ann_ids)
            # Checked after every file; the ids are accumulated over all
            # files processed so far.
            assert len(set(total_ann_ids)) == len(
                total_ann_ids), f"Annotation ids in '{ann_file}' are not unique!"
        return data_infos

    def get_ann_info(self, idx):
        """Get COCO annotation by index.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Annotation info of specified index.
        """

        img_id = self.data_infos[idx]['id']
        ann_ids = self.coco.get_ann_ids(img_ids=[img_id])
        ann_info = self.coco.load_anns(ann_ids)
        return self._parse_ann_info(self.data_infos[idx], ann_info)

    def get_cat_ids(self, idx):
        """Get COCO category ids by index.

        Args:
            idx (int): Index of data.

        Returns:
            list[int]: All categories in the image of specified index.
        """

        img_id = self.data_infos[idx]['id']
        ann_ids = self.coco.get_ann_ids(img_ids=[img_id])
        ann_info = self.coco.load_anns(ann_ids)
        return [ann['category_id'] for ann in ann_info]

In [None]:
# Set the label names (six entries, including the 'None' label)
classes = ('Nadelhalter', 'Knotenschieber', 'Atraum. Pinzette', 'Nervhaken', 'Klappenschere', 'None')
# Modify num classes of the model in box head so it matches the six labels above
cfg.model.roi_head.bbox_head.num_classes = 6

# Attach the custom pipelines and the label names to the three splits.
# NOTE: `cfg.data` is assigned again in the ConcatDataset cell further down,
# which replaces the dict built here.
cfg.data = dict(
    train=dict(pipeline=cfg.train_pipeline,
               classes=classes),
    val=dict(pipeline=cfg.test_pipeline,
             classes=classes),
    test=dict(pipeline=cfg.test_pipeline,
              classes=classes))

In [None]:
from mmdet.apis import set_random_seed
import os.path as osp
import mmcv
import numpy as np
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/dataset_wrappers.py#L211

# Modify dataset type and path:
# 'bbdataset' is the custom dataset class defined above; data_root is the
# Google Drive folder that contains the per-video sub-folders.
cfg.dataset_type = 'bbdataset'
cfg.data_root = '/content/drive/MyDrive/PDSM_Nils/data_coco_bb_complete/'


def get_concat_datasets(set_type: str, num_videos: list, num_views=(2, 3), data_root=None):
    """Build the per-video / per-view dataset configs for a ``ConcatDataset``.

    One ``bbdataset`` config dict is created for every combination of video
    number and camera view, pointing to
    ``<data_root>/aicm<video>/VID000_0/image_0<view>`` for the images and
    ``<data_root>/aicm<video>/VID000_0/instrument_labels_0<view>/instances_default.json``
    for the annotations. Video numbers are zero-padded to two digits.

    Args:
        set_type (str): One of ``'train'``, ``'val'`` or ``'test'``.
            ``'train'`` selects ``cfg.train_pipeline``, the other two select
            ``cfg.test_pipeline``.
        num_videos: Iterable of integer video numbers (a list or a ``range``).
        num_views: Iterable of view numbers. Defaults to ``(2, 3)``.
        data_root (str | None): Folder that contains the ``aicmXX`` video
            folders. Defaults to ``cfg.data_root``.

    Returns:
        list[dict]: Dataset config dicts, ordered by video and then by view.

    Raises:
        ValueError: If ``set_type`` is not ``'train'``, ``'val'`` or ``'test'``.
    """
    # Fail early with a clear message; previously an unknown set_type left
    # `pipeline` unassigned and crashed later with an UnboundLocalError.
    if set_type not in ('train', 'val', 'test'):
        raise ValueError(
            f"set_type must be 'train', 'val' or 'test', got {set_type!r}")

    # Validation and test share the test-time pipeline
    pipeline = cfg.train_pipeline if set_type == 'train' else cfg.test_pipeline

    # Strip a trailing slash so the joined paths contain no double slash
    root = (cfg.data_root if data_root is None else data_root).rstrip('/')

    datasets = []
    for video in num_videos:
        for view in num_views:
            # `:02d` zero-pads single-digit video numbers (aicm01 ... aicm10)
            video_dir = f'{root}/aicm{video:02d}/VID000_0'
            image_dir = f'{video_dir}/image_0{view}'
            datasets.append(
                dict(
                    type='bbdataset',
                    pipeline=pipeline,
                    data_root=image_dir,
                    ann_file=f'{video_dir}/instrument_labels_0{view}/instances_default.json',
                    img_prefix=f'{image_dir}/',
                )
            )

    return datasets

In [None]:
# Split the ten videos into train (1-6), validation (7-8) and test (9-10)
train_videos = range(1, 7)
val_videos = range(7, 9)
test_videos = range(9, 11)

# Every split is a ConcatDataset over the per-video / per-view datasets.
cfg.data = dict(
    # `samples_per_gpu` is the MMDetection 2.x name of the batch size per GPU;
    # the former `imgs_per_gpu` key is deprecated and only triggers a warning
    # before being copied to `samples_per_gpu`.
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type="ConcatDataset",
        datasets=get_concat_datasets(set_type='train', num_videos=train_videos)),
    val=dict(
        type="ConcatDataset",
        datasets=get_concat_datasets(set_type='val', num_videos=val_videos)),
    test=dict(
        type="ConcatDataset",
        datasets=get_concat_datasets(set_type='test', num_videos=test_videos)),
    )

### 3.4 - Finetune R-CNN architecture

In [None]:
# Use load_from to load the checkpoint of the pretrained model.
# The path is relative to the current working directory and refers to the
# file downloaded in section 1.3; the checkpoint architecture (ResNet-50)
# has to match the config loaded in section 3.1.
cfg.load_from = 'checkpoints/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth'

### 3.5 - Optimizer and Evaluation metric

In [None]:
# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU (0.02 / 8 = 0.0025).
cfg.optimizer = dict(type='SGD', 
                     lr=0.02 / 8, 
                     momentum=0.9, 
                     weight_decay=0.0001)
# Disable the learning-rate warm-up of the base config
cfg.lr_config.warmup = None
# Logging interval of the logger hooks
cfg.log_config.interval = 100


# Change the evaluation metric since we use customized dataset.
cfg.evaluation.metric = ['bbox']

# We can set the evaluation interval to reduce the evaluation times
cfg.evaluation.interval = 10

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10

### 3.6 - Additional configs

In [None]:
# Directory that receives the checkpoints and log files
cfg.work_dir = '/content'

# Fix the random seed so that results are more reproducible; the seed is
# stored in the config and the same value is applied to the RNGs
cfg.seed = 42
set_random_seed(cfg.seed, deterministic=False)

# Train on a single GPU (id 0)
cfg.gpu_ids = range(1)

### 3.7 - Final config overview

In [None]:
# Print the complete, resolved config to have a look at the
# final settings used for training
print(f'Config:\n{cfg.pretty_text}')

### 3.8 - Build the Faster R-CNN

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Run on the GPU
cfg.device='cuda'

# Build dataset (the training split; wrapped in a list as expected by train_detector below)
datasets = [build_dataset(cfg.data.train)]

# Build the detector
model = build_detector(cfg.model)
# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES

# Show the class names the detector will predict
print(model.CLASSES)

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

In [None]:
# We can also use tensorboard to log the training process
# (in addition to the plain-text logger)
cfg.log_config.hooks = [
    dict(type='TextLoggerHook'),
    dict(type='TensorboardLoggerHook')]
# Train for 20 epochs
cfg.runner.max_epochs = 20

### 3.9 - Train the Faster R-CNN

In [None]:
# Start single-GPU (non-distributed) training; validate=True also evaluates
# on the validation split during training
train_detector(model, datasets, cfg, distributed=False, validate=True)

## 4 - Evaluation

Load existing model

In [None]:
from mmdet.apis import init_detector

# NOTE: 'xxx' is a placeholder - replace it with the path of the trained
# checkpoint (.pth file) that should be evaluated before running this cell.
checkpoint_file = 'xxx'
# Build the detector from the config, load the checkpoint and move it to the first GPU
model = init_detector(cfg, checkpoint_file, device='cuda:0')

### 4.1 - Predict on TEST-SET and show results

In [None]:
from mmdet.apis import inference_detector, show_result_pyplot

# inference_detector takes its test pipeline from `model.cfg`. Attach the
# config once, outside the loop (needed when `model` comes straight from the
# training cell rather than from init_detector).
model.cfg = cfg

# Run the detector on frames 000010.png ... 000099.png and plot the results.
# NOTE(review): the frames are read from 'PDSM/data_coco_bb/aicm08', while the
# test split configured above consists of videos 9 and 10 below
# 'PDSM_Nils/data_coco_bb_complete' - confirm that this is the intended folder.
for i in range(10, 100):
    img_path = f'/content/drive/MyDrive/PDSM/data_coco_bb/aicm08/VID000_0/image_02/{i:06d}.png'
    try:
        img = mmcv.imread(img_path)
    except OSError:
        # Frame is missing or unreadable (FileNotFoundError is an OSError):
        # skip it. Other errors are no longer silently swallowed.
        continue
    if img is None:
        # The file exists but could not be decoded as an image
        continue
    result = inference_detector(model, img)
    show_result_pyplot(model, img, result)