In [None]:
# Switch the kernel's working directory to the MMDetection checkout; the
# relative paths used later (./configs/..., ./OptRetinaNet, ../images/...) are
# resolved from here.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel will look for a nested mmdetection-2.25.3 directory.
%cd mmdetection-2.25.3

In [None]:
# Print the version of the CUDA compiler (nvcc) found on PATH
!nvcc -V
# Print the version of the GCC compiler found on PATH
!gcc --version

In [None]:
# Call mmcv's environment report helper; as the last expression of the cell,
# its return value is shown as the cell output.
from mmcv import collect_env
collect_env()

In [None]:
# Check PyTorch installation: print its version and whether CUDA is usable
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

# Check MMDetection installation: print the installed version
import mmdet
print(mmdet.__version__)

# Check mmcv installation: importing from mmcv.ops requires the compiled ops;
# print the CUDA and compiler versions they report
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print(get_compiling_cuda_version())
print(get_compiler_version())

In [None]:
import copy
import os.path as osp
import numpy as np

import mmcv

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

@DATASETS.register_module()
class KittiTinyDataset(CustomDataset):
    """Detection dataset that reads KITTI-style images and label files.

    ``ann_file`` is a text file listing one image id per line. For each id
    the image ``<img_prefix>/<id>.png`` is read to obtain its size, and the
    matching label file ``<id>.txt`` is looked up in the directory obtained
    by replacing ``image_2`` with ``label_2`` in ``img_prefix``.
    """

    CLASSES = ('Car','Pedestrian', 'Cyclist')

    def load_annotations(self, ann_file):
        """Convert the image list and label files to the middle format.

        Args:
            ann_file (str): Path of the text file that lists the image ids.

        Returns:
            list[dict]: One dict per image with keys ``filename``, ``width``,
            ``height`` and ``ann``. ``ann`` holds ``bboxes`` / ``labels`` for
            objects whose class name is in ``CLASSES`` and ``bboxes_ignore``
            / ``labels_ignore`` for every other entry (e.g. ``DontCare``).
        """
        cat2label = {k: i for i, k in enumerate(self.CLASSES)}
        # Honour the path passed in by the caller instead of re-reading
        # self.ann_file, so a path resolved by the caller is not ignored.
        image_list = mmcv.list_from_file(ann_file)

        # The label directory does not depend on the image, compute it once.
        label_prefix = self.img_prefix.replace('image_2', 'label_2')

        data_infos = []
        # convert annotations to middle format
        for image_id in image_list:
            if not image_id.strip():
                # tolerate blank lines in the image list
                continue

            # The image is decoded only to obtain its height and width.
            filename = f'{self.img_prefix}/{image_id}.png'
            image = mmcv.imread(filename)
            height, width = image.shape[:2]

            data_info = dict(
                filename=f'{image_id}.png', width=width, height=height)

            # load annotations; blank lines are skipped because they would
            # yield an empty box and break the reshape(-1, 4) below
            lines = mmcv.list_from_file(
                osp.join(label_prefix, f'{image_id}.txt'))
            content = [line.split() for line in lines if line.strip()]

            gt_bboxes = []
            gt_labels = []
            gt_bboxes_ignore = []
            gt_labels_ignore = []

            for fields in content:
                # field 0 is the class name, fields 4..7 are the box
                # (presumably x1, y1, x2, y2 as in KITTI labels -- confirm)
                bbox_name = fields[0]
                bbox = [float(value) for value in fields[4:8]]
                if bbox_name in cat2label:
                    gt_labels.append(cat2label[bbox_name])
                    gt_bboxes.append(bbox)
                else:
                    # classes outside CLASSES (e.g. 'DontCare') are ignored
                    gt_labels_ignore.append(-1)
                    gt_bboxes_ignore.append(bbox)

            # reshape(-1, 4) keeps a (0, 4) shape when a list is empty
            data_anno = dict(
                bboxes=np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
                labels=np.array(gt_labels, dtype=np.longlong),
                bboxes_ignore=np.array(gt_bboxes_ignore,
                                       dtype=np.float32).reshape(-1, 4),
                labels_ignore=np.array(gt_labels_ignore, dtype=np.longlong))

            data_info.update(ann=data_anno)
            data_infos.append(data_info)

        return data_infos

In [None]:
# Load the base RetinaNet (ResNet-50 + FPN) config file; later cells modify
# `cfg` in place.
from mmcv import Config
cfg = Config.fromfile('./configs/retinanet/retinanet_r50_fpn_2x_coco.py')
# Alternative experiment: uncomment the line below to use the ResNet-101
# config instead.
#cfg = Config.fromfile('./configs/retinanet/retinanet_r101_fpn_2x_coco.py')

In [None]:
import os

# Create the checkpoints directory next to the current working directory.
# exist_ok=True keeps the cell idempotent: re-running it (or Restart & Run
# All on an existing checkout) no longer fails because the directory exists.
os.makedirs('checkpoints', exist_ok=True)

In [None]:

from mmdet.apis import set_random_seed

# Dataset class name and data root shared by every split; ann_file and
# img_prefix below are given relative to the data root.
DATASET_TYPE = 'KittiTinyDataset'
DATA_ROOT = '/path/to/KITTI'

# Modify dataset type and path
cfg.dataset_type = DATASET_TYPE
cfg.data_root = DATA_ROOT

# NOTE(review): the test split reads the same image list as the train split
# ('training/train.txt') -- confirm this is intended.
cfg.data.test.type = DATASET_TYPE
cfg.data.test.data_root = DATA_ROOT
cfg.data.test.ann_file = 'training/train.txt'
cfg.data.test.img_prefix = 'training/image_2'

cfg.data.train.type = DATASET_TYPE
cfg.data.train.data_root = DATA_ROOT
cfg.data.train.ann_file = 'training/train.txt'
cfg.data.train.img_prefix = 'training/image_2'

cfg.data.val.type = DATASET_TYPE
cfg.data.val.data_root = DATA_ROOT
cfg.data.val.ann_file = 'training/val.txt'
cfg.data.val.img_prefix = 'training/image_2'

# modify num classes of the model in box head
# car, pedestrian and cyclist
cfg.model.bbox_head.num_classes = 3
# Set up working dir to save files and logs.
cfg.work_dir = './OptRetinaNet'
# Checkpoint to initialise the model weights from before training starts.
cfg.load_from ="/path/to/model"

# Log every 10 iterations.
cfg.log_config.interval = 10
cfg.optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
cfg.optimizer_config = dict(grad_clip=None)
# Learning policy. This assignment replaces the lr_config inherited from the
# base config wholesale, so the linear warmup defined here is what is used.
cfg.lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[7])

# Change the evaluation metric since we use customized dataset.
cfg.evaluation.metric = 'mAP'
# We can set the evaluation interval to reduce the evaluation times
cfg.evaluation.interval = 10
# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10
# IoU thresholds passed to the evaluation
cfg.evaluation.iou_thr=[0.5,0.7]
cfg.runner.max_epochs=80
# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
# Alternate one training epoch with one validation epoch.
cfg.workflow = [('train', 1), ('val', 1)]

# We can also use tensorboard to log the training process
cfg.log_config.hooks = [
    dict(type='TextLoggerHook'),
    dict(type='TensorboardLoggerHook')]

# Show the config as it stands after this cell; changes made to `cfg` in
# later cells are not reflected in this printout.
print(f'Config:\n{cfg.pretty_text}')



In [None]:
# ---------------- Custom anchor parameters for the RetinaNet head ----------------
# The whole bbox_head entry is replaced, so every field the head needs is
# spelled out here (including num_classes=3).
cfg.model.bbox_head = {
    'type': 'RetinaHead',
    'num_classes': 3,
    'in_channels': 256,
    'stacked_convs': 4,
    'feat_channels': 256,
    'anchor_generator': {
        'type': 'AnchorGenerator',
        # Explicit scales are given here; the alternative
        # octave_base_scale=4 / scales_per_octave=3 setting is not used.
        'scales': [0.3, 0.4, 0.5],
        'ratios': [0.25, 0.45, 1.0, 2.15,2.85],
        'strides': [8, 16, 32, 64, 128],
    },
    'bbox_coder': {
        'type': 'DeltaXYWHBBoxCoder',
        'target_means': [0.0, 0.0, 0.0, 0.0],
        'target_stds': [1.0, 1.0, 1.0, 1.0],
    },
    'loss_cls': {
        'type': 'FocalLoss',
        'use_sigmoid': True,
        'gamma': 2.0,
        'alpha': 0.25,
        'loss_weight': 1.0,
    },
    'loss_bbox': {'type': 'L1Loss', 'loss_weight': 1.0},
}

In [None]:
import torch

from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build the training dataset
datasets = [build_dataset(cfg.data.train)]
cfg.workflow = [('train', 1), ('val', 1)]
if len(cfg.workflow) == 2:
    # The 'val' workflow step computes losses, so the validation split is
    # built from a copy of its config that uses the training pipeline.
    val_dataset = copy.deepcopy(cfg.data.val)
    val_dataset.pipeline = cfg.data.train.pipeline
    datasets.append(build_dataset(val_dataset))


# Build the detector
model = build_detector(cfg.model)
# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES
# Train on the GPU when one is usable, otherwise fall back to the CPU instead
# of failing on a hard-coded 'cuda' device.
cfg.device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True)

In [None]:
from mmdet.apis import inference_detector, show_result_pyplot

# NOTE(review): this path contains 'KITTI/KITTI' while the dataset config
# uses '/path/to/KITTI' as data root -- confirm which one is correct.
img = mmcv.imread('/path/to/KITTI/KITTI/training/image_2/000043.png')
# inference_detector reads the test pipeline from model.cfg
model.cfg = cfg
result = inference_detector(model, img)

# Drawing options shared by the saved and the displayed visualisation.
draw_kwargs = dict(
    score_thr=0.27,
    win_name='result',
    bbox_color=(255,0,0),
    text_color=(255, 255, 255))

# Save the visualisation to disk. show_result disables `show` whenever
# out_file is given, so saving and displaying need separate calls.
model.show_result(img, result, out_file='../images/43.png', **draw_kwargs)
# Display the same visualisation.
model.show_result(img, result, show=True, **draw_kwargs)

In [None]:
# load tensorboard in colab
%load_ext tensorboard

# see curves in tensorboard
%tensorboard --logdir ./OptRetinaNet
