I couldn't find a notebook using the new mmdet version 3.0.0, so I made one myself. Please let me know if there are any mistakes!

the annotation josn file is from [this great notebook](https://www.kaggle.com/code/ammarnassanalhajali/hubmap-2023-k-fold-cv-coco-dataset-generator).

inference notebook is [here](https://www.kaggle.com/code/andtaichi/hubmap-mmdet-ver3-0-0-infer/notebook).

In [1]:
!pip install -qqq /kaggle/input/mmdet3-wheels-ando/addict-2.4.0-py3-none-any.whl
!pip install -qqq /kaggle/input/mmdet3-wheels-ando/mmengine-0.7.3-py3-none-any.whl
!pip install -qqq /kaggle/input/mmdet3-wheels-ando/mmcv-2.0.0-cp310-cp310-linux_x86_64.whl
!pip install -qqq /kaggle/input/pycocotools-206/wheels/pycocotools-2.0.6-cp310-cp310-linux_x86_64.whl
!pip install -qqq /kaggle/input/mmdet3-wheels-ando/terminaltables-3.1.10-py2.py3-none-any.whl
!pip install -qqq /kaggle/input/mmdet3-wheels-ando/mmdet-3.0.0-py3-none-any.whl

In [2]:
import mmdet, mmcv
print(mmdet.__version__)
print(mmcv.__version__)

3.0.0
2.0.0


## Make config file

In [3]:
%mkdir /kaggle/working/configs/

In [4]:
%%writefile /kaggle/working/configs/custom_config.py

model = dict(
    type='MaskRCNN',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_mask=True,
        pad_size_divisor=32),
    backbone=dict(
        type='RegNet',
        arch='regnetx_12gf',
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://regnetx_12gf')),
    neck=dict(
        type='FPN',
        in_channels=[224, 448, 896, 2240],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=2,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=2,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.5),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=1000,
            nms_post=1000,
            max_per_img=500,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.01,
            nms=dict(type='nms', iou_thr=0.5),
            max_per_img=50,
            mask_thr_binary=0.5)))
dataset_type = 'CocoDataset'
data_root = ''
backend_args = None
metainfo=dict(classes=('blood_vessel', 'glomerulus'), palette=[(255, 0, 0), (0, 255, 0)])

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type='CocoDataset',
        metainfo=dict(classes=('blood_vessel', 'glomerulus'), palette=[(255, 0, 0), (0, 255, 0)]),
        data_root='',
        ann_file='/kaggle/input/coco-hubmap-fold-1/coco_annotations_train_all_fold10.json',
        data_prefix=dict(
            img='/kaggle/input/hubmap-hacking-the-human-vasculature/train/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(
                type='LoadAnnotations',
                with_bbox=True,
                with_mask=True,
                poly2mask=True),
            dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
            dict(
                type='RandomFlip',
                direction=['horizontal', 'vertical'],
                prob=0.5),
            dict(type='PackDetInputs')
        ],
        backend_args=None))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        metainfo=dict(classes=('blood_vessel', 'glomerulus'), palette=[(255, 0, 0), (0, 255, 0)]),
        data_root='',
        ann_file=
        '/kaggle/input/coco-hubmap-fold-1/coco_annotations_valid_all_fold10.json',
        data_prefix=dict(
            img='/kaggle/input/hubmap-hacking-the-human-vasculature/train/'),
        test_mode=True,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ],
        backend_args=None))
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        metainfo=dict(classes=('blood_vessel', 'glomerulus'), palette=[(255, 0, 0), (0, 255, 0)]),
        data_root='',
        ann_file=
        '/kaggle/input/coco-hubmap-fold-1/coco_annotations_valid_all_fold10.json',
        data_prefix=dict(
            img='/kaggle/input/hubmap-hacking-the-human-vasculature/train/'),
        test_mode=True,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'))
        ],
        backend_args=None))
val_evaluator = dict(
    type='CocoMetric',
    ann_file=
    '/kaggle/input/coco-hubmap-fold-1/coco_annotations_valid_all_fold10.json',
    metric=['segm'],
    format_only=False,
    backend_args=None)
#     iou_thrs=[0.5]
test_evaluator = dict(
    type='CocoMetric',
    ann_file=
    '/kaggle/input/coco-hubmap-fold-1/coco_annotations_valid_all_fold10.json',
    metric=['segm'],
    format_only=False,
    backend_args=None)
#     iou_thrs=[0.5]
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.91, weight_decay=0.00005),
    clip_grad=None)
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type='MultiStepLR',
        by_epoch=True,
        begin=0,
        end=30,
        milestones=[13, 17, 21],
        gamma=0.2)
]
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(
        type='CheckpointHook', interval=3, save_best='coco/segm_mAP'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))
custom_hooks = [
    dict(
        type='EarlyStoppingHook',
        monitor='coco/segm_mAP',
        rule='greater',
        min_delta=0.005,
        strict=False,
        check_finite=True,
        patience=15,
        stopping_threshold=None)
]
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')],
    name='visualizer')
log_config = dict(hooks=[
    dict(type='TextLoggerHook'),
    dict(
        type='MMDetWandbHook',
        init_kwargs=dict(project='mmdetection'),
        interval=10,
        log_checkpoint=True,
        log_checkpoint_metadata=True,
        num_eval_images=100,
        bbox_score_thr=0.25)
])
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
log_level = 'INFO'
load_from = None
resume = False
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=30, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
work_dir = '/kaggle/working/work_dir'

Writing /kaggle/working/configs/custom_config.py


In [5]:
from mmengine.config import Config
from mmengine.runner import Runner

from mmdet.utils import register_all_modules

cfg = Config.fromfile("/kaggle/working/configs/custom_config.py")

cfg.work_dir = "/kaggle/working/work_dir"
runner = Runner.from_cfg(cfg)

07/29 18:32:33 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.10 | packaged by conda-forge | (main, Mar 24 2023, 20:08:06) [GCC 11.3.0]
    CUDA available: True
    numpy_random_seed: 83005302
    GPU 0: Tesla P100-PCIE-16GB
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.8, V11.8.89
    GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
    PyTorch: 2.0.0
    PyTorch compiling details: PyTorch built with:
  - GCC 11.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_37,code=

In [6]:
runner.train()

loading annotations into memory...
Done (t=0.86s)
creating index...
index created!
loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
07/29 18:32:53 - mmengine - [4m[97mINFO[0m - load model from: open-mmlab://regnetx_12gf
07/29 18:32:53 - mmengine - [4m[97mINFO[0m - Loads checkpoint by openmmlab backend from path: open-mmlab://regnetx_12gf


Downloading: "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth" to /root/.cache/torch/hub/checkpoints/regnetx_12gf-4c2a3350.pth



unexpected key in source state_dict: fc.weight, fc.bias

07/29 18:33:09 - mmengine - [4m[97mINFO[0m - Checkpoints will be saved to /kaggle/working/work_dir.
07/29 18:34:10 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][ 50/731]  lr: 1.9820e-03  eta: 7:26:44  time: 1.2251  data_time: 0.0614  memory: 6380  loss: 1.8463  loss_rpn_cls: 0.6391  loss_rpn_bbox: 0.0973  loss_cls: 0.2799  acc: 97.9492  loss_bbox: 0.0302  loss_mask: 0.7997
07/29 18:35:06 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][100/731]  lr: 3.9840e-03  eta: 7:05:49  time: 1.1157  data_time: 0.0545  memory: 6381  loss: 1.2361  loss_rpn_cls: 0.3700  loss_rpn_bbox: 0.1032  loss_cls: 0.1648  acc: 95.8984  loss_bbox: 0.0736  loss_mask: 0.5245
07/29 18:36:02 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][150/731]  lr: 5.9860e-03  eta: 6:59:59  time: 1.1303  data_time: 0.0638  memory: 6380  loss: 1.1861  loss_rpn_cls: 0.2713  loss_rpn_bbox: 0.1014  loss_cls: 0.1978  acc: 97.6562  loss_bbox: 0.1375  loss_mask: 0.4

MaskRCNN(
  (data_preprocessor): DetDataPreprocessor()
  (backbone): RegNet(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(32, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(224, 224, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=2, bias=False)
        (bn2): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(224, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(32, 224, kernel_s