In [1]:
from detectron2.modeling.meta_arch import GeneralizedRCNN as RCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.proposal_generator.rpn import RPN, StandardRPNHead
from detectron2.modeling.backbone.resnet import ResNet, BasicStem, BasicBlock, BottleneckBlock, DeformBottleneckBlock
from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool
from detectron2.modeling.roi_heads import StandardROIHeads, FastRCNNConvFCHead
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.solver.build import get_default_optimizer_params
from detectron2.data.build import build_batch_data_loader, get_detection_dataset_dicts
from detectron2.data.datasets import register_coco_instances
from detectron2.data.common import DatasetFromList, MapDataset
from detectron2.data.dataset_mapper import DatasetMapper
from detectron2.data.samplers import TrainingSampler
from detectron2.layers import ShapeSpec
from detectron2.engine.train_loop import SimpleTrainer
from detectron2.solver.lr_scheduler import LRMultiplier, WarmupParamScheduler
from detectron2.checkpoint.detection_checkpoint import DetectionCheckpointer
from detectron2.data import transforms as T
from detectron2.data import detection_utils as utils
from fvcore.common.param_scheduler import MultiStepParamScheduler
from munch import Munch as m

import os
import weakref
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())


1.8.0 True


In [2]:
config = m({
    'OUTPUT_DIR': 'output_fasterrcnn',
    'MODEL': m({
        # 'META_ARCHITECTURE': "GeneralizedRCNN", # not used in our code
        'PIXEL_MEAN': [103.530, 116.280, 123.675],
        'PIXEL_STD': [1.0, 1.0, 1.0],
        'MASK_ON': False, # not used in our code
        'KEYPOINT_ON': False, # not used in our code
        'LOAD_PROPOSALS': False, # not used in our code
        'WEIGHTS': "https://dl.fbaipublicfiles.com/detectron2/"
                +"COCO-Detection/faster_rcnn_R_101_FPN_3x"
                +"/"+"137851257/model_final_f6e8b1.pkl", # "COCO-Detection/faster_rcnn_R_101_FPN_3x" -> "137851257/model_final_f6e8b1.pkl",
        'MASK_ON': False,
        'BACKBONE': m({
            # 'NAME': "build_resnet_fpn_backbone", # not used in our code
            'FREEZE_AT': 2,
        }),
        'RESNETS': m({
            'OUT_FEATURES': ["res2", "res3", "res4", "res5"],
            'STEM_OUT_CHANNELS': 64,
            'DEPTH': 101,
            'NUM_GROUPS': 1,
            'WIDTH_PER_GROUP': 64,
            'RES2_OUT_CHANNELS': 256,
            'STRIDE_IN_1X1': True,
            'RES5_DILATION': 1,
            'DEFORM_ON_PER_STAGE': [False, False, False, False],
            'DEFORM_MODULATED': False,
            'DEFORM_NUM_GROUPS': 1,
            'NORM': 'FrozenBN',
        }),
        'ANCHOR_GENERATOR': m({
            'SIZES': [[32], [64], [128], [256], [512]], # One size for each in feature map
            'ASPECT_RATIOS': [[0.5, 1.0, 2.0]],  # Three aspect ratios (same for all in feature maps)
            'OFFSET': 0.0,
        }),
        'FPN': m({
            'IN_FEATURES': ["res2", "res3", "res4", "res5"],
            'OUT_CHANNELS': 256,
            'FUSE_TYPE': "sum",
            'NORM': "", #"GN",
        }),
        'RPN': m({
            'IN_FEATURES': ["p2", "p3", "p4", "p5", "p6"],
            'PRE_NMS_TOPK_TRAIN': 2000,
            'PRE_NMS_TOPK_TEST': 1000,
            'POST_NMS_TOPK_TRAIN': 1000,
            'POST_NMS_TOPK_TEST': 1000,
            'SMOOTH_L1_BETA': 0.0,
            'BBOX_REG_LOSS_TYPE': "smooth_l1",
            'LOSS_WEIGHT': 1.0,
            'BBOX_REG_LOSS_WEIGHT': 1.0,
            'BOUNDARY_THRESH': -1,
            'NMS_THRESH': 0.7,
            'POSITIVE_FRACTION': 0.5,
            'BATCH_SIZE_PER_IMAGE': 256,
            'BBOX_REG_WEIGHTS': (1.0, 1.0, 1.0, 1.0),
            'IOU_LABELS': [0, -1, 1],
            'IOU_THRESHOLDS': [0.3, 0.7],
            'CONV_DIMS': [-1],
        }),
        'ROI_HEADS': m({
            # 'NAME': "StandardROIHeads", # not used in our code
            'IN_FEATURES': ["p2", "p3", "p4", "p5"],
            'NUM_CLASSES': 15,
            'SCORE_THRESH_TEST': 0.05,
            'NMS_THRESH_TEST': 0.5,
            'BATCH_SIZE_PER_IMAGE': 128,
            'POSITIVE_FRACTION': 0.25,
            'IOU_THRESHOLDS': [0.5],
            'IOU_LABELS': [0, 1],
        }),
        'ROI_BOX_HEAD': m({
            # 'NAME': "FastRCNNConvFCHead", # not used in our code
            'NUM_CLASSES': 4,
            'NUM_FC': 4,
            'FC_DIM': 1024,
            'POOLER_RESOLUTION': 14,
            'CONV_DIM': 256,
            'NUM_CONV': 0,
            'NORM': "",
            'BBOX_REG_WEIGHTS': (10.0, 10.0, 5.0, 5.0),
            'CLS_AGNOSTIC_BBOX_REG': False,
            'SMOOTH_L1_BETA': 0.0,
            'BBOX_REG_LOSS_TYPE': "smooth_l1",
            'BBOX_REG_LOSS_WEIGHT': 1.0,
            'POOLER_SAMPLING_RATIO': 0,
            'POOLER_TYPE': "ROIAlignV2",
            'TRAIN_ON_PRED_BOXES': False,
        }),
        'ROI_MASK_HEAD': m({
            # 'NAME': "MaskRCNNConvUpsampleHead", # not used in our code
            'NUM_CONV': 2,
            'POOLER_RESOLUTION': 7,
        }),
        'ROI_KEYPOINT_HEAD': m({
            # 'MIN_KEYPOINTS_PER_IMAGE': "MaskRCNNConvUpsampleHead", # not used in our code
        }),
        'PROPOSAL_GENERATOR': m({
            'MIN_SIZE': 0,
        }),
    }),
    'SOLVER': m({
        'IMS_PER_BATCH': 2,
        'WEIGHT_DECAY_NORM': 0.0,
        'BIAS_LR_FACTOR': 1.0,
        'WARMUP_FACTOR': 1.0 / 1000,
        'WARMUP_ITERS': 1000,
        'WARMUP_METHOD': "linear",
        'WEIGHT_DECAY_BIAS': None,
        'MOMENTUM': 0.9,
        'NESTEROV': False,
        'WEIGHT_DECAY': 0.0001,
        'GAMMA': 0.1,
        'BASE_LR': 0.02, # 0.00025
        'STEPS': (), # (210000, 250000),
        'MAX_ITER': 100000,
        'CLIP_GRADIENTS': m({
            'ENABLED': False, # not used in our code
        })
    }),
    'INPUT': m({
        'MIN_SIZE_TRAIN': (640, 672, 704, 736, 768, 800),
        'MAX_SIZE_TRAIN': 1333,
        'MIN_SIZE_TRAIN_SAMPLING': "choice",
        'RANDOM_FLIP': "horizontal",
        'MASK_FORMAT': "polygon",
        'FORMAT': "BGR",
        'CROP': m({
            'ENABLED': False,
            'TYPE': "relative_range",
            'SIZE': [0.9, 0.9],
        }),
    }),
    'TEST': m({
        'DETECTIONS_PER_IMAGE': 100,
    }),
    'DATALOADER': m({
        'NUM_WORKERS': 2,
        'FILTER_EMPTY_ANNOTATIONS': True,
        'ASPECT_RATIO_GROUPING': True,
        'SAMPLER_TRAIN': "TrainingSampler", # not used in our code
    }),
    'DATASETS': m({
        'TRAIN': ("iSAID_train",),
        'TEST': (),
        'PROPOSAL_FILES_TRAIN': (),
    }),
})
config

Munch({'OUTPUT_DIR': 'output_fasterrcnn', 'MODEL': Munch({'PIXEL_MEAN': [103.53, 116.28, 123.675], 'PIXEL_STD': [1.0, 1.0, 1.0], 'MASK_ON': False, 'KEYPOINT_ON': False, 'LOAD_PROPOSALS': False, 'WEIGHTS': 'https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl', 'BACKBONE': Munch({'FREEZE_AT': 2}), 'RESNETS': Munch({'OUT_FEATURES': ['res2', 'res3', 'res4', 'res5'], 'STEM_OUT_CHANNELS': 64, 'DEPTH': 101, 'NUM_GROUPS': 1, 'WIDTH_PER_GROUP': 64, 'RES2_OUT_CHANNELS': 256, 'STRIDE_IN_1X1': True, 'RES5_DILATION': 1, 'DEFORM_ON_PER_STAGE': [False, False, False, False], 'DEFORM_MODULATED': False, 'DEFORM_NUM_GROUPS': 1, 'NORM': 'FrozenBN'}), 'ANCHOR_GENERATOR': Munch({'SIZES': [[32], [64], [128], [256], [512]], 'ASPECT_RATIOS': [[0.5, 1.0, 2.0]], 'OFFSET': 0.0}), 'FPN': Munch({'IN_FEATURES': ['res2', 'res3', 'res4', 'res5'], 'OUT_CHANNELS': 256, 'FUSE_TYPE': 'sum', 'NORM': ''}), 'RPN': Munch({'IN_FEATURES': ['p2', 'p3', 'p4', 'p5', 'p

## Define the backbone

First, Define the FPN bottom-up part (which is a ResNet)

In [3]:

input_shape = ShapeSpec(channels=len(config.MODEL.PIXEL_MEAN))
norm = config.MODEL.RESNETS.NORM
stem = BasicStem(
    in_channels     = input_shape.channels,
    out_channels    = config.MODEL.RESNETS.STEM_OUT_CHANNELS,
    norm            = norm,
)

freeze_at           = config.MODEL.BACKBONE.FREEZE_AT
out_features        = config.MODEL.RESNETS.OUT_FEATURES
depth               = config.MODEL.RESNETS.DEPTH
num_groups          = config.MODEL.RESNETS.NUM_GROUPS
width_per_group     = config.MODEL.RESNETS.WIDTH_PER_GROUP
bottleneck_channels = num_groups * width_per_group
in_channels         = config.MODEL.RESNETS.STEM_OUT_CHANNELS
out_channels        = config.MODEL.RESNETS.RES2_OUT_CHANNELS
stride_in_1x1       = config.MODEL.RESNETS.STRIDE_IN_1X1
res5_dilation       = config.MODEL.RESNETS.RES5_DILATION
deform_on_per_stage = config.MODEL.RESNETS.DEFORM_ON_PER_STAGE
deform_modulated    = config.MODEL.RESNETS.DEFORM_MODULATED
deform_num_groups   = config.MODEL.RESNETS.DEFORM_NUM_GROUPS

# assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation)

num_blocks_per_stage = {
    18: [2, 2, 2, 2],
    34: [3, 4, 6, 3],
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
}[depth]

if depth in [18, 34]:
    assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34"
    assert not any(
        deform_on_per_stage
    ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34"
    assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34"
    assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34"

stages = []

for idx, stage_idx in enumerate(range(2, 6)):
    # res5_dilation is used this way as a convention in R-FCN & Deformable Conv paper
    dilation = res5_dilation if stage_idx == 5 else 1
    first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2
    stage_kargs = {
        "num_blocks": num_blocks_per_stage[idx],
        "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1),
        "in_channels": in_channels,
        "out_channels": out_channels,
        "norm": norm,
    }
    # Use BasicBlock for R18 and R34.
    if depth in [18, 34]:
        stage_kargs["block_class"] = BasicBlock
    else:
        stage_kargs["bottleneck_channels"] = bottleneck_channels
        stage_kargs["stride_in_1x1"] = stride_in_1x1
        stage_kargs["dilation"] = dilation
        stage_kargs["num_groups"] = num_groups
        if deform_on_per_stage[idx]:
            stage_kargs["block_class"] = DeformBottleneckBlock
            stage_kargs["deform_modulated"] = deform_modulated
            stage_kargs["deform_num_groups"] = deform_num_groups
        else:
            stage_kargs["block_class"] = BottleneckBlock
    blocks = ResNet.make_stage(**stage_kargs)
    in_channels = out_channels
    out_channels *= 2
    bottleneck_channels *= 2
    stages.append(blocks)

# fpn_bottom_up (type Backbone > nn.Module)
fpn_bottom_up = ResNet(stem, stages, out_features=out_features, freeze_at=freeze_at)
# fpn_bottom_up

Define the backbone

In [4]:
backbone = FPN(
    bottom_up       = fpn_bottom_up,
    in_features     = config.MODEL.FPN.IN_FEATURES,
    out_channels    = config.MODEL.FPN.OUT_CHANNELS,
    norm            = config.MODEL.FPN.NORM,
    fuse_type       = config.MODEL.FPN.FUSE_TYPE,
    top_block       = LastLevelMaxPool(),
)
# backbone

## Define the Region Proposal Network (RPN)

Define the Anchor generator

In [5]:
input_shape = backbone.output_shape()
input_shape = [input_shape[f] for f in config.MODEL.RPN.IN_FEATURES]

anchor_generator = DefaultAnchorGenerator(
    sizes           = config.MODEL.ANCHOR_GENERATOR.SIZES, 
    aspect_ratios   = config.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
    strides         = [x.stride for x in input_shape],
    offset          = config.MODEL.ANCHOR_GENERATOR.OFFSET,
)
# anchor_generator

Define the RPN head

In [6]:
input_shape = backbone.output_shape()
input_shape = [input_shape[f] for f in config.MODEL.RPN.IN_FEATURES]
in_channels = [s.channels for s in input_shape]
assert len(set(in_channels)) == 1, "Each level must have the same channel!"
in_channels = in_channels[0]

assert (
    len(set(anchor_generator.num_anchors)) == 1
), "Each level must have the same number of anchors per spatial position"

rpn_head = StandardRPNHead(
    in_channels = in_channels,
    num_anchors = anchor_generator.num_anchors[0],
    box_dim     = anchor_generator.box_dim,
    conv_dims   = config.MODEL.RPN.CONV_DIMS,
)
# rpn_head

Define the Region Proposal Generator (RPN)

In [7]:
proposal_generator = RPN(
    in_features             = config.MODEL.RPN.IN_FEATURES,
    head                    = rpn_head,
    anchor_generator        = anchor_generator,
    anchor_matcher          = Matcher(
                                config.MODEL.RPN.IOU_THRESHOLDS, config.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True
                                ),
    box2box_transform       = Box2BoxTransform(weights=config.MODEL.RPN.BBOX_REG_WEIGHTS),
    batch_size_per_image    = config.MODEL.RPN.BATCH_SIZE_PER_IMAGE,
    positive_fraction       = config.MODEL.RPN.POSITIVE_FRACTION,
    pre_nms_topk            = (config.MODEL.RPN.POST_NMS_TOPK_TRAIN, config.MODEL.RPN.POST_NMS_TOPK_TEST),
    post_nms_topk           = (config.MODEL.RPN.POST_NMS_TOPK_TRAIN, config.MODEL.RPN.POST_NMS_TOPK_TEST),
    nms_thresh              = config.MODEL.RPN.NMS_THRESH,
    min_box_size            = config.MODEL.PROPOSAL_GENERATOR.MIN_SIZE,
    anchor_boundary_thresh  = config.MODEL.RPN.BOUNDARY_THRESH,
    loss_weight             = {
                                "loss_rpn_cls": config.MODEL.RPN.LOSS_WEIGHT,
                                "loss_rpn_loc": config.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * config.MODEL.RPN.LOSS_WEIGHT,
                                },
    box_reg_loss_type       = config.MODEL.RPN.BBOX_REG_LOSS_TYPE,
    smooth_l1_beta          = config.MODEL.RPN.SMOOTH_L1_BETA,
)
# proposal_generator

Define the Region of Interest Head

In [8]:
input_shape = backbone.output_shape()
input_shape = [input_shape[f] for f in config.MODEL.RPN.IN_FEATURES]
in_channels = [s.channels for s in input_shape]
assert len(set(in_channels)) == 1, "Each level must have the same channel!"
in_channels = in_channels[0]

box_head = FastRCNNConvFCHead(
    input_shape = ShapeSpec(
                    channels    = in_channels, 
                    height      = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION, 
                    width       = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
                    ),
    conv_dims   = [config.MODEL.ROI_BOX_HEAD.CONV_DIM] * config.MODEL.ROI_BOX_HEAD.NUM_CONV,
    fc_dims     = [config.MODEL.ROI_BOX_HEAD.FC_DIM] * config.MODEL.ROI_BOX_HEAD.NUM_FC,
    conv_norm   = config.MODEL.ROI_BOX_HEAD.NORM,
)

box_predictor = FastRCNNOutputLayers(
        input_shape             = box_head.output_shape,
        box2box_transform       = Box2BoxTransform(weights=config.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS),
        num_classes             = config.MODEL.ROI_HEADS.NUM_CLASSES,
        test_score_thresh       = config.MODEL.ROI_HEADS.SCORE_THRESH_TEST,
        test_nms_thresh         = config.MODEL.ROI_HEADS.NMS_THRESH_TEST,
        test_topk_per_image     = config.TEST.DETECTIONS_PER_IMAGE,
        cls_agnostic_bbox_reg   = config.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG,
        smooth_l1_beta          = config.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA,
        box_reg_loss_type       = config.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE,
        loss_weight             = {"loss_box_reg": config.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT},
)
# box_predictor

In [9]:
input_shape = backbone.output_shape()

roi_pooler = ROIPooler(
    output_size     = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION,
    scales          = tuple(1.0 / input_shape[k].stride for k in config.MODEL.ROI_HEADS.IN_FEATURES),
    sampling_ratio  = config.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO,
    pooler_type     = config.MODEL.ROI_BOX_HEAD.POOLER_TYPE,
)

roi_heads = StandardROIHeads(
    box_in_features         = config.MODEL.ROI_HEADS.IN_FEATURES,
    box_pooler              = roi_pooler,
    box_head                = box_head,
    box_predictor           = box_predictor,
    num_classes             = config.MODEL.ROI_HEADS.NUM_CLASSES,
    batch_size_per_image    = config.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
    positive_fraction       = config.MODEL.ROI_HEADS.POSITIVE_FRACTION,
    proposal_matcher        = Matcher(
                                config.MODEL.ROI_HEADS.IOU_THRESHOLDS,
                                config.MODEL.ROI_HEADS.IOU_LABELS,
                                allow_low_quality_matches=False,
                                ),
    proposal_append_gt      = True,
    mask_in_features        = None, #optional
    mask_pooler             = None, #optional
    mask_head               = None, #optional
    keypoint_in_features    = None, #optional
    keypoint_pooler         = None, #optional
    keypoint_head           = None, #optional
    train_on_pred_boxes     = config.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES
)
# roi_heads

Define the model

In [10]:
faster_rcnn_101 = RCNN(
    backbone            = backbone,
    proposal_generator  = proposal_generator,
    roi_heads           = roi_heads,
    pixel_mean          = config.MODEL.PIXEL_MEAN,
    pixel_std           = config.MODEL.PIXEL_STD,
    input_format        = None,
    vis_period          = 0,
)
# faster_rcnn_101 # model

# Training

In [11]:
os.makedirs(config.OUTPUT_DIR, exist_ok=True)

register_coco_instances("iSAID_train", {}, 
                        "/apps/local/shared/CV703/datasets/iSAID/iSAID_patches/train/instancesonly_filtered_train.json",
                        "/apps/local/shared/CV703/datasets/iSAID/iSAID_patches/train/images/")
register_coco_instances("iSAID_val", {}, 
                        "/apps/local/shared/CV703/datasets/iSAID/iSAID_patches/val/instancesonly_filtered_val.json",
                        "/apps/local/shared/CV703/datasets/iSAID/iSAID_patches/val/images/")


Build optimizer

In [12]:
params = get_default_optimizer_params(
    faster_rcnn_101, # model
    base_lr             = config.SOLVER.BASE_LR,
    weight_decay_norm   = config.SOLVER.WEIGHT_DECAY_NORM,
    bias_lr_factor      = config.SOLVER.BIAS_LR_FACTOR,
    weight_decay_bias   = config.SOLVER.WEIGHT_DECAY_BIAS,
) 
# ^ this can be expanded further

optimizer = torch.optim.SGD(
    params,
    lr              = config.SOLVER.BASE_LR,
    momentum        = config.SOLVER.MOMENTUM,
    nesterov        = config.SOLVER.NESTEROV,
    weight_decay    = config.SOLVER.WEIGHT_DECAY,
)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.02
    momentum: 0.9
    nesterov: False
    weight_decay: 0.0001
)

Train loader

In [13]:
is_train = True
dataset = get_detection_dataset_dicts(
    config.DATASETS.TRAIN,
    filter_empty    = config.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
    min_keypoints   = config.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
                    if config.MODEL.KEYPOINT_ON
                    else 0,
    proposal_files  = config.DATASETS.PROPOSAL_FILES_TRAIN if config.MODEL.LOAD_PROPOSALS else None,
)

sampler = TrainingSampler(len(dataset))

augmentations = [
    T.ResizeShortestEdge(
        config.INPUT.MIN_SIZE_TRAIN, 
        config.INPUT.MAX_SIZE_TRAIN, 
        config.INPUT.MIN_SIZE_TRAIN_SAMPLING
    ),
    T.RandomFlip(
        horizontal  = config.INPUT.RANDOM_FLIP == "horizontal",
        vertical    = config.INPUT.RANDOM_FLIP == "vertical",
    ),
]
if config.INPUT.CROP.ENABLED and is_train:
    augmentations.insert(0, T.RandomCrop(config.INPUT.CROP.TYPE, config.INPUT.CROP.SIZE))
    recompute_boxes = config.MODEL.MASK_ON
else:
    recompute_boxes = False

mapper = DatasetMapper(
    is_train                    = is_train,
    augmentations               = augmentations,
    image_format                = config.INPUT.FORMAT,
    use_instance_mask           = config.MODEL.MASK_ON,
    use_keypoint                = config.MODEL.KEYPOINT_ON,
    instance_mask_format        = config.INPUT.MASK_FORMAT,
    keypoint_hflip_indices      = None,
    precomputed_proposal_topk   = None,
    recompute_boxes             = recompute_boxes,
)

if isinstance(dataset, list):
    dataset = DatasetFromList(dataset, copy=False)
dataset = MapDataset(dataset, mapper)

data_loader = build_batch_data_loader(
    dataset,
    sampler,
    total_batch_size        = config.SOLVER.IMS_PER_BATCH,
    aspect_ratio_grouping   = config.DATALOADER.ASPECT_RATIO_GROUPING,
    num_workers             = config.DATALOADER.NUM_WORKERS,
    collate_fn              = None,
)
data_loader

<detectron2.data.common.AspectRatioGroupedDataset at 0x7eff4495b050>

In [14]:
faster_rcnn_101.train()

trainer = SimpleTrainer(
    faster_rcnn_101, # model
    data_loader,
    optimizer
)
trainer

<detectron2.engine.train_loop.SimpleTrainer at 0x7efeead5f950>

In [15]:
steps = [x for x in config.SOLVER.STEPS if x <= config.SOLVER.MAX_ITER]

sched = MultiStepParamScheduler(
    values      = [config.SOLVER.GAMMA ** k for k in range(len(steps) + 1)],
    milestones  = steps,
    num_updates = config.SOLVER.MAX_ITER,
)

sched = WarmupParamScheduler(
    sched,
    config.SOLVER.WARMUP_FACTOR,
    min(config.SOLVER.WARMUP_ITERS / config.SOLVER.MAX_ITER, 1.0),
    config.SOLVER.WARMUP_METHOD,
)
    
scheduler = LRMultiplier(
    optimizer, 
    multiplier  = sched, 
    max_iter    = config.SOLVER.MAX_ITER
)
scheduler

<detectron2.solver.lr_scheduler.LRMultiplier at 0x7efedf598b90>

In [16]:
resume = False

checkpointer = DetectionCheckpointer(
    faster_rcnn_101, # model
    config.OUTPUT_DIR,
    optimizer   = optimizer,
    trainer     = trainer
)
checkpointer.resume_or_load(config.MODEL.WEIGHTS, resume=resume)

Skip loading parameter 'roi_heads.box_head.fc1.weight' to the model due to incompatible shapes: (1024, 12544) in the checkpoint but (1024, 50176) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (16, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (60, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (60,) in the m

{'__author__': 'Detectron2 Model Zoo'}

In [19]:
curr_iter = 0
start_iter = 0
max_iter = config.SOLVER.MAX_ITER

for i in range(start_iter, 1):
    data = next(iter(data_loader))
    loss_dict = faster_rcnn_101(data)
    print(loss_dict)

#     if isinstance(loss_dict, torch.Tensor):
#         losses = loss_dict
#         loss_dict = {"total_loss": loss_dict}
#     else:
#         losses = sum(loss_dict.values())

#     optimizer.zero_grad()
#     losses.backward()

#     optimizer.step()
# curr_iter += 1

AssertionError: get_event_storage() has to be called inside a 'with EventStorage(...)' context!