Jittor · 514flowey · Dec 3, 2022 · Nov 28, 2022 · Nov 28, 2022 · Nov 28, 2022
diff --git a/configs/ld/ld_rotated_retinanet_obb_r18_r50_fpn_1x_dota.py b/configs/ld/ld_rotated_retinanet_obb_r18_r50_fpn_1x_dota.py
@@ -0,0 +1,166 @@
+# model settings: knowledge-distillation detector with a Resnet18 backbone; the teacher is specified via teacher_config and teacher_ckpt
+teacher_ckpt = 'https://cloud.tsinghua.edu.cn/f/b737fe43de8c47a6810e/?dl=1'  # noqa
+model = dict(
+    type='KnowledgeDistillationSingleStageDetector',
+    teacher_config='configs/ld/rotated_retinanet_obb_distribution_r50_fpn_1x_dota.py',  # relative path; NOTE(review): this r50 config is not among the files visible here — confirm it exists
+    teacher_ckpt=teacher_ckpt,
+    backbone=dict(
+        type='Resnet18',
+        frozen_stages=1,
+        return_stages=["layer1","layer2","layer3","layer4"],
+        pretrained= True),
+    neck=dict(
+        type='FPN',
+        in_channels=[64, 128, 256, 512],  # four entries, one per entry in backbone return_stages
+        out_channels=256,  # equals bbox_head in_channels below
+        start_level=1,
+        add_extra_convs="on_input",
+        num_outs=5),  # equals the number of anchor_strides in bbox_head
+    bbox_head=dict(
+        type='RotatedRetinaLocalizationDistillationHead',
+        num_classes=16,  # NOTE(review): presumably includes a background slot on top of the DOTA categories — confirm
+        in_channels=256,
+        feat_channels=256,
+        stacked_convs=4,
+        octave_base_scale=4,
+        scales_per_octave=3,
+        anchor_ratios=[1.0, 0.5, 2.0],
+        anchor_strides=[8, 16, 32, 64, 128],
+        target_means=[.0, .0, .0, .0, .0],  # five values; the same means are repeated in train_cfg.bbox_coder below
+        target_stds=[1.0, 1.0, 1.0, 1.0, 1.0],  # five values; the same stds are repeated in train_cfg.bbox_coder below
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(
+            type='L1Loss', loss_weight=1.0),
+        loss_ld=dict(
+            type='KnowledgeDistillationKLDivLoss', loss_weight=10, T=10),  # T is presumably the softmax temperature — confirm
+        loss_kd=dict(
+            type='KnowledgeDistillationKLDivLoss', loss_weight=10, T=2),  # same loss type and weight as loss_ld, but T=2
+        loss_im=dict(type='IMLoss', loss_weight=0),  # weight 0 — presumably leaves this term inactive; confirm
+        imitation_method='finegrained',
+        reg_max=8,
+        test_cfg=dict(
+            nms_pre=2000,
+            min_bbox_size=0,
+            score_thr=0.05,
+            nms=dict(type='nms_rotated', iou_thr=0.1),
+            max_per_img=2000),
+        train_cfg=dict(
+                assigner=dict(
+                    type='MaxIoUAssigner',
+                    pos_iou_thr=0.5,
+                    neg_iou_thr=0.4,
+                    min_pos_iou=0,
+                    ignore_iof_thr=-1,
+                    iou_calculator=dict(type='BboxOverlaps2D_rotated')),
+                bbox_coder=dict(type='DeltaXYWHABBoxCoder',
+                                target_means=(0., 0., 0., 0., 0.),  # same values as bbox_head target_means above
+                                target_stds=(1., 1., 1., 1., 1.),  # same values as bbox_head target_stds above
+                                clip_border=True),
+                allowed_border=-1,
+                pos_weight=-1,
+                debug=False)
+        )
+    )
+dataset = dict(  # every split applies RotatedResize(1024) -> Pad(size_divisor=32) -> Normalize; train adds two random flips
+    train=dict(
+        type="DOTADataset",
+        dataset_dir='/home/ubuntu/ZZH/datasets/processed_DOTA/train_600_150_1.0',  # machine-specific absolute path; adjust per environment
+        transforms=[
+            dict(
+                type="RotatedResize",
+                min_size=1024,
+                max_size=1024
+            ),
+            dict(type='RotatedRandomFlip', prob=0.5, direction="horizontal"),
+            dict(type='RotatedRandomFlip', prob=0.5, direction="vertical"),  # separate entry from the horizontal flip, also prob=0.5
+            dict(
+                type = "Pad",
+                size_divisor=32),
+            dict(
+                type = "Normalize",
+                mean =  [123.675, 116.28, 103.53],  # NOTE(review): matches the widely used ImageNet RGB mean on a 0-255 scale — confirm
+                std = [58.395, 57.12, 57.375],  # NOTE(review): matches the widely used ImageNet RGB std on a 0-255 scale — confirm
+                to_bgr=False,)
+
+        ],
+        batch_size=2,
+        num_workers=4,
+        shuffle=True,
+        filter_empty_gt=False
+    ),
+    val=dict(
+        type="DOTADataset",
+        dataset_dir='/home/ubuntu/ZZH/datasets/processed_DOTA/val_600_150_1.0',  # machine-specific absolute path; adjust per environment
+        transforms=[
+            dict(
+                type="RotatedResize",
+                min_size=1024,
+                max_size=1024
+            ),
+            dict(
+                type = "Pad",
+                size_divisor=32),
+            dict(
+                type = "Normalize",
+                mean =  [123.675, 116.28, 103.53],  # same values as the train split
+                std = [58.395, 57.12, 57.375],  # same values as the train split
+                to_bgr=False),
+        ],
+        batch_size=2,
+        num_workers=4,
+        shuffle=False
+    ),
+    test=dict(
+        type="ImageDataset",  # unlike train/val, this split uses ImageDataset with images_dir
+        images_dir='/home/ubuntu/ZZH/datasets/processed_DOTA/val_600_150_1.0/images',  # machine-specific; points inside the val split directory used above
+        transforms=[
+            dict(
+                type="RotatedResize",
+                min_size=1024,
+                max_size=1024
+            ),
+            dict(
+                type = "Pad",
+                size_divisor=32),
+            dict(
+                type = "Normalize",
+                mean =  [123.675, 116.28, 103.53],  # same values as the train split
+                std = [58.395, 57.12, 57.375],  # same values as the train split
+                to_bgr=False,),
+        ],
+        num_workers=4,
+        batch_size=1,
+    )
+)
+
+optimizer = dict(  # SGD with momentum, weight decay and gradient clipping settings
+    type='SGD', 
+    lr=0.01/4., # evaluates to 0.0025; alternatives left by the author: 0.0, 0.01*(1/8.)
+    momentum=0.9, 
+    weight_decay=0.0001,
+    grad_clip=dict(
+        max_norm=35, 
+        norm_type=2))
+
+scheduler = dict(  # StepLR schedule with a linear warmup
+    type='StepLR',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    milestones=[7, 10])  # NOTE(review): presumably epoch indices (max_epoch is 12 in this file) — confirm
+
+
+logger = dict(  # only the logger type is configured here
+    type="RunLogger")
+
+max_epoch = 12
+eval_interval = 1  # presumably counted in epochs — confirm
+checkpoint_interval = 1  # presumably counted in epochs — confirm
+log_interval = 50  # presumably counted in iterations — confirm
+
+# (disabled) resume_path='/home/ubuntu/ZZH/JDet/rotated_retinanet_obb_distribution_r18_fpn_1x_dota.pkl'  -- machine-specific checkpoint path
diff --git a/configs/ld/rotated_retinanet_obb_distribution_r18_fpn_1x_dota.py b/configs/ld/rotated_retinanet_obb_distribution_r18_fpn_1x_dota.py
@@ -0,0 +1,156 @@
+# model settings: RotatedRetinaNet with a Resnet18 backbone, an FPN neck and a RotatedRetinaDistributionHead
+model = dict(
+    type='RotatedRetinaNet',
+    backbone=dict(
+        type='Resnet18',
+        frozen_stages=1,
+        return_stages=["layer1","layer2","layer3","layer4"],
+        pretrained= True),
+    neck=dict(
+        type='FPN',
+        in_channels=[64, 128, 256, 512],  # four entries, one per entry in backbone return_stages
+        out_channels=256,  # equals bbox_head in_channels below
+        start_level=1,
+        add_extra_convs="on_input",
+        num_outs=5),  # equals the number of anchor_strides in bbox_head
+    bbox_head=dict(
+        type='RotatedRetinaDistributionHead',
+        num_classes=16,  # NOTE(review): presumably includes a background slot on top of the DOTA categories — confirm
+        in_channels=256,
+        feat_channels=256,
+        stacked_convs=4,
+        octave_base_scale=4,
+        scales_per_octave=3,
+        anchor_ratios=[1.0, 0.5, 2.0],
+        anchor_strides=[8, 16, 32, 64, 128],
+        target_means=[.0, .0, .0, .0, .0],  # five values; the same means are repeated in train_cfg.bbox_coder below
+        target_stds=[1.0, 1.0, 1.0, 1.0, 1.0],  # five values; the same stds are repeated in train_cfg.bbox_coder below
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(
+            type='L1Loss', loss_weight=1.0),
+        test_cfg=dict(
+            nms_pre=2000,
+            min_bbox_size=0,
+            score_thr=0.05,
+            nms=dict(type='nms_rotated', iou_thr=0.1),
+            max_per_img=2000),
+        train_cfg=dict(
+                assigner=dict(
+                    type='MaxIoUAssigner',
+                    pos_iou_thr=0.5,
+                    neg_iou_thr=0.4,
+                    min_pos_iou=0,
+                    ignore_iof_thr=-1,
+                    iou_calculator=dict(type='BboxOverlaps2D_rotated')),
+                bbox_coder=dict(type='DeltaXYWHABBoxCoder',
+                                target_means=(0., 0., 0., 0., 0.),  # same values as bbox_head target_means above
+                                target_stds=(1., 1., 1., 1., 1.),  # same values as bbox_head target_stds above
+                                clip_border=True),
+                allowed_border=-1,
+                pos_weight=-1,
+                debug=False)
+        )
+    )
+dataset = dict(  # every split applies RotatedResize(1024) -> Pad(size_divisor=32) -> Normalize; train adds two random flips
+    train=dict(
+        type="DOTADataset",
+        dataset_dir='/home/ubuntu/ZZH/datasets/processed_DOTA/train_600_150_1.0',  # machine-specific absolute path; adjust per environment
+        transforms=[
+            dict(
+                type="RotatedResize",
+                min_size=1024,
+                max_size=1024
+            ),
+            dict(type='RotatedRandomFlip', prob=0.5, direction="horizontal"),
+            dict(type='RotatedRandomFlip', prob=0.5, direction="vertical"),  # separate entry from the horizontal flip, also prob=0.5
+            dict(
+                type = "Pad",
+                size_divisor=32),
+            dict(
+                type = "Normalize",
+                mean =  [123.675, 116.28, 103.53],  # NOTE(review): matches the widely used ImageNet RGB mean on a 0-255 scale — confirm
+                std = [58.395, 57.12, 57.375],  # NOTE(review): matches the widely used ImageNet RGB std on a 0-255 scale — confirm
+                to_bgr=False,)
+
+        ],
+        batch_size=2,
+        num_workers=4,
+        shuffle=True,
+        filter_empty_gt=False
+    ),
+    val=dict(
+        type="DOTADataset",
+        dataset_dir='/home/ubuntu/ZZH/datasets/processed_DOTA/val_600_150_1.0',  # machine-specific absolute path; adjust per environment
+        transforms=[
+            dict(
+                type="RotatedResize",
+                min_size=1024,
+                max_size=1024
+            ),
+            dict(
+                type = "Pad",
+                size_divisor=32),
+            dict(
+                type = "Normalize",
+                mean =  [123.675, 116.28, 103.53],  # same values as the train split
+                std = [58.395, 57.12, 57.375],  # same values as the train split
+                to_bgr=False),
+        ],
+        batch_size=2,
+        num_workers=4,
+        shuffle=False
+    ),
+    test=dict(
+        type="ImageDataset",  # unlike train/val, this split uses ImageDataset with images_dir
+        images_dir='/home/ubuntu/ZZH/datasets/processed_DOTA/val_600_150_1.0/images',  # machine-specific; points inside the val split directory used above
+        transforms=[
+            dict(
+                type="RotatedResize",
+                min_size=1024,
+                max_size=1024
+            ),
+            dict(
+                type = "Pad",
+                size_divisor=32),
+            dict(
+                type = "Normalize",
+                mean =  [123.675, 116.28, 103.53],  # same values as the train split
+                std = [58.395, 57.12, 57.375],  # same values as the train split
+                to_bgr=False,),
+        ],
+        num_workers=4,
+        batch_size=1,
+    )
+)
+
+optimizer = dict(  # SGD with momentum, weight decay and gradient clipping settings
+    type='SGD', 
+    lr=0.01/4., # evaluates to 0.0025; alternatives left by the author: 0.0, 0.01*(1/8.)
+    momentum=0.9, 
+    weight_decay=0.0001,
+    grad_clip=dict(
+        max_norm=35, 
+        norm_type=2))
+
+scheduler = dict(  # StepLR schedule with a linear warmup
+    type='StepLR',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    milestones=[7, 10])  # NOTE(review): presumably epoch indices (max_epoch is 12 in this file) — confirm
+
+
+logger = dict(  # only the logger type is configured here
+    type="RunLogger")
+
+max_epoch = 12
+eval_interval = 1  # presumably counted in epochs — confirm
+checkpoint_interval = 1  # presumably counted in epochs — confirm
+log_interval = 50  # presumably counted in iterations — confirm
+
+# (disabled) resume_path='/home/ubuntu/ZZH/JDet/rotated_retinanet_obb_distribution_r50_1x.pkl'  -- machine-specific path; NOTE(review): names an r50 checkpoint although this config uses Resnet18