/
mask2former_resnet50_os16_coco_1024x1024_bs4_370k.yml
144 lines (136 loc) · 3.02 KB
/
mask2former_resnet50_os16_coco_1024x1024_bs4_370k.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Shared values. Each anchor defined here can be aliased further down the file.
num_classes: &num_classes 133
ignore_index: &ignore_index 255
label_divisor: &label_divisor 1000

# Per-channel statistics aliased by the Normalize transforms.
mean: &mean
  - 0.485
  - 0.456
  - 0.406
std: &std
  - 0.229
  - 0.224
  - 0.225

# Iteration count and batch size.
iters: &iters 368750
batch_size: &batch_size 4
# Training dataset: file locations, label settings and the transform pipeline.
train_dataset:
  type: COCO
  mode: train
  dataset_root: data/coco
  file_list: data/coco/train_list.txt
  json_path: data/coco/annotations/panoptic_train2017.json
  num_classes: *num_classes
  label_divisor: *label_divisor
  ignore_index: *ignore_index
  # Both keys are also listed under the Collect transform below.
  # NOTE(review): presumably these are excluded from default batch collation;
  # confirm against the data loader.
  no_collation_keys:
    - gt_ids
    - gt_masks
  # The sequence order below is preserved exactly as written originally.
  transforms:
    - type: ConvertRGBToID
    - type: RandomHorizontalFlip
    - type: ResizeRangeScaling
      min_value: 100
      max_value: 2048
    - type: RandomPaddingCrop
      crop_size: &crop_size
        - 1024
        - 1024
      im_padding_value: 0
    - type: GenerateMaskFormerTrainTargets
    - type: Normalize
      mean: *mean
      std: *std
    - type: Collect
      keys:
        - img
        - label
        - img_path
        - lab_path
        - img_h
        - img_w
        - gt_ids
        - gt_masks
# Validation dataset: file locations, label settings and the transform pipeline.
val_dataset:
  type: COCO
  mode: val
  dataset_root: data/coco
  file_list: data/coco/val_list.txt
  json_path: data/coco/annotations/panoptic_val2017.json
  num_classes: *num_classes
  label_divisor: *label_divisor
  # The sequence order below is preserved exactly as written originally.
  transforms:
    - type: ConvertRGBToID
    - type: ResizeByShort
      short_size: 800
    - type: LimitLong
      max_long: 1333
    - type: PadToDivisible
      size_divisor: 32
    - type: DecodeLabels
      label_divisor: *label_divisor
      ignore_index: *ignore_index
    - type: Normalize
      mean: *mean
      std: *std
    - type: Collect
      keys:
        - img
        - label
        - ann
        - image_id
        - gt_fields
        - trans_info
        - img_path
        - lab_path
        - pan_label
        - sem_label
        - ins_label
# Mask2Former network definition.
# The `pd_*` and `td_*` prefixes group two sets of sub-module settings
# (presumably pixel decoder and transformer decoder; confirm against the
# model class).
model:
  type: Mask2Former
  backbone:
    type: ResNet50_vd
    # NOTE(review): output_stride is 16 while backbone_feat_os below lists a
    # stride of 32 for the last stage; confirm the two agree with the backbone.
    output_stride: 16
    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
  num_classes: *num_classes
  # One backbone stage index per stride in backbone_feat_os. Four strides are
  # listed, so four indices are required; index 0 supplies the stride-4
  # feature that pd_common_stride refers to.
  backbone_indices: [0, 1, 2, 3]
  backbone_feat_os: [4, 8, 16, 32]
  num_queries: 100
  pd_num_heads: 8
  pd_conv_dim: 256
  pd_mask_dim: 256
  pd_ff_dim: 1024
  pd_num_layers: 6
  pd_common_stride: 4
  td_hidden_dim: 256
  td_num_heads: 8
  td_ff_dim: 2048
  td_num_layers: 9
  td_pre_norm: false
  td_mask_dim: 256
  td_enforce_proj: false
  # No whole-model checkpoint is loaded; only the backbone has pretrained
  # weights configured above.
  pretrained: null
# Optimizer settings: AdamW with weight decay, gradient clipping, and a
# per-component override list.
optimizer:
  type: AdamW
  weight_decay: 0.05
  grad_clip_cfg:
    name: ClipGradByNorm
    clip_norm: 0.01
  # Entries matched by `name` get their own learning-rate multiplier.
  custom_cfg:
    - name: backbone
      lr_mult: 0.1
# Step-wise learning-rate schedule: the rate is scaled by `gamma` at each
# milestone. Both milestones are below the `iters` value of 368750.
lr_scheduler:
  type: MultiStepDecay
  learning_rate: 0.0001
  gamma: 0.1
  milestones:
    - 327778
    - 355092
# Loss section with a single entry and a single coefficient.
# NOTE(review): presumably a placeholder, since the loss weights
# (weight_ce / weight_mask / weight_dice) are set under `runner`; confirm.
loss:
  coef:
    - 1
  types:
    - type: NoncustomizableLoss
# Post-processing settings: the label-encoding values are aliased from the
# shared anchors, followed by the two thresholds.
postprocessor:
  type: MaskFormerPostprocessor
  num_classes: *num_classes
  label_divisor: *label_divisor
  ignore_index: *ignore_index
  object_mask_threshold: 0.8
  overlap_threshold: 0.8
# Runner settings: loss-term weights and point-sampling parameters.
runner:
  type: MaskFormerRunner
  num_classes: *num_classes

  # Loss-term weights.
  weight_ce: 2.0
  weight_mask: 5.0
  weight_dice: 5.0
  eos_coef: 0.1

  # Point-sampling parameters.
  num_points: 12544
  oversample_ratio: 3.0
  importance_sample_ratio: 0.75