# configs/SIGMA/sigma_vgg16_cityscapace_to_foggy.yaml

# Output location for this experiment (presumably logs and checkpoints; confirm against the training script).
OUTPUT_DIR: './experiments/sigma/city_to_foggy_vgg16/'
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  # Initial weights: the VGG-16 checkpoint (vgg16_caffe-292e1171.pth) fetched from the URL below.
  WEIGHT: 'https://s3.ap-northeast-2.amazonaws.com/open-mmlab/pretrain/third_party/vgg16_caffe-292e1171.pth'
  # Alternatives: uncomment one of the following to test with a released, well-trained model instead.
  # WEIGHT: './well_trained_models/city_to_foggy_vgg16_43.58_mAP.pth' # tested with our well-trained model
  # WEIGHT: './well_trained_models/city_to_foggy_vgg16_43.90_mAP.pth' # tested with our well-trained model

  RPN_ONLY: True
  FCOS_ON: True
  ATSS_ON: False # ATSS is integrated in this project but disabled for this config
  
  DA_ON: True # whether to conduct domain adaptation
  MIDDLE_HEAD_CFG: 'GM_HEAD' # prepared for unifying our previous SCAN framework
  # Settings for the middle head (selected above via MIDDLE_HEAD_CFG) and its graph-matching (GM) module.
  MIDDLE_HEAD:
    CONDGRAPH_ON: True
    IN_NORM: 'LN' # normalization for the vision-to-graph transformation (supports GN/BN/LN)
    NUM_CONVS_IN: 2 # the number of convs if IN_NORM = BN/GN
    GM:
      # matching cfg
      MATCHING_LOSS_CFG: 'MSE' # matching loss config (L1 is also supported)
      MATCHING_CFG: 'o2o' # one-to-one (o2o) or many-to-many (m2m); m2m replaces sinkhorn with sigmoid
      WITH_QUADRATIC_MATCHING: True # quadratic constraints in graph matching; gives a small gain for the second-stage training
    
      # node sampling
      NUM_NODES_PER_LVL_SR: 100  # per-level maximum nodes
      NUM_NODES_PER_LVL_TG: 100
      BG_RATIO: 8 # the background node ratio
      WITH_SCORE_WEIGHT: False # use confidence as the adaptive weight in the node loss
      WITH_CTR: False  # sample target nodes with classification scores \times centerness scores

      # loss weight
      MATCHING_LOSS_WEIGHT: 0.1 # for adapting p(x|y) with matching
      NODE_LOSS_WEIGHT: 1.0 # node loss to train auxiliary parameters

      # node discriminator for adapting p(x)
      WITH_NODE_DIS: True # without NA, SIGMA easily overfits to noisy target nodes (mAP increases and then decreases)
      NODE_DIS_WEIGHT: 0.1 # you can try 0.2, which may give a better result
      NODE_DIS_LAMBDA:  0.02 # gradient scaling

      # setting any of the following to False gives a small performance decrease
      WITH_SEMANTIC_COMPLETION: True # conduct Gaussian sampling to complete the missing nodes
      WITH_CLUSTER_UPDATE: True # use spectral clustering to update the Graph-guided Memory Bank
      WITH_COMPLETE_GRAPH: True # conduct one-layer graph-based message propagation (GCN in our paper)
      WITH_DOMAIN_INTERACTION: True # Cross Graph Interaction
     
  # Backbone selection; the CONV_BODY identifier is resolved by the model-building code (not shown in this file).
  BACKBONE:
    CONV_BODY: "VGG-16-FPN-RETINANET"
  RETINANET:
    USE_C5: False # FCOS uses P5 instead of C5

  # The FCOS settings below are the same as in our baseline model EPM
  # FCOS detection head settings.
  FCOS:
    NUM_CONVS_REG: 4
    NUM_CONVS_CLS: 4
    NUM_CLASSES: 9 # NOTE(review): presumably the object categories plus background; confirm against the dataset definition
    # The trailing names are the corresponding inference parameters, with their default values.
    INFERENCE_TH: 0.05    # pre_nms_thresh    (default=0.05)
    PRE_NMS_TOP_N: 1000   # pre_nms_top_n     (default=1000)
    NMS_TH: 0.6          # nms_thresh        (default=0.6)
    REG_CTR_ON: True

  # Adversarial (discriminator) settings, configured per feature level P3-P7.
  ADV:
    GA_DIS_LAMBDA: 0.2 # gradient scaling
    # number of shared convs per level (all levels use 4)
    CON_NUM_SHARED_CONV_P7: 4
    CON_NUM_SHARED_CONV_P6: 4
    CON_NUM_SHARED_CONV_P5: 4
    CON_NUM_SHARED_CONV_P4: 4
    CON_NUM_SHARED_CONV_P3: 4
    # discriminator switches: one global switch plus one per level
    USE_DIS_GLOBAL: True
    USE_DIS_P7: True
    USE_DIS_P6: True
    USE_DIS_P5: True
    USE_DIS_P4: True
    USE_DIS_P3: True

    # per-level GRL weights (GRL presumably stands for gradient reversal layer; confirm in the model code)
    GRL_WEIGHT_P7: 0.02 
    GRL_WEIGHT_P6: 0.02
    GRL_WEIGHT_P5: 0.02
    GRL_WEIGHT_P4: 0.02
    GRL_WEIGHT_P3: 0.02
# Evaluation-time settings.
TEST:
  DETECTIONS_PER_IMG: 100 # fpn_post_nms_top_n  (default=100)
  # NOTE(review): the meaning of 'common' is defined by the test code, which is not shown in this file.
  MODE: 'common' 
# Dataset identifiers: Cityscapes for source training, Foggy Cityscapes for target training and for testing.
# The names are presumably resolved by the project's dataset catalog; confirm there.
DATASETS:
  TRAIN_SOURCE: ("cityscapes_train_cocostyle", )
  TRAIN_TARGET: ("cityscapes_foggy_train_cocostyle", )
  TEST: ("cityscapes_foggy_val_cocostyle", )
# Image size limits for training and testing.
# NOTE(review): how MIN_SIZE_RANGE_TRAIN is sampled is decided by the data pipeline, which is not shown here.
INPUT:
  MIN_SIZE_RANGE_TRAIN: (640, 800)
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
# Data-loading settings.
DATALOADER:
  SIZE_DIVISIBILITY: 32 # presumably batched images are padded to a multiple of 32; confirm in the collate code
# Optimization settings: global options first, then one learning-rate group per sub-network.
SOLVER:
  VAL_ITER: 100 # NOTE(review): presumably the validation interval in iterations; confirm in the trainer
  ADAPT_VAL_ON: True
  INITIAL_AP50: 35 # save a checkpoint when the validation mAP exceeds INITIAL_AP50
  WEIGHT_DECAY: 0.0001
  MAX_ITER: 100000
  # 4 images per batch: 2 for source and 2 for target
  IMS_PER_BATCH: 4 # you can use bs=2 for 2080Ti
  CHECKPOINT_PERIOD: 100000 
  # Learning-rate group for the backbone; the single step milestone is at iteration 90000.
  BACKBONE:
    BASE_LR: 0.0025
    STEPS: (90000, )
    WARMUP_ITERS: 1000
    WARMUP_METHOD: "constant"
  # Learning-rate group for the middle head.
  MIDDLE_HEAD:
    # BASE_LR: 0.0025 is the default setting.
    # We reproduced the end-to-end training results (43+ mAP) with BASE_LR: 0.005, which is used here.
    BASE_LR: 0.005
    STEPS:  (90000, )
    WARMUP_ITERS: 1000
    WARMUP_METHOD: "constant"
    PLABEL_TH: (0.5, 1.0)  # a node is sampled if its cls score > 0.5
  # Learning-rate group for the FCOS head; same values as the backbone group in this file.
  FCOS:
    BASE_LR: 0.0025
    STEPS: (90000, )
    WARMUP_ITERS: 1000
    WARMUP_METHOD: "constant"
  # Learning-rate group for the discriminators (DIS); same values as the backbone group in this file.
  DIS:
    BASE_LR: 0.0025
    STEPS: (90000, )
    WARMUP_ITERS: 1000
    WARMUP_METHOD: "constant"