In [None]:
# Install detectron2 and albumentations straight from their GitHub repositories.
# NOTE(review): neither install is pinned to a tag or commit, so re-running this
# cell later may pull different code — consider pinning for reproducibility.
!pip install 'git+https://github.com/facebookresearch/detectron2.git'
!pip install "git+https://github.com/albumentations-team/albumentations.git"

In [None]:
import sys
# Put the project folder on the import path so project-local modules can be
# imported (presumably this is where the `utils.aug` module used by the import
# cell lives — verify against the Drive layout).
sys.path.append("/content/drive/MyDrive/Kaggle/Sartorius")

In [None]:
import detectron2
from pathlib import Path
import random, cv2, os
import matplotlib.pyplot as plt
import numpy as np
import pycocotools.mask as mask_util
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.evaluation.evaluator import DatasetEvaluator
from detectron2.engine import BestCheckpointer
from detectron2.checkpoint import DetectionCheckpointer
from albumentations import *
import torch
import os
from detectron2.data import detection_utils
from utils.aug import MyMapper
from detectron2.solver.build import *
from detectron2.data import build_detection_test_loader, build_detection_train_loader
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging aid that makes CUDA
# kernel launches synchronous; it likely slows training — confirm it is still needed.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
class Config:
    """Constants shared with the detectron2 configuration built in `run()`."""

    # Per-channel pixel statistics. They are only referenced from the
    # commented-out cfg.MODEL.PIXEL_MEAN / PIXEL_STD lines in run().
    pixel_mean = [128] * 3
    pixel_std = [13.235] * 3

    # Values assigned to cfg.MODEL.ANCHOR_GENERATOR.SIZES / ASPECT_RATIOS in run().
    anchor_generators_sizes = [[size] for size in (8, 16, 32, 64, 128)]
    anchor_generators_aspect_ratios = [
        [0.5, 1.0, 2.0],
    ]

In [None]:
# Taken from https://www.kaggle.com/theoviel/competition-metric-map-iou
def precision_at(threshold, iou):
    """Count detection outcomes at a single IoU threshold.

    Args:
        threshold: IoU value a pair must strictly exceed to count as a match.
        iou: 2-D array of pairwise IoUs. `score` passes predictions along
            axis 0 (rows) and targets along axis 1 (columns).

    Returns:
        Tuple ``(tp, fp, fn)``: number of rows with exactly one match, number
        of rows with no match, and number of columns with no match.
    """
    matched = iou > threshold
    hits_per_row = np.sum(matched, axis=1)
    hits_per_col = np.sum(matched, axis=0)

    tp = np.sum(hits_per_row == 1)  # Correct objects
    fp = np.sum(hits_per_row == 0)  # Extra objects
    fn = np.sum(hits_per_col == 0)  # Missed objects
    return tp, fp, fn

def score(pred, targ):
    """Average precision of one image's predictions over IoU thresholds 0.5 .. 0.95.

    Args:
        pred: model output dict; ``pred['instances'].pred_masks`` is moved to
            CPU, converted to numpy and each mask is RLE-encoded.
        targ: list of annotation dicts; each ``'segmentation'`` entry is handed
            to ``mask_util.iou`` unchanged (presumably already RLE — verify
            against the dataset json).

    Returns:
        Mean over the thresholds of ``tp / (tp + fp + fn)``.
    """
    masks = pred['instances'].pred_masks.cpu().numpy()
    pred_rles = [mask_util.encode(np.asarray(mask, order='F')) for mask in masks]
    targ_rles = [ann['segmentation'] for ann in targ]
    # One "is crowd" flag per target, all zero.
    crowd_flags = [0] * len(targ_rles)
    ious = mask_util.iou(pred_rles, targ_rles, crowd_flags)

    def _precision(threshold):
        tp, fp, fn = precision_at(threshold, ious)
        return tp / (tp + fp + fn)

    return np.mean([_precision(t) for t in np.arange(0.5, 1.0, 0.05)])

class MAPIOUEvaluator(DatasetEvaluator):
    """Evaluator that reports the mean per-image `score` as "MaP IoU".

    Ground-truth annotations are looked up by ``image_id`` from the dataset
    registered under ``dataset_name`` in ``DatasetCatalog``.
    """

    def __init__(self, dataset_name):
        """Cache the annotations of every image in ``dataset_name``.

        Args:
            dataset_name: name of a dataset registered in ``DatasetCatalog``.
        """
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']: item['annotations'] for item in dataset_dicts}
        # Create the accumulator here as well, so process()/evaluate() do not
        # raise AttributeError when they are called before reset().
        self.scores = []

    def reset(self):
        """Discard all scores accumulated so far."""
        self.scores = []

    def process(self, inputs, outputs):
        """Append one score per (input, output) pair.

        Args:
            inputs: iterable of input dicts; each must carry ``'image_id'``.
            outputs: iterable of output dicts; each must carry ``'instances'``.
        """
        for inp, out in zip(inputs, outputs):
            if len(out['instances']) == 0:
                # No predicted instances: the image scores 0.
                self.scores.append(0)
            else:
                targ = self.annotations_cache[inp['image_id']]
                self.scores.append(score(out, targ))

    def evaluate(self):
        """Return ``{"MaP IoU": mean of the accumulated scores}``."""
        return {"MaP IoU": np.mean(self.scores)}

class Trainer(DefaultTrainer):
    """DefaultTrainer variant used for fine-tuning in this notebook.

    Differences from the stock trainer that are visible here:
      * evaluation uses ``MAPIOUEvaluator`` (reports "MaP IoU");
      * a ``BestCheckpointer`` hook tracks the maximum "MaP IoU";
      * the optimizer is ``torch.optim.AdamW``;
      * the LR schedule is selected by ``cfg.SOLVER.LR_SCHEDULER_NAME`` and
        additionally accepts ``"CyclicLR"``.
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return the "MaP IoU" evaluator for ``dataset_name``.

        ``cfg`` and ``output_folder`` are accepted for interface compatibility
        and are not used.
        """
        return MAPIOUEvaluator(dataset_name)

    # Custom augmentation mapper, currently disabled:
    # @classmethod
    # def build_train_loader(cls, cfg, sampler=None):
    #     return build_detection_train_loader(
    #         cfg, mapper=MyMapper(cfg), sampler=sampler
    #     )

    def build_hooks(self):
        """Return the default hooks plus a best-checkpoint hook.

        The ``BestCheckpointer`` is inserted just before the last default hook
        and runs every ``cfg.TEST.EVAL_PERIOD`` iterations.
        """
        # copy of cfg
        cfg = self.cfg.clone()

        # build the original model hooks
        hooks = super().build_hooks()

        # add the best checkpointer hook
        best_checkpointer = BestCheckpointer(
            cfg.TEST.EVAL_PERIOD,
            DetectionCheckpointer(self.model, cfg.OUTPUT_DIR),
            "MaP IoU",
            "max",
        )
        hooks.insert(-1, best_checkpointer)
        return hooks

    @classmethod
    def build_optimizer(cls, cfg, model) -> torch.optim.Optimizer:
        """
        Build an AdamW optimizer from config.

        Learning rate and weight decay come from ``cfg.SOLVER``; the optimizer
        class is passed through ``maybe_add_gradient_clipping`` first.
        """
        params = get_default_optimizer_params(
            model,
            base_lr=cfg.SOLVER.BASE_LR,
            weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
            bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
            weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
        )
        return maybe_add_gradient_clipping(cfg, torch.optim.AdamW)(
            params,
            lr=cfg.SOLVER.BASE_LR,
            #momentum=cfg.SOLVER.MOMENTUM,
            #nesterov=cfg.SOLVER.NESTEROV,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        )

    @classmethod
    def build_lr_scheduler(cls, cfg, optimizer) -> torch.optim.lr_scheduler._LRScheduler:
        """
        Build a LR scheduler from config.

        Supported ``cfg.SOLVER.LR_SCHEDULER_NAME`` values:
          * ``"WarmupMultiStepLR"``: multi-step decay by ``cfg.SOLVER.GAMMA``,
            wrapped in warmup;
          * ``"WarmupCosineLR"``: cosine from 1 down to 1/40, wrapped in warmup;
          * ``"CyclicLR"``: ``torch.optim.lr_scheduler.CyclicLR`` between
            ``BASE_LR`` and ``5 * BASE_LR``, returned as-is (no warmup wrapper).

        Raises:
            ValueError: if the scheduler name is not one of the above.
        """
        # Local import: the notebook's import cell has no explicit
        # `import logging`, so do not depend on a star import providing it.
        import logging

        name = cfg.SOLVER.LR_SCHEDULER_NAME

        if name == "WarmupMultiStepLR":
            steps = [x for x in cfg.SOLVER.STEPS if x <= cfg.SOLVER.MAX_ITER]
            if len(steps) != len(cfg.SOLVER.STEPS):
                logger = logging.getLogger(__name__)
                logger.warning(
                    "SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. "
                    "These values will be ignored."
                )
            sched = MultiStepParamScheduler(
                values=[cfg.SOLVER.GAMMA ** k for k in range(len(steps) + 1)],
                milestones=steps,
                num_updates=cfg.SOLVER.MAX_ITER,
            )
        elif name == "WarmupCosineLR":
            sched = CosineParamScheduler(1, 1/40)
        elif name == "CyclicLR":
            # Fix: CyclicLR is a torch LR scheduler bound to the optimizer, not a
            # parameter scheduler that can be wrapped by WarmupParamScheduler /
            # LRMultiplier below, so it is returned directly.
            # NOTE(review): momentum cycling with AdamW may need a recent torch
            # version or cycle_momentum=False — confirm before using this branch.
            return torch.optim.lr_scheduler.CyclicLR(
                optimizer,
                base_lr=cfg.SOLVER.BASE_LR,
                max_lr=5*cfg.SOLVER.BASE_LR,
                base_momentum=0.85,
                max_momentum=0.95,
            )
        else:
            raise ValueError("Unknown LR scheduler: {}".format(name))

        # Warmup covers WARMUP_ITERS expressed as a fraction of the whole run,
        # capped at 1.0.
        sched = WarmupParamScheduler(
            sched,
            cfg.SOLVER.WARMUP_FACTOR,
            min(cfg.SOLVER.WARMUP_ITERS / cfg.SOLVER.MAX_ITER, 1.0),
            cfg.SOLVER.WARMUP_METHOD,
        )
        return LRMultiplier(optimizer, multiplier=sched, max_iter=cfg.SOLVER.MAX_ITER)

In [None]:
def run(fold=None):
    """Fine-tune the Cascade Mask R-CNN on one cross-validation fold.

    Registers the fold's train/valid COCO json files, builds the detectron2
    config, and trains with `Trainer`, writing to a per-fold output directory.

    Args:
        fold: fold number used to pick the COCO json files and the output
            directory. When omitted, the notebook-level global ``fold`` is
            used, so the existing ``for fold in ...: run()`` call keeps working.

    Raises:
        NameError: if ``fold`` is not passed and no global ``fold`` exists.
    """
    if fold is None:
        # Backward-compatible fallback to the loop variable in the notebook namespace.
        if "fold" not in globals():
            raise NameError("name 'fold' is not defined: pass run(fold=...) or set a global `fold`")
        fold = globals()["fold"]

    dataDir = "/content/drive/MyDrive/Kaggle/Sartorius/input/sartorius-cell-instance-segmentation"
    # Start from empty catalogs so the dataset names can be re-registered for each fold.
    DatasetCatalog.clear()
    MetadataCatalog.clear()
    register_coco_instances('sartorius_train', {}, '/content/drive/MyDrive/Kaggle/Sartorius/input/fixed_fold/coco_cell_train_fold{}.json'.format(fold), dataDir)
    register_coco_instances('sartorius_val', {}, '/content/drive/MyDrive/Kaggle/Sartorius/input/fixed_fold/coco_cell_valid_fold{}.json'.format(fold), dataDir)

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("sartorius_train",)
    cfg.DATASETS.TEST = ("sartorius_val",)
    # cfg.MODEL.PIXEL_MEAN = Config.pixel_mean
    # cfg.MODEL.PIXEL_STD = Config.pixel_std

    cfg.DATALOADER.NUM_WORKERS = 2
    # Alternatives tried earlier:
    #   model_zoo.get_checkpoint_url("Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml")
    #   "/content/drive/MyDrive/Kaggle/Sartorius/model/pretrained/LIVECell_anchor_based_model.pth"
    cfg.MODEL.WEIGHTS = "/content/drive/MyDrive/Kaggle/Sartorius/model/pretrained/cascade_v0/model_final.pth"
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 5e-6
    #cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupCosineLR"

    # Load the training set once (previously it was loaded twice) and derive
    # both iteration counts from it.
    iters_per_epoch = len(DatasetCatalog.get("sartorius_train")) // cfg.SOLVER.IMS_PER_BATCH
    cfg.SOLVER.MAX_ITER = iters_per_epoch * 20  # try 20 epochs; tune via the learning rate
    #cfg.SOLVER.STEPS = (cfg.SOLVER.MAX_ITER//3,cfg.SOLVER.MAX_ITER*2//3)
    cfg.INPUT.MIN_SIZE_TRAIN = (620,660,700,720,760,800)
    cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
    cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 320
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.TEST.EVAL_PERIOD = iters_per_epoch  # Once per epoch

    cfg.INPUT.MASK_FORMAT='bitmask'
    cfg.TEST.DETECTIONS_PER_IMAGE = 1000
    #cfg.MODEL.BACKBONE.FREEZE_AT = 2
    cfg.MODEL.RESNETS.DEPTH = 101
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = Config.anchor_generators_aspect_ratios
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = Config.anchor_generators_sizes
    cfg.MODEL.RESNETS.OUT_FEATURES = ["res2", "res3", "res4", "res5"]
    cfg.MODEL.FPN.IN_FEATURES = ["res2", "res3", "res4", "res5"]
    cfg.MODEL.FPN.NORM = "GN"
    cfg.MODEL.ROI_HEADS.IN_FEATURES = ["p2", "p3", "p4", "p5","p6"]
    cfg.MODEL.RPN.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6"]

    cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, True, True, True] # on Res3,Res4,Res5
    cfg.MODEL.RESNETS.DEFORM_MODULATED = True
    cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS = 2
    cfg.MODEL.RESNETS.NORM = "GN"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_CONV = 4
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 1
    cfg.MODEL.ROI_BOX_HEAD.NORM = "GN"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 7
    cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = True
    cfg.MODEL.ROI_MASK_HEAD.NUM_CONV = 8
    cfg.MODEL.ROI_MASK_HEAD.NORM = "GN"
    cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
    cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7)

    cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000
    cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 3000

    # cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE = "ciou"
    # cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "ciou"
    # cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 10.0
    #cfg.SOLVER.AMP.ENABLED = True

    # NOTE(review): the directory name ends with a literal '#'; kept as-is —
    # confirm it is intentional.
    cfg.OUTPUT_DIR = "/content/drive/MyDrive/Kaggle/Sartorius/model/finetuned/cascade_v0/fold_{}#".format(fold)

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = Trainer(cfg)
    # resume=False: start from cfg.MODEL.WEIGHTS rather than a checkpoint in OUTPUT_DIR.
    trainer.resume_or_load(resume=False)
    trainer.train()

In [None]:
# Fold 2 has the highest theoretical score.
# Train folds 2..5 one after another. The loop variable must be named `fold`:
# run() is called without arguments and picks the fold up from this global.
for fold in range(2,6):
  run()

#run_all()

[32m[12/12 19:53:54 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from /content/drive/MyDrive/Kaggle/Sartorius/input/fixed_fold/coco_cell_train_fold3.json
[32m[12/12 19:53:55 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from /content/drive/MyDrive/Kaggle/Sartorius/input/fixed_fold/coco_cell_train_fold3.json
[32m[12/12 19:53:57 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(
      256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
      (norm): GroupNorm(32, 256, eps=1e-05, affine=True)
    )
    (fpn_output2): Conv2d(
      256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      (norm): GroupNorm(32, 256, eps=1e-05, affine=True)
    )
    (fpn_lateral3): Conv2d(
      512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
      (norm): GroupNorm(32, 256, eps=1e-05, affine=True)
    )
    (fpn_output3): Conv2d(
      256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), b

SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.


[32m[12/12 19:53:59 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[12/12 19:54:34 d2.utils.events]: [0m eta: 2:00:11  iter: 19  total_loss: 3.66  loss_cls_stage0: 0.2652  loss_box_reg_stage0: 0.5345  loss_cls_stage1: 0.2682  loss_box_reg_stage1: 0.9402  loss_cls_stage2: 0.2601  loss_box_reg_stage2: 0.8483  loss_mask: 0.3013  loss_rpn_cls: 0.02951  loss_rpn_loc: 0.1199  time: 1.6202  data_time: 0.2980  lr: 9.9905e-08  max_mem: 10832M
[32m[12/12 19:55:05 d2.utils.events]: [0m eta: 1:59:52  iter: 39  total_loss: 3.747  loss_cls_stage0: 0.2717  loss_box_reg_stage0: 0.5438  loss_cls_stage1: 0.2795  loss_box_reg_stage1: 0.8946  loss_cls_stage2: 0.2826  loss_box_reg_stage2: 0.8187  loss_mask: 0.2844  loss_rpn_cls: 0.0483  loss_rpn_loc: 0.1338  time: 1.5924  data_time: 0.1201  lr: 1.9981e-07  max_mem: 10832M
[32m[12/12 19:55:37 d2.utils.events]: [0m eta: 1:59:22  iter: 59  total_loss: 3.698  loss_cls_stage0: 0.2699  loss_box_reg_stage0: 0.543  loss_cls_stage1: 0.2645

SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.


[32m[12/12 22:17:06 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[12/12 22:17:39 d2.utils.events]: [0m eta: 1:56:04  iter: 19  total_loss: 3.882  loss_cls_stage0: 0.2629  loss_box_reg_stage0: 0.5603  loss_cls_stage1: 0.2717  loss_box_reg_stage1: 0.9421  loss_cls_stage2: 0.3054  loss_box_reg_stage2: 0.867  loss_mask: 0.2985  loss_rpn_cls: 0.04042  loss_rpn_loc: 0.1261  time: 1.6398  data_time: 0.1971  lr: 9.9905e-08  max_mem: 11636M
[32m[12/12 22:18:10 d2.utils.events]: [0m eta: 1:57:35  iter: 39  total_loss: 3.71  loss_cls_stage0: 0.2812  loss_box_reg_stage0: 0.5523  loss_cls_stage1: 0.279  loss_box_reg_stage1: 0.933  loss_cls_stage2: 0.2748  loss_box_reg_stage2: 0.9265  loss_mask: 0.2897  loss_rpn_cls: 0.04032  loss_rpn_loc: 0.132  time: 1.5907  data_time: 0.1047  lr: 1.9981e-07  max_mem: 11636M
[32m[12/12 22:18:41 d2.utils.events]: [0m eta: 1:57:06  iter: 59  total_loss: 3.644  loss_cls_stage0: 0.2419  loss_box_reg_stage0: 0.5505  loss_cls_stage1: 0.2461  

SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.


[32m[12/13 00:39:51 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[12/13 00:40:22 d2.utils.events]: [0m eta: 1:52:36  iter: 19  total_loss: 3.665  loss_cls_stage0: 0.2698  loss_box_reg_stage0: 0.5779  loss_cls_stage1: 0.2662  loss_box_reg_stage1: 0.9526  loss_cls_stage2: 0.2884  loss_box_reg_stage2: 0.8618  loss_mask: 0.3166  loss_rpn_cls: 0.03065  loss_rpn_loc: 0.1294  time: 1.5416  data_time: 0.1495  lr: 9.9905e-08  max_mem: 11636M
[32m[12/13 00:40:53 d2.utils.events]: [0m eta: 1:54:14  iter: 39  total_loss: 3.669  loss_cls_stage0: 0.2534  loss_box_reg_stage0: 0.5412  loss_cls_stage1: 0.2637  loss_box_reg_stage1: 0.9379  loss_cls_stage2: 0.2632  loss_box_reg_stage2: 0.9011  loss_mask: 0.2943  loss_rpn_cls: 0.03403  loss_rpn_loc: 0.1088  time: 1.5507  data_time: 0.1595  lr: 1.9981e-07  max_mem: 11636M
[32m[12/13 00:41:25 d2.utils.events]: [0m eta: 1:53:46  iter: 59  total_loss: 3.67  loss_cls_stage0: 0.2491  loss_box_reg_stage0: 0.5266  loss_cls_stage1: 0.26