# Дообучение модели mask_rcnn_R_50_FPN_1x из detectron2 (model zoo) на датасете LVISv1

In [1]:
import torch

# Report the installed PyTorch release (major.minor) and the CUDA tag of the build.
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])

# Version strings such as "1.9.0+cu111" carry the CUDA tag after the "+".
# A build without that suffix (e.g. "1.9.0") used to report the whole PyTorch
# version as the CUDA tag, so fall back to torch.version.cuda in that case.
_release, _plus, _local_tag = torch.__version__.partition("+")
if _plus:
    CUDA_VERSION = _local_tag
elif torch.version.cuda is not None:
    CUDA_VERSION = "cu" + torch.version.cuda.replace(".", "")
else:
    CUDA_VERSION = "cpu"

print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

torch:  1.9 ; cuda:  cu111


In [3]:
# Third-party and stdlib imports used throughout the notebook.
import detectron2
from detectron2.utils.logger import setup_logger
# Install detectron2's logger before anything else runs.
setup_logger()

import numpy as np
import os, json, cv2, random
import matplotlib.pyplot as plt

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
# The original line ended with a dangling comma, which is a SyntaxError
# outside parentheses and prevented the whole cell from running.
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [4]:
from detectron2.data.datasets import register_coco_instances, register_lvis_instances, get_lvis_instances_meta
from detectron2.data import DatasetCatalog

In [5]:
# Directory that holds the LVIS annotation JSON files; it is also passed to
# register_lvis_instances as the last argument (the image root).
root = '/home/jupyter/mnt/s3/asciishell-fsod/LVIS/'
# Numeric suffix appended to the registered dataset names.
dataset_v = 1

# DatasetCatalog refuses to register the same name twice, so re-running this
# cell used to fail with an AssertionError. Register only names that are not
# present yet so the cell can be executed repeatedly.
# NOTE: if `root` changes, bump `dataset_v` (or restart the kernel) so the new
# paths are registered under a fresh name.
for _split in ("train", "val"):
    _name = f"lvis_v1_dataset_{_split}{dataset_v}"
    if _name not in DatasetCatalog.list():
        register_lvis_instances(_name, {}, root + f"lvis_v1_{_split}.json", root)

In [6]:
# Sanity check: confirm PyTorch can see a CUDA device before configuring
# training, which sets cfg.MODEL.DEVICE = 'cuda'.
torch.cuda.is_available()

True

In [7]:
from detectron2.engine import DefaultTrainer

# Start from the model-zoo Mask R-CNN R50-FPN (1x) config and checkpoint that
# were published for LVIS v0.5, then fine-tune on the LVIS v1 splits
# registered earlier in the notebook.
config = "LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config))
cfg.DATASETS.TRAIN = (f"lvis_v1_dataset_train{dataset_v}",)
cfg.DATASETS.TEST = (f"lvis_v1_dataset_val{dataset_v}", )
cfg.DATALOADER.NUM_WORKERS = 16
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config)
# detectron2 expects the number of *foreground* classes here, with no extra
# slot for background. LVIS v1 has 1203 categories, so the previous value of
# 1204 added a class that never receives training targets.
# NOTE: checkpoints trained with the old value have differently shaped
# prediction heads and will not load cleanly into this configuration.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1203
cfg.MODEL.DEVICE = 'cuda'
cfg.INPUT.CROP.ENABLED = False

# Optimizer and learning-rate schedule.
cfg.SOLVER.WEIGHT_DECAY = 4.0e-05
cfg.SOLVER.BASE_LR = 0.32 / 8  # evaluates to 0.04
cfg.SOLVER.MAX_ITER = 180_000
cfg.SOLVER.LR_SCHEDULER_NAME = 'WarmupMultiStepLR'
cfg.SOLVER.WARMUP_FACTOR =  0.01
# The learning rate is multiplied by GAMMA at each of these iterations.
cfg.SOLVER.STEPS = [162_000, 171_000, 175_500]
cfg.SOLVER.GAMMA = 0.1
cfg.OUTPUT_DIR = './output_lvis'

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
# resume=False: always initialise from cfg.MODEL.WEIGHTS rather than from a
# checkpoint previously written to OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()

: 0.04  max_mem: 34900M
[32m[02/15 04:09:51 d2.utils.events]: [0m eta: 7:50:27  iter: 143739  total_loss: 1.16  loss_cls: 0.3684  loss_box_reg: 0.2116  loss_mask: 0.2557  loss_rpn_cls: 0.1146  loss_rpn_loc: 0.2132  time: 0.7891  data_time: 0.0341  lr: 0.04  max_mem: 34900M
[32m[02/15 04:10:06 d2.utils.events]: [0m eta: 7:50:04  iter: 143759  total_loss: 1.093  loss_cls: 0.3639  loss_box_reg: 0.221  loss_mask: 0.2484  loss_rpn_cls: 0.09212  loss_rpn_loc: 0.1651  time: 0.7891  data_time: 0.0344  lr: 0.04  max_mem: 34900M
[32m[02/15 04:10:22 d2.utils.events]: [0m eta: 7:49:48  iter: 143779  total_loss: 1.135  loss_cls: 0.389  loss_box_reg: 0.2264  loss_mask: 0.2412  loss_rpn_cls: 0.1068  loss_rpn_loc: 0.1925  time: 0.7891  data_time: 0.0352  lr: 0.04  max_mem: 34900M
[32m[02/15 04:10:39 d2.utils.events]: [0m eta: 7:49:27  iter: 143799  total_loss: 1.176  loss_cls: 0.3896  loss_box_reg: 0.2254  loss_mask: 0.2456  loss_rpn_cls: 0.1018  loss_rpn_loc: 0.2036  time: 0.7892  data_time: 

[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls_score.{bias, weight}[0m
[34mroi_heads.mask_head.predictor.{bias, weight}[0m
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ../aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


In [17]:
# Point the config at the checkpoint written at the end of training.
final_weights_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS = final_weights_path

# Save the full configuration (including the updated weights path) next to
# the checkpoint.
config_dump_path = cfg.OUTPUT_DIR + "/output.yaml"
with open(config_dump_path, "w") as yaml_file:
    yaml_file.write(cfg.dump())

# Build an inference wrapper from the updated config.
predictor = DefaultPredictor(cfg)

In [19]:
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import LVISEvaluator, inference_on_dataset

# Evaluate the model on the first (and only) configured test split.
val_dataset_name = cfg.DATASETS.TEST[0]
evaluator = LVISEvaluator(val_dataset_name, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, val_dataset_name)

# Run inference over the validation loader and print the metrics returned by
# the evaluator.
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_results)

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ../aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[32m[02/15 16:26:41 d2.data.datasets.lvis]: [0mLoading /home/jupyter/mnt/s3/asciishell-fsod/LVIS/lvis_v1_val.json takes 5.86 seconds.
[32m[02/15 16:26:41 d2.data.datasets.lvis]: [0mLoaded 19809 images in the LVIS format from /home/jupyter/mnt/s3/asciishell-fsod/LVIS/lvis_v1_val.json
[32m[02/15 16:26:44 d2.data.build]: [0mDistribution of instances among all 1203 categories:
[36m|   category    | #instances   |   category    | #instances   |   category    | #instances   |
|:-------------:|:-------------|:-------------:|:-------------|:-------------:|:-------------|
|  aerosol_can  | 11           | air_conditi.. | 146          |   airplane    | 619          |
|  alarm_clock  | 60           |    alcohol    | 149          |   alligator   | 1            |
|    almond     | 302          |   ambulance   | 6            |   amplifier   | 3            |
|    anklet     | 8            |    antenna    | 202          |     apple     | 3116         |
|  applesauce   | 3            |    apricot