# Дообучение модели faster_rcnn_R_50_FPN_1x из detectron2 (model zoo) на датасете LVISv1

Модель `faster_rcnn_R_50_FPN_1x` была обучена на COCO-датасете. Метрики качества при дообучении получились не очень высокими, из чего можено сделать вывод, что предобученная модель плохо обобщается на другой датасет

In [36]:
import torch
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

torch:  1.9 ; cuda:  cu111


In [38]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import os, json, cv2, random
import matplotlib.pyplot as plt

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [39]:
from detectron2.data.datasets import register_coco_instances, register_lvis_instances, get_lvis_instances_meta
from detectron2.data import DatasetCatalog

In [48]:
root = '/home/jupyter/mnt/s3/asciishell-fsod/LVIS/'
dataset_v = 1
register_lvis_instances(f"lvis_v1_dataset_train{dataset_v}", {}, root + "lvis_v1_train.json", root)
register_lvis_instances(f"lvis_v1_dataset_val{dataset_v}", {}, root + "lvis_v1_val.json", root)

In [41]:
torch.cuda.is_available()

True

In [44]:
from detectron2.engine import DefaultTrainer

config = "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config))
cfg.DATASETS.TRAIN = (f"lvis_v1_dataset_train{dataset_v}",)
cfg.DATASETS.TEST = (f"lvis_v1_dataset_val{dataset_v}", )
cfg.DATALOADER.NUM_WORKERS = 16
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1204 
cfg.MODEL.DEVICE = 'cuda'
cfg.INPUT.CROP.ENABLED = False
cfg.SOLVER.BASE_LR = 1e-3
cfg.SOLVER.MAX_ITER = 180_000
cfg.SOLVER.IMS_PER_BATCH = 16
cfg.SOLVER.LR_SCHEDULER_NAME = 'WarmupMultiStepLR'
cfg.OUTPUT_DIR = './output_coco'

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=True)
trainer.train()

  max_mem: 18572M
[32m[02/28 23:46:15 d2.utils.events]: [0m eta: 5:59:49  iter: 141079  total_loss: 0.8407  loss_cls: 0.3473  loss_box_reg: 0.2283  loss_rpn_cls: 0.08669  loss_rpn_loc: 0.1713  time: 0.5593  data_time: 0.0261  lr: 1e-05  max_mem: 18572M
[32m[02/28 23:46:26 d2.utils.events]: [0m eta: 5:59:42  iter: 141099  total_loss: 0.791  loss_cls: 0.3251  loss_box_reg: 0.1922  loss_rpn_cls: 0.08871  loss_rpn_loc: 0.1631  time: 0.5593  data_time: 0.0287  lr: 1e-05  max_mem: 18572M
[32m[02/28 23:46:37 d2.utils.events]: [0m eta: 5:59:44  iter: 141119  total_loss: 0.785  loss_cls: 0.3338  loss_box_reg: 0.2202  loss_rpn_cls: 0.07194  loss_rpn_loc: 0.162  time: 0.5593  data_time: 0.0292  lr: 1e-05  max_mem: 18572M
[32m[02/28 23:46:48 d2.utils.events]: [0m eta: 5:59:47  iter: 141139  total_loss: 0.7779  loss_cls: 0.3345  loss_box_reg: 0.2035  loss_rpn_cls: 0.07946  loss_rpn_loc: 0.1419  time: 0.5593  data_time: 0.0269  lr: 1e-05  max_mem: 18572M
[32m[02/28 23:47:00 d2.utils.events]

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ../aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


In [45]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") 
with open(cfg.OUTPUT_DIR + "/output.yaml", "w") as f:
    f.write(cfg.dump()) 
    
predictor = DefaultPredictor(cfg)

In [49]:
from detectron2.evaluation import LVISEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

evaluator = LVISEvaluator(cfg.DATASETS.TEST[0], output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
print(inference_on_dataset(predictor.model, val_loader, evaluator))

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ../aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=300 catIds=all] = 0.105
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=300 catIds=all] = 0.171
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=300 catIds=all] = 0.110
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=     s | maxDets=300 catIds=all] = 0.076
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=     m | maxDets=300 catIds=all] = 0.142
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=     l | maxDets=300 catIds=all] = 0.192
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=300 catIds=  r] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=300 catIds=  c] = 0.032
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=300 catIds=  f] = 0.231
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=300 catIds=all] = 0.135
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=     s | maxDets=300 catIds=all] = 0.093
 Average R