In [1]:
import os
# config
class SetupCFG:
  def __init__(self, dataset_name, path_to_dataset, json_name, path_to_weights, path_to_mask, \
               work_dir, video_file_name, video_file_name_to_save, temporary_storage):
    self.dataset_name = dataset_name
    self.path_to_dataset = path_to_dataset
    self.path_to_json = os.path.join(path_to_dataset, json_name)    
    self.path_to_weights = path_to_weights
    self.path_to_mask = path_to_mask
    self.work_dir = work_dir
    self.video_file_name = video_file_name
    self.video_file_name_to_save = video_file_name_to_save if temporary_storage else os.path.join(work_dir, video_file_name_to_save)
    self.temporary_storage = temporary_storage

my_cfg = SetupCFG(dataset_name = "my_dataset", 
                  path_to_dataset = "raw/KomPol2_clean", 
                  json_name = "KomPol2-7.json", 
                  work_dir = "drive/My Drive/Colab Notebooks/output_Pol",                  
                  path_to_weights = "drive/My Drive/Colab Notebooks/output_Pol/model_final_poliklinika_total_loss_0.9683.pth",
                  #"drive/My Drive/Colab Notebooks/output_Pol/model_final_poliklinika.pth",
                  path_to_mask = "images/mask.png",
                  video_file_name = "raw/videos/cut_final_10sek_v2.mp4",
                  video_file_name_to_save = "raw/videos/cut_final_10sek_v2_final_with_crossing.mp4", 
                  temporary_storage = True)

In [2]:
%%capture
import torch, torchvision
import detectron2

import numpy as np
import cv2
import random
from IPython.display import Image 
# from google.colab.patches import cv2_imshow

# для регистрации данных
from detectron2.data.datasets import register_coco_instances

# для обучения
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

# для предиктора
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# для предсказания фото
from detectron2.utils.visualizer import ColorMode

# для предсказания видео
from IPython.display import YouTubeVideo, display
import tqdm
from detectron2.utils.video_visualizer import VideoVisualizer

import copy

In [3]:
register_coco_instances(my_cfg.dataset_name, {}, my_cfg.path_to_json, my_cfg.path_to_dataset)
MetadataCatalog.get(my_cfg.dataset_name).thing_classes = ['car', 'minibus', 'trolleybus', 'tram', 'truck', 'bus', 'middle_bus', 'ambulance', 'fire_truck', 'middle_truck', 'tractor', 'uncategorized', 'van', 'person']
dataset_metadata = MetadataCatalog.get(my_cfg.dataset_name)

In [4]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")) #LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
cfg.DATASETS.TRAIN = (my_cfg.dataset_name,)
cfg.DATASETS.TEST = (my_cfg.dataset_name,)
cfg.DATALOADER.NUM_WORKERS = 4 #8
# cfg.MODEL.WEIGHTS = my_cfg.path_to_weights
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo 
cfg.SOLVER.IMS_PER_BATCH = 2 #5
cfg.SOLVER.BASE_LR = 0.025  # pick a good LR
cfg.SOLVER.MAX_ITER = 300   # 300 iterations seems good enough for this toy dataset; you may need to train longer for a practical dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 #512  # faster, and good enough for this toy dataset (default: 512) 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 13 #10 
# cfg.MODEL.RETINANET.PRIOR_PROB = 1


os.makedirs("./output", exist_ok=True) # cfg.OUTPUT_DIR
cfg.OUTPUT_DIR = "./output"
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

[32m[03/11 22:31:19 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[03/11 22:31:20 d2.data.datasets.coco]: [0mLoaded 750 images in COCO format from raw/KomPol2_clean/KomPol2-7.json
[32m[03/11 22:31:20 d2.data.build]: [0mRemoved 0 images with no usable annotations. 750 images left.
[32m[03/11 22:31:20 d2.data.build]: [0mDistribution of instances among all 14 categories:
[36m|   category   | #instances   |  category  | #instances   |   category    | #instances   |
|:------------:|:-------------|:----------:|:-------------|:-------------:|:-------------|
|     car      | 8087         |  minibus   | 1128         |  trolleybus   | 216          |
|     tram     | 1            |   truck    | 55           |      bus      | 62           |
|  middle_bus  | 575          | ambulance  | 8            |  fire_truck   | 0            |
| middle_truck | 289          |  tractor   | 6            | uncategorized | 4927         |
|     van      | 69           |   person   | 10560        |               |              |
|    total     | 25983        |            

model_final_280758.pkl: 167MB [01:14, 2.25MB/s]                              
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (14, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (14,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (52, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (52,) in the model! You might want to double check if this is expected.


[32m[03/11 22:32:34 d2.engine.train_loop]: [0mStarting training from iteration 0


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  num_fg = fg_inds.nonzero().numel()


[32m[03/11 22:32:51 d2.utils.events]: [0m eta: 0:03:54  iter: 19  total_loss: 3.199  loss_cls: 2.144  loss_box_reg: 0.6108  loss_rpn_cls: 0.1958  loss_rpn_loc: 0.2942  time: 0.8333  data_time: 0.0137  lr: 0.00049953  max_mem: 2640M
[32m[03/11 22:33:07 d2.utils.events]: [0m eta: 0:03:31  iter: 39  total_loss: 1.932  loss_cls: 0.7414  loss_box_reg: 0.5807  loss_rpn_cls: 0.1276  loss_rpn_loc: 0.2656  time: 0.8112  data_time: 0.0031  lr: 0.00099902  max_mem: 2640M
[32m[03/11 22:33:23 d2.utils.events]: [0m eta: 0:03:15  iter: 59  total_loss: 1.551  loss_cls: 0.5373  loss_box_reg: 0.5714  loss_rpn_cls: 0.09866  loss_rpn_loc: 0.252  time: 0.8160  data_time: 0.0033  lr: 0.0014985  max_mem: 2640M
[32m[03/11 22:33:39 d2.utils.events]: [0m eta: 0:02:58  iter: 79  total_loss: 1.339  loss_cls: 0.3971  loss_box_reg: 0.4849  loss_rpn_cls: 0.1017  loss_rpn_loc: 0.3198  time: 0.8127  data_time: 0.0030  lr: 0.001998  max_mem: 2640M
[32m[03/11 22:33:55 d2.utils.events]: [0m eta: 0:02:42  iter: 