# 环境配置

In [1]:
# !nvidia-smi
import torch
print(torch.__version__)
torch.cuda.set_device(0)

2.0.1+cu118


In [2]:
# 连接网盘
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
%cd drive/MyDrive/detectron2
%ls

/content/drive/MyDrive/detectron2
 018_Color.png                              output2.mp4
 1.jpg                                      output_cut.mp4
 3.jpg                                      [0m[01;34mpanopticapi[0m/
 [01;34mballoon[0m/                                   [01;34mpanoptic_images[0m/
 balloon_dataset.zip                        path_to_output_video.mp4
 [01;34mbean[0m/                                      [01;34mplant[0m/
 [01;34mbean_masks[0m/                                plant_002.jpg
 [01;34mbean_set[0m/                                  plant_009.jpg
 [01;34mbox_disease[0m/                               plant_015.jpg
 [01;34mcoco_eval[0m/                                 plant1.mp4
 color2.png                                 plant_output.mp4
 [01;34mcow[0m/                                       plant_output_video.mp4
 [01;34mdetectron2_dc[0m/                             pre.jpg
 [01;34mfield[0m/                                     [01;34mroad

In [4]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
!pip install opencv-python opencv-python-headless

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-0keb8kgp
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-0keb8kgp
  Resolved https://github.com/facebookresearch/detectron2.git to commit fc9c33b1f6e5d4c37bbb46dde19af41afc1ddb2a
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6)
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting omegaconf<2.4,>=2.1 (from detectron2==

# 训练过程

## 配置训练器

In [5]:
# 下面的代码块开始训练
# 导入依赖的包
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import os
import random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.data.catalog import DatasetCatalog
from detectron2.evaluation import COCOEvaluator

## 注册训练集和验证集

In [6]:
from detectron2.data.datasets import register_coco_panoptic_separated
# 全景分割
register_coco_panoptic_separated("field_train", {}, "./field/train/", "./field/train/field_train_coco_panoptic", "./field/train/field_train_coco_panoptic.json", "./field/train/field_train_coco_instance", "./field/train/field_train_coco_instance.json")
register_coco_panoptic_separated("field_valid", {}, "./field/valid/", "./field/valid/field_valid_coco_panoptic", "./field/valid/field_valid_coco_panoptic.json", "./field/valid/field_valid_coco_instance", "./field/valid/field_valid_coco_instance.json")

In [7]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")) #全景分割模型
cfg.DATASETS.TRAIN = ("field_train_separated", ) # 设置训练集
cfg.DATASETS.TEST = ("field_valid_separated",) # 设置验证集
cfg.DATALOADER.NUM_WORKERS = 2 # 工作线程数量
cfg.SOLVER.IMS_PER_BATCH = 4 # 每个小批次的载入图像数量
cfg.SOLVER.BASE_LR = 0.0001 # 初始化学习率
cfg.SOLVER.MAX_ITER = 400 # 迭代次数
cfg.SOLVER.CHECKPOINT_PERIOD = 1000 # 设置保存模型检查点的周期。在每1000次迭代后将保存一次模型

cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml') # 设置模型的初始权重
cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 # 设置每张图像的区域生成网络（RPN）的候选框数量
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 # 设置每张图像的感兴趣区域（ROI）头部的训练时的候选框数量
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # 设置类别数量
cfg.INPUT.MASK_FORMAT = "bitmask" # 设置图像的掩码格式

cfg.TEST.EVAL_PERIOD = 100 # 设置模型评估的周期
cfg.OUTPUT_DIR = './output/field_panoptic_fpn_R_50_3x_400iter' # 输出路径

In [8]:
class CocoTrainer(DefaultTrainer):

  @classmethod
  def build_evaluator(cls, cfg, dataset_name, output_folder=None):

    if output_folder is None:
        os.makedirs("coco_eval", exist_ok=True)
        output_folder = "coco_eval"

    return COCOEvaluator(dataset_name, cfg, False, output_folder)

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[09/07 09:15:24 d2.engine.defaults]: Model:
PanopticFPN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res2): 

model_final_c10459.pkl: 184MB [00:00, 202MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[09/07 09:15:26 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[09/07 09:15:57 d2.utils.events]:  eta: 0:07:45  iter: 19  total_loss: 3.591  loss_sem_seg: 2.61  loss_rpn_cls: 0.1625  loss_rpn_loc: 0.03097  loss_cls: 0.7912  loss_box_reg: 0  loss_mask: 0    time: 1.2664  last_time: 1.2286  data_time: 0.1255  last_data_time: 0.0248   lr: 4.8453e-06  max_mem: 6393M
[09/07 09:16:28 d2.utils.events]:  eta: 0:07:33  iter: 39  total_loss: 3.001  loss_sem_seg: 2.189  loss_rpn_cls: 0.1467  loss_rpn_loc: 0.02828  loss_cls: 0.6647  loss_box_reg: 0  loss_mask: 0    time: 1.2838  last_time: 1.3770  data_time: 0.0461  last_data_time: 0.0614   lr: 9.8403e-06  max_mem: 6393M
[09/07 09:16:54 d2.utils.events]:  eta: 0:07:14  iter: 59  total_loss: 2.198  loss_sem_seg: 1.629  loss_rpn_cls: 0.117  loss_rpn_loc: 0.03132  loss_cls: 0.4617  loss_box_reg: 0  loss_mask: 0    time: 1.2902  last_time: 1.3427  data_time: 0.0445  last_data_time: 0.0259   lr: 1.4835e-05  max_mem: 6393M
[09/07 09:17:21 d2.utils.events]:  eta: 0:06:55  iter: 79  total_loss: 1.627  loss_sem_seg: 1

## 预测一张图片

In [16]:
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2 import model_zoo
from google.colab.patches import cv2_imshow

import cv2
import numpy as np



cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file('COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml'))
cfg.MODEL.WEIGHTS = "./output/field_panoptic_fpn_R_50_3x_400iter/model_final.pth"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.99
cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)


meta_data = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
meta_data.stuff_classes[1] = 'field' # 强行设置一号为field

image = cv2.imread("./field/train/3.jpg")
predictions, segmentinfo = predictor(image)['panoptic_seg']
predictions[predictions > 1] = 0  # 清除其他类
viz = Visualizer(image[:,:,::-1], meta_data)
output = viz.draw_panoptic_seg_predictions(predictions.to('cpu'), segmentinfo)
cv2_imshow(output.get_image()[:,:,::-1])

Output hidden; open in https://colab.research.google.com to view.

## 预测一个视频

In [17]:
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2 import model_zoo
from google.colab.patches import cv2_imshow
import numpy as np
import cv2
# cv2_imshow(image)
cfg = get_cfg()
device = 'cuda'
# Panoptic segmentation
cfg.merge_from_file(model_zoo.get_config_file('COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml'))
cfg.MODEL.WEIGHTS = "./output/field_panoptic_fpn_R_50_3x_400iter/model_final.pth"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9
cfg.MODEL.DEVICE = device

# 利用设置好的配置创建默认预测器
predictor = DefaultPredictor(cfg)
print('预测器创建成功！！！')

cap = cv2.VideoCapture('./field.mp4') # 输入视频路径
fps = cap.get(cv2.CAP_PROP_FPS)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
size = (int(width), int(height))
four_cc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter('./visiual/field4.mp4', four_cc, fps, size) # 输出视频路径
i = 0

meta_data = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
meta_data.stuff_classes[1] = 'field' # 强行设置一号为road

while cap.isOpened():
    ok, image = cap.read()
    if not ok:
        break
    predictions, segmentinfo = predictor(image)['panoptic_seg']
    predictions[predictions > 1] = 0  # 清除其他类
    viz = Visualizer(image[:,:,::-1], metadata=meta_data)
    output = viz.draw_panoptic_seg_predictions(predictions.to('cpu'), segmentinfo)
    image_visual = output.get_image()[:,:,::-1]
    writer.write(image_visual)
    i += 1
    print(i, end=',')
    if i % 10 == 0:
        print()
cap.release()
writer.release()
print('视频加载完成')

[09/07 09:43:19 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output/field_panoptic_fpn_R_50_3x_400iter/model_final.pth ...


roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


预测器创建成功！！！
1,2,3,4,5,6,7,8,9,10,
11,12,13,14,15,16,17,18,19,20,
21,22,23,24,25,26,27,28,29,30,
31,32,33,34,35,36,37,38,39,40,
41,42,43,44,45,46,47,48,49,50,
51,52,53,54,55,56,57,58,59,60,
61,62,63,64,65,66,67,68,69,70,
71,72,73,74,75,76,77,78,79,80,
81,82,83,84,85,86,87,88,89,90,
91,92,93,94,95,96,97,98,99,100,
101,102,103,104,105,106,107,108,109,110,
111,112,113,114,115,116,117,118,119,120,
121,122,123,124,125,126,127,128,129,130,
131,132,133,134,135,136,137,138,139,140,
141,142,143,144,145,146,147,148,149,150,
151,152,153,154,155,156,157,158,159,160,
161,162,163,164,165,166,167,168,169,170,
171,172,173,174,175,176,177,178,179,180,
181,182,183,184,185,186,187,188,189,190,
191,192,193,194,195,196,197,198,199,200,
201,202,203,204,205,206,207,208,209,210,
211,212,213,214,215,216,217,218,219,220,
221,222,223,224,225,226,227,228,229,230,
231,232,233,234,235,236,237,238,239,240,
241,242,243,244,245,246,247,248,249,250,
251,252,253,254,255,256,257,258,259,260,
261,262,263,264,265,266,267,268