构造完整模型（脱离配置文件）以导出onnx
1. 数据前处理
2. 模型及推理
3. 数据后处理

1 Dataset DataLoader

In [1]:
#数据前处理
from mmcv.transforms import LoadImageFromFile,Compose
from mmyolo.datasets.transforms.transforms import YOLOv5KeepRatioResize,LetterResize
from mmdet.datasets.transforms import PackDetInputs,LoadAnnotations

test_pipeline=Compose([
    LoadImageFromFile(file_client_args=dict(backend='disk')),
    LoadAnnotations(with_bbox=True),
    YOLOv5KeepRatioResize(scale=(640,640)),
    LetterResize(scale=(640,640),allow_scale_up=False,pad_val=dict(img=114)),
    PackDetInputs(meta_keys=('img_id','img_path','ori_shape','img_shape','scale_factor','pad_param'))
])


In [2]:
#1. Test Dataset
from mmyolo.datasets.yolov5_coco import YOLOv5CocoDataset

class_name = ('right','backslash',"wrong","half","bias","questionMark","remark","circle"
,"wavyLine","underline")  # 根据 class_with_id.txt 类别信息，设置 class_name
num_classes = len(class_name)
metainfo = dict(
    classes=class_name,
    num_classes=num_classes,
    palette=[(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230), (106, 0, 228),
         (0, 60, 100), (0, 80, 100), (0, 0, 70), (0, 0, 192), (250, 170, 30)]
)

test_dataset=YOLOv5CocoDataset(
    data_root='/project/volume/image_process/opemmmlab/edu_photo/',
    test_mode=True,
    data_prefix=dict(img='voc/images'),
    ann_file='coco/test.json',
    metainfo=metainfo,
    pipeline=test_pipeline,
    batch_shapes_cfg=None
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [3]:
test_dataset[0]



{'inputs': tensor([[[114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          ...,
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114]],
 
         [[114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          ...,
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114]],
 
         [[114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          ...,
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114],
          [114, 114, 114,  ..., 114, 114, 114]]], dtype=torch.uint8),
 'data_samples': <DetDataSample(
 
     META INFORMAT

In [4]:
#2. dataloader
from torch.utils.data import DataLoader
from mmengine.dataset import DefaultSampler,pseudo_collate

test_dataloader=DataLoader(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    pin_memory=True,
    drop_last=False,
    sampler=DefaultSampler(dataset=test_dataset,shuffle=False),
    dataset=test_dataset,
    collate_fn=pseudo_collate, #使用mmengine内置的collata_fn函数，因为dataset元素是封装的DetDataSample
)

In [5]:
import torch
for test_data in test_dataloader:
    print(len(test_data['inputs']))
    print(test_data['inputs'][0].shape)
    # torch.cuda.empty_cache()
    break

2
torch.Size([3, 640, 640])


2 Model

In [6]:
#2.backbone
from mmyolo.models.backbones import YOLOv8CSPDarknet
backbone=YOLOv8CSPDarknet(arch='P5',last_stage_out_channels=512,deepen_factor=1.0,
                          widen_factor=1.25,norm_cfg=dict(type='BN',momentum=0.03,eps=0.001),
                          act_cfg=dict(type='SiLU',inplace=True))
backbone

YOLOv8CSPDarknet(
  (stem): ConvModule(
    (conv): Conv2d(3, 80, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
    (activate): SiLU(inplace=True)
  )
  (stage1): Sequential(
    (0): ConvModule(
      (conv): Conv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (activate): SiLU(inplace=True)
    )
    (1): CSPLayerWithTwoConv(
      (main_conv): ConvModule(
        (conv): Conv2d(160, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (final_conv): ConvModule(
        (conv): Conv2d(400, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, 

In [7]:
#3.neck
from mmyolo.models.necks import YOLOv8PAFPN

neck=YOLOv8PAFPN(
    deepen_factor=1.0,
    widen_factor=1.25,
    in_channels=[256,512,512],
    out_channels=[256,512,512],
    num_csp_blocks=3,
    norm_cfg=dict(type='BN',momentum=0.03,eps=0.001),
    act_cfg=dict(type='SiLU',inplace=True)
)
neck

YOLOv8PAFPN(
  (reduce_layers): ModuleList(
    (0-2): 3 x Identity()
  )
  (upsample_layers): ModuleList(
    (0-1): 2 x Upsample(scale_factor=2.0, mode='nearest')
  )
  (top_down_layers): ModuleList(
    (0): CSPLayerWithTwoConv(
      (main_conv): ConvModule(
        (conv): Conv2d(1280, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(640, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (final_conv): ConvModule(
        (conv): Conv2d(1600, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(640, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (blocks): ModuleList(
        (0-2): 3 x DarknetBottleneck(
          (conv1): ConvModule(
            (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn): BatchNorm2d(320, eps=0.001, momentum=0.03,

In [8]:
#4.bbox_head-head_module
from mmyolo.models.dense_heads import YOLOv8HeadModule
num_classes=10

head_module=YOLOv8HeadModule(
    num_classes=num_classes,
    in_channels=[256,512,512],
    widen_factor=1.25,
    reg_max=16,
    norm_cfg=dict(type='BN',momentum=0.03,eps=0.001),
    act_cfg=dict(type='SiLU',inplace=True),
    featmap_strides=[8,16,32]
)

head_module

YOLOv8HeadModule(
  (cls_preds): ModuleList(
    (0): Sequential(
      (0): ConvModule(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(320, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (1): ConvModule(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(320, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (2): Conv2d(320, 10, kernel_size=(1, 1), stride=(1, 1))
    )
    (1-2): 2 x Sequential(
      (0): ConvModule(
        (conv): Conv2d(640, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(320, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (1): ConvModule(
        (conv): Conv2d(320, 32

In [9]:
#4.bbox_head-prior_generator
from mmdet.models.task_modules.prior_generators import MlvlPointGenerator

prior_generator=MlvlPointGenerator(
    offset=0.5,
    strides=[8,16,32],
)
prior_generator

<mmdet.models.task_modules.prior_generators.point_generator.MlvlPointGenerator at 0x7fa4af08e9b0>

In [10]:
#4.bbox_head-bbox_coder
from mmyolo.models.task_modules.coders.distance_point_bbox_coder import DistancePointBBoxCoder

bbox_coder=DistancePointBBoxCoder()
bbox_coder

<mmyolo.models.task_modules.coders.distance_point_bbox_coder.DistancePointBBoxCoder at 0x7fa49441bdc0>

In [11]:
#4.bbox_head-loss
from mmyolo.models.losses.iou_loss import CustomFocalLoss,IoULoss
from mmdet.models.losses import DistributionFocalLoss

loss_cls=CustomFocalLoss(
    reduction='none',
    loss_weight=0.5,
    gamma=1,
    alpha1=1,
    alpha2=12,
    use_de=True
)

loss_bbox=IoULoss(
    iou_mode='ciou',
    bbox_format='xyxy',
    reduction='sum',
    loss_weight=7.5,
    return_iou=False          
)

loss_dfl=DistributionFocalLoss(
    reduction='mean',
    loss_weight=0.375
)

loss_cls,loss_bbox,loss_dfl

(CustomFocalLoss(avg_non_ignore=False), IoULoss(), DistributionFocalLoss())

In [12]:
#YOLOv8Head源码改写
from mmyolo.models.dense_heads import YOLOv5Head
from typing import List
from mmyolo.registry import MODELS
from mmdet.models.dense_heads.base_dense_head import BaseDenseHead
import torch

class MyYOLOv5Head(YOLOv5Head,BaseDenseHead):
    def __init__(
            self,
            head_module,
            prior_generator,
            bbox_coder,
            loss_cls,
            loss_bbox,
            loss_obj=dict(type='mmdet.CrossEntropyLoss',use_sigmoid=True,reduction='mean',loss_weight=1.0),
            prior_match_thr: float = 4.0,
            near_neighbor_thr: float = 0.5,
            ignore_iof_thr: float = -1.0,
            obj_level_weights: List[float] = [4.0, 1.0, 0.4],
            train_cfg = None,
            test_cfg = None,
            init_cfg = None
    ):
        super(YOLOv5Head,self).__init__(init_cfg=init_cfg)

        self.head_module = head_module
        self.num_classes = self.head_module.num_classes
        self.featmap_strides = self.head_module.featmap_strides
        self.num_levels = len(self.featmap_strides)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        self.loss_cls = loss_cls
        self.loss_bbox = loss_bbox
        self.loss_obj =MODELS.build(loss_obj)

        self.prior_generator = prior_generator
        self.bbox_coder = bbox_coder
        self.num_base_priors = self.prior_generator.num_base_priors[0]

        self.featmap_sizes = [torch.empty(1)] * self.num_levels

        self.prior_match_thr = prior_match_thr
        self.near_neighbor_thr = near_neighbor_thr
        self.obj_level_weights = obj_level_weights
        self.ignore_iof_thr = ignore_iof_thr

        self.special_init()

In [13]:
#YOLOv8Head源码改写
from mmyolo.models.dense_heads import YOLOv8Head

class MyYOLOv8Head(YOLOv8Head,MyYOLOv5Head):
    def __init__(
            self,
            head_module,
            prior_generator,
            bbox_coder,
            loss_cls,
            loss_bbox,
            loss_dfl,
            train_cfg = None,
            test_cfg = None,
            init_cfg = None
    ):
        super(YOLOv8Head,self).__init__(
            head_module=head_module,
            prior_generator=prior_generator,
            bbox_coder=bbox_coder,
            loss_cls=loss_cls,
            loss_bbox=loss_bbox,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            init_cfg=init_cfg)
        self.loss_dfl = loss_dfl
        # YOLOv8 doesn't need loss_obj
        self.loss_obj = None

In [44]:
#4.bbox_head

"""
#YOLOv8Head的构建需要参数全部是配置文件,这种方式不能用
bbox_head=YOLOv8Head(
    head_module=head_module,
    prior_generator=prior_generator,
    bbox_coder=bbox_coder,
    loss_cls=loss_cls,
    loss_bbox=loss_bbox,
    loss_dfl=loss_dfl
)
"""
#改写源码处报错主要源自于多个父类继承，参考https://blog.csdn.net/luhanhua/article/details/131409163
from mmengine.config import ConfigDict

bbox_head_test_cfg=ConfigDict(dict(
    multi_label=True,
    nms_pre=30000,
    score_thr=0.001,
    nms=dict(type='nms', iou_threshold=0.7),
    max_per_img=300))

bbox_head=MyYOLOv8Head(
    head_module=head_module,
    prior_generator=prior_generator,
    bbox_coder=bbox_coder,
    loss_cls=loss_cls,
    loss_bbox=loss_bbox,
    loss_dfl=loss_dfl,
    #test_cfg在配置文件中是不添加到bbox_head的
    #但是这里不添加会报错
    test_cfg=bbox_head_test_cfg
)

bbox_head

MyYOLOv8Head(
  (head_module): YOLOv8HeadModule(
    (cls_preds): ModuleList(
      (0): Sequential(
        (0): ConvModule(
          (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(320, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (activate): SiLU(inplace=True)
        )
        (1): ConvModule(
          (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(320, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (activate): SiLU(inplace=True)
        )
        (2): Conv2d(320, 10, kernel_size=(1, 1), stride=(1, 1))
      )
      (1-2): 2 x Sequential(
        (0): ConvModule(
          (conv): Conv2d(640, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(320, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (activate): SiLU(inplac

In [45]:
#1.data_preprocessor,这是一个是非特殊的模块，要到BaseModel才会开始构建，干脆直接用配置文件
data_preprocessor=dict(
        _scope_='mmyolo',
        type='YOLOv5DetDataPreprocessor',
        mean=[
            0.0,
            0.0,
            0.0,
        ],
        std=[
            255.0,
            255.0,
            255.0,
        ],
        bgr_to_rgb=True)

#5.test_cfg
train_cfg=dict()
test_cfg=dict(
    multi_label=True,
    nms_pre=30000,
    score_thr=0.001,
    nms=dict(type='nms', iou_threshold=0.7),
    max_per_img=300)

In [46]:
from mmdet.models.detectors.single_stage import SingleStageDetector
from mmdet.models.detectors.base import BaseDetector

class MySingleStageDetector(SingleStageDetector,BaseDetector):
    def __init__(
            self,
            backbone,
            neck = None,
            bbox_head = None,
            train_cfg = None,
            test_cfg = None,
            data_preprocessor = None,
            init_cfg = None
    ):
        super(SingleStageDetector,self).__init__(
            data_preprocessor=data_preprocessor, init_cfg=init_cfg)
        self.backbone = backbone
        if neck is not None:
            self.neck = neck
        self.bbox_head = bbox_head
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

In [47]:
#6.model
from mmyolo.models import YOLODetector


class MyYOLODetector(YOLODetector,MySingleStageDetector):
    def __init__(
            self,
            backbone,
            neck,
            bbox_head,
            train_cfg= None,
            test_cfg = None,
            data_preprocessor = None,
            init_cfg = None,
            use_syncbn: bool = True
    ):
        super(YOLODetector,self).__init__(
            backbone=backbone,
            neck=neck,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            data_preprocessor=data_preprocessor,
            init_cfg=init_cfg)
        



In [48]:
model=MyYOLODetector(
    backbone=backbone,
    neck=neck,
    bbox_head=bbox_head,
    train_cfg=train_cfg,
    test_cfg=test_cfg,
    data_preprocessor=data_preprocessor,
    init_cfg=None
)
model

MyYOLODetector(
  (data_preprocessor): YOLOv5DetDataPreprocessor()
  (backbone): YOLOv8CSPDarknet(
    (stem): ConvModule(
      (conv): Conv2d(3, 80, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (activate): SiLU(inplace=True)
    )
    (stage1): Sequential(
      (0): ConvModule(
        (conv): Conv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (1): CSPLayerWithTwoConv(
        (main_conv): ConvModule(
          (conv): Conv2d(160, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (activate): SiLU(inplace=True)
        )
        (final_conv): ConvModule(
          (conv): Conv2d(400, 160

In [26]:
pretrained_model.keys()

dict_keys(['meta', 'state_dict', 'message_hub', 'ema_state_dict'])

In [49]:
#加载模型权重
import torch

pretrained_model=(torch.load("/project/volume/test_paper/application/work_dir/train_log/exp6/exp6_2_x_r0_d0_fl1112/best_coco_bbox_mAP_epoch_90.pth"))

model.load_state_dict(pretrained_model["state_dict"])
model

MyYOLODetector(
  (data_preprocessor): YOLOv5DetDataPreprocessor()
  (backbone): YOLOv8CSPDarknet(
    (stem): ConvModule(
      (conv): Conv2d(3, 80, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (activate): SiLU(inplace=True)
    )
    (stage1): Sequential(
      (0): ConvModule(
        (conv): Conv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (activate): SiLU(inplace=True)
      )
      (1): CSPLayerWithTwoConv(
        (main_conv): ConvModule(
          (conv): Conv2d(160, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(160, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (activate): SiLU(inplace=True)
        )
        (final_conv): ConvModule(
          (conv): Conv2d(400, 160

In [51]:
for data in test_dataloader:
    print(data)
    break

{'inputs': [tensor([[[114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         ...,
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114]],

        [[114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         ...,
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114]],

        [[114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         ...,
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  ..., 114, 114, 114]]], dtype=torch.uint8), tensor([[[114, 114, 114,  ..., 114, 114, 114],
         [114, 114, 114,  .

In [63]:
import torch
for data in test_dataloader:
    with torch.no_grad():
        predict=model.test_step(data)[0]
        print(predict)
    break

### TODO 流程应该是没问题的，错误也找到了，就是不知道怎么改

IndexError: The shape of the mask [537600] at index 0 does not match the shape of the indexed tensor [8400, 4] at index 0