In [1]:
import argparse
import contextlib
import math
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path

import torch
import torch.nn as nn

from models.common import (
    C3,
    C3SPP,
    C3TR,
    SPP,
    SPPF,
    Bottleneck,
    BottleneckCSP,
    C3Ghost,
    C3x,
    Classify,
    Concat,
    Contract,
    Conv,
    CrossConv,
    DetectMultiBackend,
    DWConv,
    DWConvTranspose2d,
    Expand,
    Focus,
    GhostBottleneck,
    GhostConv,
    Proto,
)
from models.experimental import MixConv2d
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (
    fuse_conv_and_bn,
    initialize_weights,
    model_info,
    profile,
    scale_img,
    select_device,
    time_sync,
)

try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None

class Detect(nn.Module):
    # YOLOv5 Detect head for detection models
    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
        """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)

    def forward(self, x):
        """Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)

    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
        """Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
        yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compatibility
        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
        return grid, anchor_grid


class Segment(Detect):
    # YOLOv5 Segment head for segmentation models
    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
        """Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
        super().__init__(nc, anchors, ch, inplace)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # number of outputs per anchor
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
        self.detect = Detect.forward

    def forward(self, x):
        """Processes input through the network, returning detections and prototypes; adjusts output based on
        training/export mode.
        """
        p = self.proto(x[0])
        x = self.detect(self, x)
        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--profile", action="store_true", help="profile model speed")
parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
opt = parser.parse_known_args()[0]
opt.cfg = check_yaml(opt.cfg)  # check YAML
print_args(vars(opt))

device = select_device(opt.device)

[34m[1m3592105272: [0mcfg=yolov5s.yaml, batch_size=1, device=, profile=False, line_profile=False, test=False
YOLOv5  299d3d70 Python-3.8.18 torch-1.13.1+cu116 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)



##### Read model.yaml

In [3]:
cfg="yolov5s.yaml"
if isinstance(cfg, dict):
    yaml = cfg  # model dict
else:  # is *.yaml
    import yaml  # for torch hub
    from pathlib import Path
    with open(cfg, encoding="ascii", errors="ignore") as f:
        yaml = yaml.safe_load(f)  # model dict

print('class num:', yaml['nc'])
print('anchors:', yaml['anchors'])
print('backbone:', yaml['backbone'])
print('head:', yaml['head'])

class num: 6
anchors: [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
backbone: [[-1, 1, 'Conv', [64, 6, 2, 2]], [-1, 1, 'Conv', [128, 3, 2]], [-1, 3, 'C3', [128]], [-1, 1, 'Conv', [256, 3, 2]], [-1, 6, 'C3', [256]], [-1, 1, 'Conv', [512, 3, 2]], [-1, 9, 'C3', [512]], [-1, 1, 'Conv', [1024, 3, 2]], [-1, 3, 'C3', [1024]], [-1, 1, 'SPPF', [1024, 5]]]
head: [[-1, 1, 'Conv', [512, 1, 1]], [-1, 1, 'nn.Upsample', ['None', 2, 'nearest']], [[-1, 6], 1, 'Concat', [1]], [-1, 3, 'C3', [512, False]], [-1, 1, 'Conv', [256, 1, 1]], [-1, 1, 'nn.Upsample', ['None', 2, 'nearest']], [[-1, 4], 1, 'Concat', [1]], [-1, 3, 'C3', [256, False]], [-1, 1, 'Conv', [256, 3, 2]], [[-1, 14], 1, 'Concat', [1]], [-1, 3, 'C3', [512, False]], [-1, 1, 'Conv', [512, 3, 2]], [[-1, 10], 1, 'Concat', [1]], [-1, 3, 'C3', [1024, False]], [[17, 20, 23], 1, 'Detect', ['nc', 'anchors']]]


##### Set nc, anchors, ch

In [4]:
nc=None
anchors=None
#  这两行代码设置了输入通道数量(ch)。默认值是3，代表RGB图像。如果YAML文件中有定义，那么会使用文件中的值.
ch=3  # classes, anchors, input channels
ch = yaml["ch"] = yaml.get("ch", ch)  # input channels

#  这两个判断语句检查是否需要覆盖类别数量和锚点的值。如果需要，那么就会用新的值覆盖YAML文件中的值。
#  如果不指定nc和anchors，那么就会使用YAML文件中的值。
if nc and nc != yaml["nc"]:
    LOGGER.info(f"Overriding model.yaml nc={yaml['nc']} with nc={nc}")
    yaml["nc"] = nc  # override yaml value
if anchors:
    LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
    yaml["anchors"] = round(anchors)  # override yaml value

# deepcopy yaml，防止修改原始数据
d = deepcopy(yaml)
ch=[ch]
print('ch:', ch, '默认值是3，代表RGB图像')

anchors, nc, gd, gw, act, ch_mul = (
    d["anchors"],
    d["nc"],
    d["depth_multiple"],
    d["width_multiple"],
    d.get("activation"),
    d.get("channel_multiple"),
)

print('anchors:', anchors)

ch: [3] 默认值是3，代表RGB图像
anchors: [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]


##### Set default activation and channel_multiple
`Conv.default_act = eval(act)`
eval(act) 是一个Python内置函数，它会执行一个字符串形式的表达式，并返回结果。在这个例子中，act 是一个字符串，代表了激活函数的名称，比如 "nn.SiLU()"。eval(act) 会返回这个激活函数的实例，然后赋值给 Conv.default_act。
在YOLOv5模型配置中，channel_multiple是一个可选参数，用于调整模型中每一层的通道数。这个参数可以用来控制模型的宽度。例如，如果channel_multiple设置为2，那么模型中每一层的通道数将会翻倍。这可以用来增加模型的复杂性和容量，可能会提高模型的性能，但同时也会增加模型的计算量和参数数量。如果没有在配置文件中指定channel_multiple，则默认值为8。

In [5]:
if act:
    Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
    LOGGER.info(f"{colorstr('activation:')} {act}")  # print
if not ch_mul:
    ch_mul = 8
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
print('number of anchors:', na)
no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
print('number of outputs:', no, '计算方式:', '[x,y,w,h,class]有5个参数, na个anchors, nc个Class')

# Initialization
layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out


number of anchors: 3
number of outputs: 33 计算方式: [x,y,w,h,class]有5个参数, na个anchors, nc个Class


`eval(m)`
表示将字符串m作为函数或类实现，大概是这样吧

In [9]:
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
    m = eval(m) if isinstance(m, str) else m  # eval strings
    for j, a in enumerate(args):
        with contextlib.suppress(NameError):
            args[j] = eval(a) if isinstance(a, str) else a  # eval strings

    n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
    if m in {
        Conv,
        GhostConv,
        Bottleneck,
        GhostBottleneck,
        SPP,
        SPPF,
        DWConv,
        MixConv2d,
        Focus,
        CrossConv,
        BottleneckCSP,
        C3,
        C3TR,
        C3SPP,
        C3Ghost,
        nn.ConvTranspose2d,
        DWConvTranspose2d,
        C3x,
    }:
        c1, c2 = ch[f], args[0]
        if c2 != no:  # if not output
            c2 = make_divisible(c2 * gw, ch_mul)

        args = [c1, c2, *args[1:]]
        if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
            args.insert(2, n)  # number of repeats
            n = 1
    elif m is nn.BatchNorm2d:
        args = [ch[f]]
    elif m is Concat:
        c2 = sum(ch[x] for x in f)
    # TODO: channel, gw, gd
    elif m in {Detect, Segment}:
        args.append([ch[x] for x in f])
        if isinstance(args[1], int):  # number of anchors
            args[1] = [list(range(args[1] * 2))] * len(f)
        if m is Segment:
            args[3] = make_divisible(args[3] * gw, ch_mul)
    elif m is Contract:
        c2 = ch[f] * args[0] ** 2
    elif m is Expand:
        c2 = ch[f] // args[0] ** 2
    else:
        c2 = ch[f]

    m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
    t = str(m)[8:-2].replace("__main__.", "")  # module type
    np = sum(x.numel() for x in m_.parameters())  # number params
    m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
    LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f}  {t:<40}{str(args):<30}")  # print
    save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
    layers.append(m_)
    if i == 0:
        ch = []
    ch.append(c2)


                 from  n    params  module                                  arguments                     
  0                -1  1    589888  models.common.Conv                      [512, 32, 6, 2, 2]            
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 
  7                -1  1   1180672  models.common.Conv                      [256, 512, 3, 2]              
  8                -1  1   1182720  

TypeError: __init__() takes from 1 to 5 positional arguments but 7 were given

In [7]:
model = nn.Sequential(*layers)  # model
model.nc = nc  # attach number of classes to model
# layers print
print(layers)
save = sorted(save)


[Conv(
  (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2), bias=False)
  (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act): SiLU()
), Conv(
  (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act): SiLU()
), C3(
  (cv1): Conv(
    (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): SiLU()
  )
  (cv2): Conv(
    (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): SiLU()
  )
  (cv3): Conv(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Si

In [8]:
names = [str(i) for i in range(yaml["nc"])]  # default names
print('names:', names)

names: ['0', '1', '2', '3', '4', '5']


In [12]:
inplace = yaml.get("inplace", True)
print('inplace:', inplace)
# ，inplace 是一个布尔值，用于决定某些操作是否在原地进行。如果 inplace 为 True，则操作会直接修改数据，而不会创建新的数据或变量。这通常可以节省内存，但可能会覆盖原始数据，因此需要谨慎使用。如果 inplace 为 False，则操作会创建新的数据或变量，而不会修改原始数据。

inplace: True


In [16]:
m = model[-1]  # Detect()
print('m:', m)
nl = model[-1].nl  # number of detection layers (P3-P5)
print('nl:', nl)


m: Detect(
  (m): ModuleList(
    (0): Conv2d(128, 33, kernel_size=(1, 1), stride=(1, 1))
    (1): Conv2d(256, 33, kernel_size=(1, 1), stride=(1, 1))
    (2): Conv2d(512, 33, kernel_size=(1, 1), stride=(1, 1))
  )
)
nl: 3


TypeError: zeros(): argument 'size' must be tuple of SymInts, but found element of type list at pos 2