Skip to content

Latest commit

 

History

History
215 lines (193 loc) · 10.4 KB

models----detr----configuration_detr.py.md

File metadata and controls

215 lines (193 loc) · 10.4 KB

.\models\detr\configuration_detr.py

# 设置文件编码为utf-8
# 版权声明
# 根据Apache License, Version 2.0,除非符合许可证要求或书面同意,在不提供任何保证或条件的情况下分发软件
# 请查看许可证以获取更多信息:http://www.apache.org/licenses/LICENSE-2.0
# DETR模型配置

# 导入需要的库
from collections import OrderedDict
from typing import Mapping
from packaging import version
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING

# 获取日志记录器
logger = logging.get_logger(__name__)

# DETR预训练配置档案映射
DETR_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "facebook/detr-resnet-50": "https://huggingface.co/facebook/detr-resnet-50/resolve/main/config.json",
    # 查看所有DETR模型:https://huggingface.co/models?filter=detr
}

# DetrConfig类继承自PretrainedConfig
# 用于存储DetrModel的配置,并可根据指定参数实例化DETR模型架构
# 使用默认值实例化配置将产生类似于DETR[facebook/detr-resnet-50]架构的配置
# 配置对象继承自PretrainedConfig,并可用于控制模型输出
# 有关更多信息,请阅读来自PretrainedConfig的文档
# 示例
class DetrConfig(PretrainedConfig):
    """
    This is the configuration class to store the configuration of a [`DetrModel`]. It is used to instantiate a DETR
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the DETR
    [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Examples:

    ```python
    >>> from transformers import DetrConfig, DetrModel

    >>> # Initializing a DETR facebook/detr-resnet-50 style configuration
    >>> configuration = DetrConfig()

    >>> # Initializing a model (with random weights) from the facebook/detr-resnet-50 style configuration
    >>> model = DetrModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```py
    """
    # 模型类型为"detr"
    model_type = "detr"
    # 推断时要忽略的键
    keys_to_ignore_at_inference = ["past_key_values"]
    # 属性映射
    attribute_map = {
        "hidden_size": "d_model",
        "num_attention_heads": "encoder_attention_heads",
    }
    # 初始化函数,用于创建一个新的模型实例
    def __init__(
        self,
        use_timm_backbone=True,  # 是否使用timm库提供的预训练骨干网络,默认为True
        backbone_config=None,    # 骨干网络配置参数,用于指定骨干网络的设置,可以为None
        num_channels=3,          # 输入图像通道数,默认为3(RGB图像)
        num_queries=100,         # 解码器中查询的数量,默认为100
        encoder_layers=6,        # 编码器的层数,默认为6层
        encoder_ffn_dim=2048,    # 编码器中全连接层的维度,默认为2048
        encoder_attention_heads=8,  # 编码器中的注意力头数,默认为8个
        decoder_layers=6,        # 解码器的层数,默认为6层
        decoder_ffn_dim=2048,    # 解码器中全连接层的维度,默认为2048
        decoder_attention_heads=8,  # 解码器中的注意力头数,默认为8个
        encoder_layerdrop=0.0,    # 编码器的层丢弃率,默认为0.0(不丢弃)
        decoder_layerdrop=0.0,    # 解码器的层丢弃率,默认为0.0(不丢弃)
        is_encoder_decoder=True,  # 是否使用编码-解码结构,默认为True
        activation_function="relu",  # 激活函数的类型,默认为ReLU
        d_model=256,              # 模型维度,默认为256
        dropout=0.1,              # 全连接层的丢弃率,默认为0.1
        attention_dropout=0.0,    # 注意力层的丢弃率,默认为0.0
        activation_dropout=0.0,   # 激活函数的丢弃率,默认为0.0
        init_std=0.02,            # 参数初始化的标准差,默认为0.02
        init_xavier_std=1.0,      # Xavier初始化的标准差,默认为1.0
        auxiliary_loss=False,     # 是否使用辅助损失,默认为False
        position_embedding_type="sine",  # 位置嵌入的类型,默认为正弦位置嵌入
        backbone="resnet50",      # 使用的骨干网络类型,默认为ResNet-50
        use_pretrained_backbone=True,  # 是否使用预训练的骨干网络,默认为True
        dilation=False,           # 是否使用扩张卷积,默认为False
        class_cost=1,             # 类别损失的权重,默认为1
        bbox_cost=5,              # 边界框损失的权重,默认为5
        giou_cost=2,              # GIoU损失的权重,默认为2
        mask_loss_coefficient=1,  # 掩模损失的系数,默认为1
        dice_loss_coefficient=1,  # Dice损失的系数,默认为1
        bbox_loss_coefficient=5,  # 边界框损失的系数,默认为5
        giou_loss_coefficient=2,  # GIoU损失的系数,默认为2
        eos_coefficient=0.1,      # EOS(终止符)损失的系数,默认为0.1
        **kwargs,                 # 其他关键字参数,用于灵活传递额外的参数
        ):
            # 如果同时指定了 `backbone_config` 和 `use_timm_backbone`,则抛出 ValueError
            if backbone_config is not None and use_timm_backbone:
                raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.")

            # 如果不使用 timm 的 backbone
            if not use_timm_backbone:
                # 如果未指定 backbone_config,则初始化配置为默认的 ResNet backbone
                if backbone_config is None:
                    logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.")
                    backbone_config = CONFIG_MAPPING["resnet"](out_features=["stage4"])
                # 如果 backbone_config 是字典类型
                elif isinstance(backbone_config, dict):
                    # 从字典中获取 backbone_model_type
                    backbone_model_type = backbone_config.get("model_type")
                    # 根据 backbone_model_type 获取对应的配置类
                    config_class = CONFIG_MAPPING[backbone_model_type]
                    # 根据字典创建配置对象
                    backbone_config = config_class.from_dict(backbone_config)
                # 设置 timm 相关属性为 None
                dilation, backbone, use_pretrained_backbone = None, None, None

            # 设置类的属性
            self.use_timm_backbone = use_timm_backbone
            self.backbone_config = backbone_config
            self.num_channels = num_channels
            self.num_queries = num_queries
            self.d_model = d_model
            self.encoder_ffn_dim = encoder_ffn_dim
            self.encoder_layers = encoder_layers
            self.encoder_attention_heads = encoder_attention_heads
            self.decoder_ffn_dim = decoder_ffn_dim
            self.decoder_layers = decoder_layers
            self.decoder_attention_heads = decoder_attention_heads
            self.dropout = dropout
            self.attention_dropout = attention_dropout
            self.activation_dropout = activation_dropout
            self.activation_function = activation_function
            self.init_std = init_std
            self.init_xavier_std = init_xavier_std
            self.encoder_layerdrop = encoder_layerdrop
            self.decoder_layerdrop = decoder_layerdrop
            self.num_hidden_layers = encoder_layers
            self.auxiliary_loss = auxiliary_loss
            self.position_embedding_type = position_embedding_type
            self.backbone = backbone
            self.use_pretrained_backbone = use_pretrained_backbone
            self.dilation = dilation
            # Hungarian matcher
            self.class_cost = class_cost
            self.bbox_cost = bbox_cost
            self.giou_cost = giou_cost
            # Loss coefficients
            self.mask_loss_coefficient = mask_loss_coefficient
            self.dice_loss_coefficient = dice_loss_coefficient
            self.bbox_loss_coefficient = bbox_loss_coefficient
            self.giou_loss_coefficient = giou_loss_coefficient
            self.eos_coefficient = eos_coefficient
            # 调用父类的构造函数
            super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs)

        @property
        def num_attention_heads(self) -> int:
            # 返回编码器注意力头的数量
            return self.encoder_attention_heads

        @property
        def hidden_size(self) -> int:
            # 返回隐藏层大小
            return self.d_model

        @classmethod
    # 从预训练的骨干模型配置实例化一个`DetrConfig`(或派生类)

    def from_backbone_config(cls, backbone_config: PretrainedConfig, **kwargs):
        """Instantiate a [`DetrConfig`] (or a derived class) from a pre-trained backbone model configuration.

        Args:
            backbone_config ([`PretrainedConfig`]):
                The backbone configuration. # 骨干配置

        Returns:
            [`DetrConfig`]: An instance of a configuration object # 返回一个配置对象的实例
        """
        # 返回使用给定的骨干配置和其他传入参数的类实例
        return cls(backbone_config=backbone_config, **kwargs)
# 定义一个名为 DetrOnnxConfig 的类,继承自 OnnxConfig
class DetrOnnxConfig(OnnxConfig):
    # 设置 torch_onnx_minimum_version 属性为解析后的版本号 1.11
    torch_onnx_minimum_version = version.parse("1.11")

    # 定义 inputs 属性,返回一个有序字典,包含输入张量的名称及其维度索引映射
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        return OrderedDict(
            [
                ("pixel_values", {0: "batch", 1: "num_channels", 2: "height", 3: "width"}),
                ("pixel_mask", {0: "batch"}),
            ]
        )

    # 定义 atol_for_validation 属性,返回用于验证的绝对误差阈值
    @property
    def atol_for_validation(self) -> float:
        return 1e-5

    # 定义 default_onnx_opset 属性,返回默认的 ONNX 运算集版本号
    @property
    def default_onnx_opset(self) -> int:
        return 12