include/NvInferPluginUtils.h

/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef NV_INFER_PLUGIN_UTILS_H
#define NV_INFER_PLUGIN_UTILS_H

#include "NvInferRuntimeCommon.h"

//!
//! \file NvInferPluginUtils.h
//!
//! This is the API for the Nvidia provided TensorRT plugin utilities.
//! It lists all the parameters utilized by the TensorRT plugins.
//!

namespace nvinfer1
{
namespace plugin
{

//!
//! \struct PriorBoxParameters
//!
//! \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all
//! dimensions (H x W).
//!
//! PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer.
//!
struct PriorBoxParameters
{
    float *minSize;          //!< Minimum box size in pixels. Can not be nullptr.
    float *maxSize;          //!< Maximum box size in pixels. Can be nullptr.
    float *aspectRatios;     //!< Aspect ratios of the boxes. Can be nullptr.
    int32_t numMinSize;      //!< Number of elements in minSize. Must be larger than 0.
    int32_t numMaxSize;      //!< Number of elements in maxSize. Can be 0 or same as numMinSize.
    int32_t numAspectRatios; //!< Number of elements in aspectRatios. Can be 0.
    bool flip;               //!< If true, will flip each aspect ratio. For example,
                             //!< if there is an aspect ratio "r", the aspect ratio "1.0/r" will be generated as well.
    bool clip;               //!< If true, will clip the prior so that it is within [0,1].
    float variance[4];       //!< Variance for adjusting the prior boxes.
    int32_t imgH;            //!< Image height. If 0, then the H dimension of the data tensor will be used.
    int32_t imgW;            //!< Image width. If 0, then the W dimension of the data tensor will be used.
    float stepH;             //!< Step in H. If 0, then (float)imgH/h will be used where h is the H dimension of the 1st input tensor.
    float stepW;             //!< Step in W. If 0, then (float)imgW/w will be used where w is the W dimension of the 1st input tensor.
    float offset;            //!< Offset to the top left corner of each cell.
};

//!
//! \struct RPROIParams
//!
//! \brief RPROIParams is used to create the RPROIPlugin instance.
//!
struct RPROIParams
{
    int32_t poolingH;          //!< Height of the output in pixels after ROI pooling on feature map.
    int32_t poolingW;          //!< Width of the output in pixels after ROI pooling on feature map.
    int32_t featureStride;     //!< Feature stride; ratio of input image size to feature map size.
                               //!< Assuming that max pooling layers in the neural network use square filters.
    int32_t preNmsTop;         //!< Number of proposals to keep before applying NMS.
    int32_t nmsMaxOut;         //!< Number of remaining proposals after applying NMS.
    int32_t anchorsRatioCount; //!< Number of anchor box ratios.
    int32_t anchorsScaleCount; //!< Number of anchor box scales.
    float iouThreshold;        //!< IoU (Intersection over Union) threshold used for the NMS step.
    float minBoxSize;          //!< Minimum allowed bounding box size before scaling, used for anchor box calculation.
    float spatialScale;        //!< Spatial scale between the input image and the last feature map.
};

//!
//! \struct GridAnchorParameters
//!
//! \brief The Anchor Generator plugin layer generates the prior boxes of designated sizes and aspect ratios across all dimensions (H x W).
//! GridAnchorParameters defines a set of parameters for creating the plugin layer for all feature maps.
//!
struct GridAnchorParameters
{
    float minSize;           //!< Scale of anchors corresponding to finest resolution.
    float maxSize;           //!< Scale of anchors corresponding to coarsest resolution.
    float* aspectRatios;     //!< List of aspect ratios to place on each grid point.
    int32_t numAspectRatios; //!< Number of elements in aspectRatios.
    int32_t H;               //!< Height of feature map to generate anchors for.
    int32_t W;               //!< Width of feature map to generate anchors for.
    float variance[4];       //!< Variance for adjusting the prior boxes.
};

//!
//! \enum CodeTypeSSD
//!
//! \brief The type of encoding used for decoding the bounding boxes and loc_data.
//!
//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
//!
enum class CodeTypeSSD : int32_t
{
    CORNER TRT_DEPRECATED_ENUM = 0,      //!< Use box corners.
    CENTER_SIZE TRT_DEPRECATED_ENUM = 1, //!< Use box centers and size.
    CORNER_SIZE TRT_DEPRECATED_ENUM = 2, //!< Use box centers and size.
    TF_CENTER TRT_DEPRECATED_ENUM = 3    //!< Use box centers and size but flip x and y coordinates.
};

//!
//! \struct DetectionOutputParameters
//!
//! \brief The DetectionOutput plugin layer generates the detection output
//! based on location and confidence predictions by doing non maximum suppression.
//!
//! This plugin first decodes the bounding boxes based on the anchors generated.
//! It then performs non_max_suppression on the decoded bounding boxes.
//! DetectionOutputParameters defines a set of parameters for creating the DetectionOutput plugin layer.
//!
//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
//!
struct TRT_DEPRECATED DetectionOutputParameters
{
    bool shareLocation;           //!< If true, bounding box are shared among different classes.
    bool varianceEncodedInTarget; //!< If true, variance is encoded in target.
                                  //!< Otherwise we need to adjust the predicted offset accordingly.
    int32_t backgroundLabelId;    //!< Background label ID. If there is no background class, set it as -1.
    int32_t numClasses;           //!< Number of classes to be predicted.
    int32_t topK;                 //!< Number of boxes per image with top confidence scores that are fed
                                  //!< into the NMS algorithm.
    int32_t keepTopK;             //!< Number of total bounding boxes to be kept per image after NMS step.
    float confidenceThreshold;    //!< Only consider detections whose confidences are larger than a threshold.
    float nmsThreshold;           //!< Threshold to be used in NMS.
    CodeTypeSSD codeType;         //!< Type of coding method for bbox.
    int32_t inputOrder[3];        //!< Specifies the order of inputs {loc_data, conf_data, priorbox_data}.
    bool confSigmoid;             //!< Set to true to calculate sigmoid of confidence scores.
    bool isNormalized;            //!< Set to true if bounding box data is normalized by the network.
    bool isBatchAgnostic{true};   //!< Defaults to true. Set to false if prior boxes are unique per batch.
};

//!
//! \brief When performing yolo9000, softmaxTree is helping to do softmax on confidence scores,
//! for element to get the precise classification through word-tree structured classification definition.
//!
struct softmaxTree
{
    int32_t* leaf;
    int32_t n;
    int32_t* parent;
    int32_t* child;
    int32_t* group;
    char** name;
    int32_t groups;
    int32_t* groupSize;
    int32_t* groupOffset;
};

//!
//! \brief The Region plugin layer performs region proposal calculation.
//!
//! Generate 5 bounding boxes per cell (for yolo9000, generate 3 bounding boxes per cell).
//! For each box, calculating its probabilities of objects detections from 80 pre-defined classifications
//! (yolo9000 has 9418 pre-defined classifications, and these 9418 items are organized as work-tree structure).
//! RegionParameters defines a set of parameters for creating the Region plugin layer.
//!
struct RegionParameters
{
    int32_t num;         //!< Number of predicted bounding box for each grid cell.
    int32_t coords;      //!< Number of coordinates for a bounding box.
    int32_t classes;     //!< Number of classifications to be predicted.
    softmaxTree* smTree; //!< Helping structure to do softmax on confidence scores.
};

//!
//! \brief The NMSParameters are used by the BatchedNMSPlugin for performing
//! the non_max_suppression operation over boxes for object detection networks.
//!
//! \deprecated Deprecated in TensorRT 10.0. BatchedNMSPlugin plugin is deprecated.
//!
struct TRT_DEPRECATED NMSParameters
{
    bool shareLocation;        //!< If set to true, the boxes inputs are shared across all classes.
                               //!< If set to false, the boxes input should account for per class box data.
    int32_t backgroundLabelId; //!< Label ID for the background class.
                               //!< If there is no background class, set it as -1
    int32_t numClasses;        //!< Number of classes in the network.
    int32_t topK;              //!< Number of bounding boxes to be fed into the NMS step.
    int32_t keepTopK;          //!< Number of total bounding boxes to be kept per image after NMS step.
                               //!< Should be less than or equal to the topK value.
    float scoreThreshold;      //!< Scalar threshold for score (low scoring boxes are removed).
    float iouThreshold;        //!< A scalar threshold for IOU (new boxes that have high IOU overlap
                               //!< with previously selected boxes are removed).
    bool isNormalized;         //!< Set to false, if the box coordinates are not normalized,
                               //!< i.e. not in the range [0,1]. Defaults to false.
};

} // namespace plugin
} // namespace nvinfer1

#endif // NV_INFER_PLUGIN_UTILS_H