/
NvInferPluginUtils.h
204 lines (190 loc) · 9.91 KB
/
NvInferPluginUtils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef NV_INFER_PLUGIN_UTILS_H
#define NV_INFER_PLUGIN_UTILS_H
#include "NvInferRuntimeCommon.h"
//!
//! \file NvInferPluginUtils.h
//!
//! This is the API for the NVIDIA-provided TensorRT plugin utilities.
//! It lists all the parameters utilized by the TensorRT plugins.
//!
namespace nvinfer1
{
namespace plugin
{
//!
//! \struct PriorBoxParameters
//!
//! \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all
//! dimensions (H x W).
//!
//! PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer.
//!
//! The three array members are raw pointers, each paired with an element count (numMinSize, numMaxSize,
//! numAspectRatios). No member has a default initializer, so every field must be set by the caller.
//!
//! NOTE(review): this header does not state who owns the pointed-to arrays or how long they must remain
//! valid; confirm against the plugin implementation.
//!
struct PriorBoxParameters
{
float *minSize; //!< Minimum box sizes in pixels (numMinSize elements). Cannot be nullptr.
float *maxSize; //!< Maximum box sizes in pixels (numMaxSize elements). Can be nullptr.
float *aspectRatios; //!< Aspect ratios of the boxes (numAspectRatios elements). Can be nullptr.
int32_t numMinSize; //!< Number of elements in minSize. Must be larger than 0.
int32_t numMaxSize; //!< Number of elements in maxSize. Can be 0 or same as numMinSize.
int32_t numAspectRatios; //!< Number of elements in aspectRatios. Can be 0.
bool flip; //!< If true, will flip each aspect ratio. For example,
//!< if there is an aspect ratio "r", the aspect ratio "1.0/r" will be generated as well.
bool clip; //!< If true, will clip the prior so that it is within [0,1].
float variance[4]; //!< Variance for adjusting the prior boxes (four values).
int32_t imgH; //!< Image height. If 0, then the H dimension of the data tensor will be used.
int32_t imgW; //!< Image width. If 0, then the W dimension of the data tensor will be used.
float stepH; //!< Step in H. If 0, then (float)imgH/h will be used where h is the H dimension of the 1st input tensor.
float stepW; //!< Step in W. If 0, then (float)imgW/w will be used where w is the W dimension of the 1st input tensor.
float offset; //!< Offset to the top left corner of each cell.
};
//!
//! \struct RPROIParams
//!
//! \brief RPROIParams is used to create the RPROIPlugin instance.
//!
//! All members are plain scalar values. No member has a default initializer, so every field must be set by
//! the caller.
//!
struct RPROIParams
{
int32_t poolingH; //!< Height of the output in pixels after ROI pooling on feature map.
int32_t poolingW; //!< Width of the output in pixels after ROI pooling on feature map.
int32_t featureStride; //!< Feature stride; ratio of input image size to feature map size.
//!< This assumes that max pooling layers in the neural network use square filters.
int32_t preNmsTop; //!< Number of proposals to keep before applying NMS.
int32_t nmsMaxOut; //!< Number of remaining proposals after applying NMS.
int32_t anchorsRatioCount; //!< Number of anchor box ratios.
int32_t anchorsScaleCount; //!< Number of anchor box scales.
float iouThreshold; //!< IoU (Intersection over Union) threshold used for the NMS step.
float minBoxSize; //!< Minimum allowed bounding box size before scaling, used for anchor box calculation.
float spatialScale; //!< Spatial scale between the input image and the last feature map.
};
//!
//! \struct GridAnchorParameters
//!
//! \brief The Anchor Generator plugin layer generates the prior boxes of designated sizes and aspect ratios across all dimensions (H x W).
//!
//! GridAnchorParameters defines a set of parameters for creating the plugin layer for all feature maps.
//!
//! No member has a default initializer, so every field must be set by the caller.
//!
//! NOTE(review): this header does not state who owns the aspectRatios array or how long it must remain
//! valid; confirm against the plugin implementation.
//!
struct GridAnchorParameters
{
float minSize; //!< Scale of anchors corresponding to finest resolution.
float maxSize; //!< Scale of anchors corresponding to coarsest resolution.
float* aspectRatios; //!< List of aspect ratios to place on each grid point (numAspectRatios elements).
int32_t numAspectRatios; //!< Number of elements in aspectRatios.
int32_t H; //!< Height of feature map to generate anchors for.
int32_t W; //!< Width of feature map to generate anchors for.
float variance[4]; //!< Variance for adjusting the prior boxes (four values).
};
//!
//! \enum CodeTypeSSD
//!
//! \brief The type of encoding used for decoding the bounding boxes and loc_data.
//!
//! The underlying type is int32_t and each enumerator has an explicitly assigned value (0 through 3).
//! Every enumerator is individually marked with TRT_DEPRECATED_ENUM.
//!
//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
//!
enum class CodeTypeSSD : int32_t
{
CORNER TRT_DEPRECATED_ENUM = 0, //!< Use box corners.
CENTER_SIZE TRT_DEPRECATED_ENUM = 1, //!< Use box centers and size.
CORNER_SIZE TRT_DEPRECATED_ENUM = 2, //!< Use box corners and size.
//!< NOTE(review): was documented with the same text as CENTER_SIZE; wording
//!< corrected from the enumerator name - confirm against the plugin.
TF_CENTER TRT_DEPRECATED_ENUM = 3 //!< Use box centers and size but flip x and y coordinates.
};
//!
//! \struct DetectionOutputParameters
//!
//! \brief The DetectionOutput plugin layer generates the detection output
//! based on location and confidence predictions by doing non maximum suppression.
//!
//! This plugin first decodes the bounding boxes based on the anchors generated.
//! It then performs non_max_suppression on the decoded bounding boxes.
//! DetectionOutputParameters defines a set of parameters for creating the DetectionOutput plugin layer.
//!
//! Only isBatchAgnostic has a default member initializer (true); every other field must be set by the caller.
//!
//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
//!
struct TRT_DEPRECATED DetectionOutputParameters
{
bool shareLocation; //!< If true, bounding boxes are shared among different classes.
bool varianceEncodedInTarget; //!< If true, variance is encoded in target.
//!< Otherwise we need to adjust the predicted offset accordingly.
int32_t backgroundLabelId; //!< Background label ID. If there is no background class, set it as -1.
int32_t numClasses; //!< Number of classes to be predicted.
int32_t topK; //!< Number of boxes per image with top confidence scores that are fed
//!< into the NMS algorithm.
int32_t keepTopK; //!< Number of total bounding boxes to be kept per image after NMS step.
float confidenceThreshold; //!< Only consider detections whose confidences are larger than a threshold.
float nmsThreshold; //!< Threshold to be used in NMS.
CodeTypeSSD codeType; //!< Type of coding method for bbox.
int32_t inputOrder[3]; //!< Specifies the order of inputs {loc_data, conf_data, priorbox_data}.
bool confSigmoid; //!< Set to true to calculate sigmoid of confidence scores.
bool isNormalized; //!< Set to true if bounding box data is normalized by the network.
bool isBatchAgnostic{true}; //!< Defaults to true. Set to false if prior boxes are unique per batch.
};
//!
//! \struct softmaxTree
//!
//! \brief Helper structure used when running yolo9000: it describes the word-tree structured classification
//! definition so that softmax can be applied to the confidence scores and each element can be given a precise
//! classification.
//!
//! NOTE(review): the members are undocumented in this header. The per-member descriptions below are inferred
//! from the member names only and must be confirmed against the Region plugin implementation. Array lengths
//! and ownership of the pointed-to memory are likewise not stated here.
//!
struct softmaxTree
{
int32_t* leaf; //!< Presumably a per-node flag marking leaf nodes - TODO confirm.
int32_t n; //!< Presumably the number of nodes in the tree - TODO confirm.
int32_t* parent; //!< Presumably the parent index of each node - TODO confirm.
int32_t* child; //!< Presumably the child index of each node - TODO confirm.
int32_t* group; //!< Presumably the group index each node belongs to - TODO confirm.
char** name; //!< Presumably the name string of each node - TODO confirm.
int32_t groups; //!< Presumably the number of groups - TODO confirm.
int32_t* groupSize; //!< Presumably the number of nodes in each group - TODO confirm.
int32_t* groupOffset; //!< Presumably the index of the first node of each group - TODO confirm.
};
//!
//! \struct RegionParameters
//!
//! \brief The Region plugin layer performs region proposal calculation.
//!
//! It generates 5 bounding boxes per cell (for yolo9000, 3 bounding boxes per cell).
//! For each box, it calculates the probabilities of object detection over 80 pre-defined classifications
//! (yolo9000 has 9418 pre-defined classifications, and these 9418 items are organized as a word-tree structure).
//! RegionParameters defines a set of parameters for creating the Region plugin layer.
//!
//! No member has a default initializer, so every field must be set by the caller.
//!
struct RegionParameters
{
int32_t num; //!< Number of predicted bounding boxes for each grid cell.
int32_t coords; //!< Number of coordinates for a bounding box.
int32_t classes; //!< Number of classifications to be predicted.
softmaxTree* smTree; //!< Helper structure used to do softmax on confidence scores.
};
//!
//! \struct NMSParameters
//!
//! \brief The NMSParameters are used by the BatchedNMSPlugin for performing
//! the non_max_suppression operation over boxes for object detection networks.
//!
//! No member of this struct has a default member initializer, so every field must be set by the caller.
//!
//! \deprecated Deprecated in TensorRT 10.0. BatchedNMSPlugin plugin is deprecated.
//!
struct TRT_DEPRECATED NMSParameters
{
bool shareLocation; //!< If set to true, the boxes inputs are shared across all classes.
//!< If set to false, the boxes input should account for per class box data.
int32_t backgroundLabelId; //!< Label ID for the background class.
//!< If there is no background class, set it as -1.
int32_t numClasses; //!< Number of classes in the network.
int32_t topK; //!< Number of bounding boxes to be fed into the NMS step.
int32_t keepTopK; //!< Number of total bounding boxes to be kept per image after NMS step.
//!< Should be less than or equal to the topK value.
float scoreThreshold; //!< Scalar threshold for score (low scoring boxes are removed).
float iouThreshold; //!< A scalar threshold for IOU (new boxes that have high IOU overlap
//!< with previously selected boxes are removed).
bool isNormalized; //!< Set to false, if the box coordinates are not normalized,
//!< i.e. not in the range [0,1]. Defaults to false.
//!< NOTE(review): this struct sets no initializer for the field, so the stated
//!< default is not enforced here - confirm where (or whether) it is applied.
};
} // namespace plugin
} // namespace nvinfer1
#endif // NV_INFER_PLUGIN_UTILS_H