/
NvInferRuntimePlugin.h
1093 lines (1025 loc) · 46.3 KB
/
NvInferRuntimePlugin.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef NV_INFER_RUNTIME_PLUGIN_H
#define NV_INFER_RUNTIME_PLUGIN_H
#include "NvInferRuntimeBase.h"
//!
//! \file NvInferRuntimePlugin.h
//!
//! This file contains common definitions, data structures and interfaces that relate to plugins and are shared
//! between the standard and safe runtime.
//!
//! \warning Do not directly include this file. Instead include either NvInferRuntime.h (for the standard runtime) or
//! NvInferSafeRuntime.h (for the safety runtime).
//!
//!
//! \namespace nvinfer1
//!
//! \brief The TensorRT API version 1 namespace.
//!
namespace nvinfer1
{
//!
//! \brief PluginFormat is reserved for backward compatibility.
//!
//! It is an alias of TensorFormat, so the two names can be used interchangeably.
//!
//! \see IPluginV2::supportsFormat()
//!
using PluginFormat = TensorFormat;
//!
//! \brief Bit at the plugin version to identify that it is a Python plugin.
//!
//! This bit is OR-ed into the PluginVersion and PluginCreatorVersion enumerators that denote Python-based
//! plugins and plugin creators.
//!
static constexpr int32_t kPLUGIN_VERSION_PYTHON_BIT = 0x40;
//!
//! \struct PluginTensorDesc
//!
//! \brief Fields that a plugin might see for an input or output.
//!
//! Scale is only valid when data type is DataType::kINT8. TensorRT will set
//! the value to -1.0F if it is invalid.
//!
//! \see IPluginV2IOExt::supportsFormatCombination
//! \see IPluginV2IOExt::configurePlugin
//!
struct PluginTensorDesc
{
//! Dimensions.
Dims dims;
//! Data type of the tensor.
//! \warning DataType::kBOOL and DataType::kUINT8 are not supported.
DataType type;
//! Tensor format.
TensorFormat format;
//! Scale for INT8 data type. Only valid when type is DataType::kINT8; TensorRT sets it to -1.0F otherwise.
float scale;
};
//!
//! \enum PluginVersion
//!
//! \brief Definition of plugin versions.
//!
//! Tag for plug-in versions. Used in upper byte of getTensorRTVersion().
//! Python-based variants additionally carry kPLUGIN_VERSION_PYTHON_BIT.
//!
enum class PluginVersion : uint8_t
{
    //! IPluginV2
    kV2 = 0x00,
    //! IPluginV2Ext
    kV2_EXT = 0x01,
    //! IPluginV2IOExt
    kV2_IOEXT = 0x02,
    //! IPluginV2DynamicExt
    kV2_DYNAMICEXT = 0x03,
    //! IPluginV2DynamicExt-based Python plugins (the kV2_DYNAMICEXT tag with the Python bit set)
    kV2_DYNAMICEXT_PYTHON = kPLUGIN_VERSION_PYTHON_BIT | 0x03
};
//!
//! \enum PluginCreatorVersion
//!
//! \brief Enum to identify version of the plugin creator.
//!
//! Python-based creators are distinguished by kPLUGIN_VERSION_PYTHON_BIT.
//!
enum class PluginCreatorVersion : int32_t
{
    //! IPluginCreator
    kV1 = 0x00,
    //! IPluginCreator-based Python plugin creators
    kV1_PYTHON = kPLUGIN_VERSION_PYTHON_BIT
};
//!
//! \class IPluginV2
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. When
//! combined with IPluginCreator it provides a mechanism to register plugins and
//! look up the Plugin Registry during de-serialization.
//!
//! \see IPluginCreator
//! \see IPluginRegistry
//!
//! \deprecated Deprecated in TensorRT 8.5. Implement IPluginV2DynamicExt or IPluginV2IOExt depending on your
//! requirement.
//!
class TRT_DEPRECATED IPluginV2
{
public:
//!
//! \brief Return the API version with which this plugin was built.
//!
//! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
//! plugins.
//!
//! \return The TensorRT version in the format (major * 100 + minor) * 100 + patch.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
//!
virtual int32_t getTensorRTVersion() const noexcept
{
return NV_TENSORRT_VERSION;
}
//!
//! \brief Return the plugin type. Should match the plugin name returned by the corresponding plugin creator.
//!
//! \see IPluginCreator::getPluginName()
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
//! NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual AsciiChar const* getPluginType() const noexcept = 0;
//!
//! \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin
//! creator.
//!
//! \see IPluginCreator::getPluginVersion()
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
//! NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual AsciiChar const* getPluginVersion() const noexcept = 0;
//!
//! \brief Get the number of outputs from the layer.
//!
//! \return The number of outputs, which is a positive integer.
//!
//! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
//! prior to any call to initialize().
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual int32_t getNbOutputs() const noexcept = 0;
//!
//! \brief Get the dimension of an output tensor.
//!
//! \param index The index of the output tensor. Will lie in the valid range (between 0 and getNbOutputs()-1
//! inclusive).
//! \param inputs The input tensor dimensions. Will be the start address of a Dims array of length nbInputDims.
//! \param nbInputDims The number of input tensors. Will be a non-negative integer.
//!
//! \return The output tensor dimensions if the index is in the valid range.
//! An invalid value of Dims{-1, {}} must be returned if the index is not in the valid range.
//!
//! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
//! prior to any call to initialize().
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
//! \note In any non-IPluginV2DynamicExt plugin, batch size must not be included in the returned dimensions,
//! even if the plugin is expected to be run in a network with explicit batch mode enabled.
//! Please see the TensorRT Developer Guide for more details on how plugin inputs and outputs behave.
//!
virtual Dims getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept = 0;
//!
//! \brief Check format support.
//!
//! \param type DataType requested.
//! \param format PluginFormat requested.
//!
//! \return true if the plugin supports the type-format combination.
//!
//! This function is called by the implementations of INetworkDefinition, IBuilder, and
//! safe::ICudaEngine/ICudaEngine. In particular, it is called when creating an engine and when deserializing an
//! engine.
//!
//! \warning For the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
//! will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use IPluginV2IOExt
//! or IPluginV2DynamicExt for other PluginFormats.
//!
//! \warning DataType::kBOOL and DataType::kUINT8 are not supported.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual bool supportsFormat(DataType type, PluginFormat format) const noexcept = 0;
//!
//! \brief Configure the layer.
//!
//! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
//! algorithm choices on the basis of its weights, dimensions, and maximum batch size.
//!
//! \param inputDims The input tensor dimensions. Will be the start address of a Dims array of length nbInputs.
//! \param nbInputs The number of inputs. Will be a non-negative integer.
//! \param outputDims The output tensor dimensions. Will be the start address of a Dims array of length nbOutputs.
//! \param nbOutputs The number of outputs. Will be a positive integer identical to the return value of
//! getNbOutputs().
//! \param type The data type selected for the engine.
//! \param format The format selected for the engine.
//! \param maxBatchSize The maximum batch size. Will be a positive integer.
//!
//! The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be
//! 3-dimensional CHW dimensions).
//!
//! \warning For the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
//! will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use IPluginV2IOExt
//! or IPluginV2DynamicExt for other PluginFormats.
//!
//! \warning DataType::kBOOL and DataType::kUINT8 are not supported.
//!
//! \see clone()
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin. However, TensorRT
//! will not call this method from two threads simultaneously on a given clone of a plugin.
//!
virtual void configureWithFormat(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
DataType type, PluginFormat format, int32_t maxBatchSize) noexcept
= 0;
//!
//! \brief Initialize the layer for execution. This is called when the engine is created.
//!
//! \return 0 for success, else non-zero (which will cause engine termination).
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when using multiple
//! execution contexts using this plugin.
//!
virtual int32_t initialize() noexcept = 0;
//!
//! \brief Release resources acquired during plugin layer initialization. This is called when the engine is
//! destroyed.
//!
//! \see initialize()
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when using multiple
//! execution contexts using this plugin. However, TensorRT will not call this method from
//! two threads simultaneously on a given clone of a plugin.
//!
virtual void terminate() noexcept = 0;
//!
//! \brief Find the workspace size required by the layer.
//!
//! This function is called during engine startup, after initialize(). The workspace size returned must be
//! sufficient for any batch size up to the maximum.
//!
//! \param maxBatchSize The maximum batch size, which will be a positive integer.
//!
//! \return The workspace size in bytes, i.e. the device memory size that the plugin requires for its internal
//! computations.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin. However, TensorRT
//! will not call this method from two threads simultaneously on a given clone of a plugin.
//!
virtual size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept = 0;
//!
//! \brief Execute the layer.
//!
//! \param batchSize The number of inputs in the batch.
//! \param inputs The memory for the input tensors. Will be an array of device addresses corresponding to input
//! tensors of length nbInputs, where nbInputs is the second parameter passed to configureWithFormat().
//! The i-th input tensor will have the dimensions inputDims[i], where inputDims is the first parameter
//! that was passed to configureWithFormat().
//! \param outputs The memory for the output tensors. Will be an array of device addresses corresponding to output
//! tensors of length getNbOutputs().
//! \param workspace Workspace for execution. Will be the start address of a device buffer whose length will be at
//! least getWorkspaceSize(batchSize).
//! \param stream The stream in which to execute the kernels. This will be a valid CUDA stream.
//!
//! \return 0 for success, else non-zero (which will cause engine termination).
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when multiple execution contexts are used during runtime.
//!
virtual int32_t enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace,
cudaStream_t stream) noexcept
= 0;
//!
//! \brief Find the size of the serialization buffer required to store the plugin configuration in a binary file.
//!
//! \return The size of the serialization buffer in bytes.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual size_t getSerializationSize() const noexcept = 0;
//!
//! \brief Serialize the layer.
//!
//! \param buffer A pointer to a host buffer to serialize data. Size of buffer will be at least as large as the
//! value returned by getSerializationSize().
//!
//! \see getSerializationSize()
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void serialize(void* buffer) const noexcept = 0;
//!
//! \brief Destroy the plugin object. This will be called when the network, builder or engine is destroyed.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void destroy() noexcept = 0;
//!
//! \brief Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object with
//! these parameters.
//!
//! The TensorRT runtime calls clone() to clone the plugin when an execution context is created for an engine,
//! after the engine has been created. The runtime does not call initialize() on the cloned plugin,
//! so the cloned plugin must be created in an initialized state.
//!
//! \return A cloned plugin object in an initialized state with the same parameters as the current object.
//! nullptr must be returned if the cloning fails, e.g. because of resource exhaustion.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when creating multiple
//! execution contexts.
//!
virtual IPluginV2* clone() const noexcept = 0;
//!
//! \brief Set the namespace that this plugin object belongs to. Ideally, all plugin
//! objects from the same plugin library must have the same namespace.
//!
//! \param pluginNamespace The namespace for the plugin object.
//!
//! \warning The string pluginNamespace will be NULL-terminated and have a length of 1024 bytes or less including the
//! NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept = 0;
//!
//! \brief Return the namespace of the plugin object.
//!
//! \return The namespace string that was passed to setPluginNamespace(), possibly after truncation to 1024 bytes
//! if a longer string was passed. An empty string must be returned as default value.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
// Default construction and virtual destruction are public.
// @cond SuppressDoxyWarnings
IPluginV2() = default;
virtual ~IPluginV2() noexcept = default;
// @endcond
protected:
// Copy and move operations are protected, so only derived classes can copy or move the base sub-object.
// The assignment operators are lvalue-ref-qualified (&), so they cannot be invoked on a temporary.
// @cond SuppressDoxyWarnings
IPluginV2(IPluginV2 const&) = default;
IPluginV2(IPluginV2&&) = default;
IPluginV2& operator=(IPluginV2 const&) & = default;
IPluginV2& operator=(IPluginV2&&) & = default;
// @endcond
};
//!
//! \class IPluginV2Ext
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. This
//! interface provides additional capabilities to the IPluginV2 interface by
//! supporting different output data types and broadcast across batches.
//!
//! \see IPluginV2
//!
//! \deprecated Deprecated in TensorRT 8.5. Implement IPluginV2DynamicExt or IPluginV2IOExt depending on your
//! requirement.
//!
class TRT_DEPRECATED IPluginV2Ext : public IPluginV2
{
public:
//!
//! \brief Return the DataType of the plugin output at the requested index.
//!
//! \param index The output tensor index in the valid range between 0 and getNbOutputs()-1.
//! \param inputTypes The data types of the input tensors, stored in an array of length nbInputs.
//! \param nbInputs The number of input tensors. Will be a non-negative integer.
//!
//! \return The data type of the output tensor with the provided index if the input tensors have the data types
//! provided in inputTypes, provided the output tensor index is in the valid range. DataType::kFLOAT must be
//! returned if the index is not in the valid range.
//!
//! The default behavior must be to return the type of the first input, or DataType::kFLOAT if the layer has no
//! inputs. The returned data type must have a format that is supported by the plugin.
//!
//! \see supportsFormat()
//!
//! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual nvinfer1::DataType getOutputDataType(
int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept
= 0;
//!
//! \brief Return true if the output tensor is broadcast across a batch.
//!
//! \param outputIndex The index of the output tensor, which will be in the valid range between 0 and
//! nbOutputs()-1.
//! \param inputIsBroadcasted A boolean array of length nbInputs. The i-th element will be true if and only if
//! the tensor for the ith input is broadcast across a batch.
//! \param nbInputs The number of inputs. Will be a non-negative integer.
//!
//! The values in inputIsBroadcasted refer to broadcasting at the semantic level,
//! i.e. are unaffected by whether method canBroadcastInputAcrossBatch requests
//! physical replication of the values.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
//! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
//!
TRT_DEPRECATED virtual bool isOutputBroadcastAcrossBatch(
int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept
= 0;
//!
//! \brief Return true if the plugin can use an input tensor that is broadcast across batch without replication.
//!
//! \param inputIndex Index of input that could be broadcast. Will be in the valid range between 0 and
//! nbInputs - 1 where nbInputs is the maximum number of input tensors supported by this plugin.
//!
//! \return true if the index is in the valid range and the plugin is able to broadcast a single copy of this
//! input tensor across the batch. False otherwise.
//!
//! For each input whose tensor is semantically broadcast across a batch,
//! TensorRT calls this method before calling configurePlugin.
//! If canBroadcastInputAcrossBatch returns true, TensorRT will not replicate the input tensor;
//! i.e., there will be a single copy that the plugin must share across the batch.
//! If it returns false, TensorRT will replicate the input tensor
//! so that it appears like a non-broadcasted tensor.
//!
//! This method is called only for inputs that can be broadcast.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
//! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
//!
TRT_DEPRECATED virtual bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept = 0;
//!
//! \brief Configure the layer with input and output data types.
//!
//! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
//! algorithm choices on the basis of its weights, dimensions, data types and maximum batch size.
//!
//! \param inputDims The input tensor dimensions. Will be an array of length nbInputs.
//! \param nbInputs The number of inputs. Will be a non-negative integer.
//! \param outputDims The output tensor dimensions. Will be an array of length nbOutputs.
//! \param nbOutputs The number of outputs. Will be a positive integer.
//! \param inputTypes The data types selected for the plugin inputs. Will be an array of length nbInputs.
//! \param outputTypes The data types selected for the plugin outputs. Will be an array of length nbOutputs.
//! \param inputIsBroadcast True for each input that the plugin must broadcast across the batch.
//! Will be an array of length nbInputs.
//! \param outputIsBroadcast True for each output that TensorRT will broadcast across the batch.
//! Will be an array of length nbOutputs.
//! \param floatFormat The format selected for the engine for the floating point inputs/outputs.
//! \param maxBatchSize The maximum batch size. Will be a positive integer.
//!
//! The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be
//! 3-dimensional CHW dimensions). When inputIsBroadcast or outputIsBroadcast is true, the outermost batch size for
//! that input or output must be treated as if it is one.
//! Index 'i' of inputIsBroadcast is true only if the input is semantically broadcast across the batch and
//! calling canBroadcastInputAcrossBatch with argument 'i' returns true.
//! Index 'i' of outputIsBroadcast is true only if calling isOutputBroadcastAcrossBatch with argument 'i'
//! returns true.
//!
//! \warning for the floatFormat field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and
//! PluginFormat::kCHW32 will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use
//! PluginV2IOExt or PluginV2DynamicExt for other PluginFormats.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin. However, TensorRT
//! will not call this method from two threads simultaneously on a given clone of a plugin.
//!
virtual void configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast,
bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept
= 0;
IPluginV2Ext() = default;
~IPluginV2Ext() override = default;
//!
//! \brief Attach the plugin object to an execution context and grant the plugin the access to some context
//! resources.
//!
//! \param cudnn The cuDNN context handle of the execution context. Will be a valid cuDNN context handle, or
//! nullptr if TacticSource::kCUDNN is disabled.
//! \param cublas The cuBLAS context handle of the execution context. Will be a valid cuBLAS context handle, or
//! nullptr if TacticSource::kCUBLAS is disabled.
//! \param allocator The allocator used by the execution context
//!
//! This function is called automatically for each plugin when a new execution context is created. If the context
//! was created without resources, this method is not called until the resources are assigned. It is also called if
//! new resources are assigned to the context.
//!
//! If the plugin needs per-context resource, it can be allocated here.
//! The plugin can also get context-owned cuDNN and cuBLAS context here.
//!
//! \note The TacticSource::kCUDNN and TacticSource::kCUBLAS flag is disabled by default.
//! The allocator pointer is unique to each building or execution context instance having overlapping lifetimes.
//! It can be used as a key to manage resources across plugin instances sharing the same context.
//! Plugins attached to different contexts will have different handles as their execution will not overlap.
//!
//! \see TacticSources
//! \see getPluginCudnnHandle(void* executionContextIdentifier)
//! \see getPluginCublasHandle(void* excecutionContextIdentifier)
//!
//! \note In the automotive safety context, the cuDNN and cuBLAS parameters will be nullptr because cuDNN and cuBLAS
//! are not used by the safe runtime.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void attachToContext(
cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* /*allocator*/) noexcept
{
}
//!
//! \brief Detach the plugin object from its execution context.
//!
//! This function is called automatically for each plugin when an execution context is destroyed or the context
//! resources are unassigned from the context.
//!
//! If the plugin owns per-context resource, it can be released here.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
virtual void detachFromContext() noexcept {}
//!
//! \brief Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin
//! object with these parameters. If the source plugin is pre-configured with configurePlugin(), the returned object
//! must also be pre-configured. The returned object must allow attachToContext() with a new execution context.
//! Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object
//! (e.g. via ref-counting) to avoid duplication.
//!
//! \return A pointer to a cloned plugin object if cloning was successful, otherwise nullptr.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin.
//!
IPluginV2Ext* clone() const noexcept override = 0;
protected:
// @cond SuppressDoxyWarnings
// Copy and move operations are defaulted but protected, so they are not callable from outside the class
// hierarchy. The trailing '&' ref-qualifier restricts the assignment operators to lvalue targets.
IPluginV2Ext(IPluginV2Ext const&) = default;
IPluginV2Ext(IPluginV2Ext&&) = default;
IPluginV2Ext& operator=(IPluginV2Ext const&) & = default;
IPluginV2Ext& operator=(IPluginV2Ext&&) & = default;
// @endcond
//!
//! \brief Return the API version with which this plugin was built. The
//! upper byte is reserved by TensorRT and is used to differentiate this from IPluginV2.
//!
//! \return In the lower three bytes, the TensorRT version in the format
//! (major * 100 + minor) * 100 + patch.
//! In the upper byte, the value 1.
//!
//! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
//! plugins.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
//!
int32_t getTensorRTVersion() const noexcept override
{
    // Upper byte: tag that marks this object as an IPluginV2Ext-generation plugin.
    uint32_t const interfaceTag = static_cast<uint32_t>(PluginVersion::kV2_EXT) << 24U;
    // Lower three bytes: the TensorRT version this plugin was compiled against.
    uint32_t const versionBits = static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU;
    return static_cast<int32_t>(interfaceTag | versionBits);
}
//!
//! \brief Derived classes must not implement this. In a C++11 API it would be override final.
//!
//! IPluginV2Ext::configureWithFormat() is a NOP operation for all classes derived from IPluginV2Ext.
//! These classes call configurePlugin() instead.
//!
void configureWithFormat(Dims const* /*inputDims*/, int32_t /*nbInputs*/, Dims const* /*outputDims*/,
int32_t /*nbOutputs*/, DataType /*type*/, PluginFormat /*format*/, int32_t /*maxBatchSize*/) noexcept override
{
// Intentionally empty: every argument is ignored. Classes derived from IPluginV2Ext are configured through
// configurePlugin() instead.
}
};
//!
//! \class IPluginV2IOExt
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. This interface provides additional
//! capabilities to the IPluginV2Ext interface by extending different I/O data types and tensor formats.
//!
//! \see IPluginV2Ext
//!
//! \deprecated Deprecated in TensorRT 10.0.
//!
class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext
{
public:
    //!
    //! \brief Configure the layer from per-tensor I/O descriptors.
    //!
    //! The builder invokes this before initialize(). The plugin may use the supplied PluginTensorDesc entries to
    //! select its algorithms.
    //!
    //! \param in Attributes of the input tensors used for configuration.
    //! \param nbInput Number of input tensors.
    //! \param out Attributes of the output tensors used for configuration.
    //! \param nbOutput Number of output tensors.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
    //!                  when building networks on multiple devices sharing the same plugin. However, TensorRT
    //!                  will not call this method from two threads simultaneously on a given clone of a plugin.
    //!
    virtual void configurePlugin(PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out,
        int32_t nbOutput) noexcept = 0;

    //!
    //! \brief Report whether the plugin supports the format and data type of the input/output at index pos.
    //!
    //! Inputs occupy indices 0..(nbInputs-1) and outputs occupy nbInputs..(nbInputs+nbOutputs-1), so pos indexes
    //! inOut with 0 <= pos < nbInputs+nbOutputs.
    //!
    //! TensorRT calls this method to ask whether the tensor at pos can use inOut[pos].format together with
    //! inOut[pos].type; the override must return true when that pairing is supported. If the answer depends on the
    //! other tensors, it may be based on inOut[0..pos-1], which are already set to values the plugin supports.
    //! Entries inOut[pos+1..nbInputs+nbOutputs-1] hold invalid values and must not be inspected. Put differently,
    //! the decision for pos must rely on inOut[0..pos] only.
    //!
    //! Some examples:
    //!
    //! * A plugin that supports only FP16 NCHW:
    //!
    //!         return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kHALF;
    //!
    //! * A plugin that supports only FP16 NCHW for its two inputs and FP32 NCHW for its single output:
    //!
    //!         return inOut[pos].format == TensorFormat::kLINEAR &&
    //!                (inOut[pos].type == (pos < 2 ? DataType::kHALF : DataType::kFLOAT));
    //!
    //! * A "polymorphic" plugin with two inputs and one output that accepts any format or type, provided the
    //!   inputs and the output all share the same format and type:
    //!
    //!         return pos == 0 || (inOut[pos].format == inOut[0].format && inOut[pos].type == inOut[0].type);
    //!
    //! Warning: TensorRT stops asking for formats once it has found kFORMAT_COMBINATION_LIMIT combinations.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
    //!                  when building networks on multiple devices sharing the same plugin.
    //!
    virtual bool supportsFormatCombination(int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs,
        int32_t nbOutputs) const noexcept = 0;

    // @cond SuppressDoxyWarnings
    IPluginV2IOExt() = default;
    ~IPluginV2IOExt() override = default;
    // @endcond

protected:
    // @cond SuppressDoxyWarnings
    IPluginV2IOExt(IPluginV2IOExt const&) = default;
    IPluginV2IOExt(IPluginV2IOExt&&) = default;
    IPluginV2IOExt& operator=(IPluginV2IOExt const&) & = default;
    IPluginV2IOExt& operator=(IPluginV2IOExt&&) & = default;
    // @endcond

    //!
    //! \brief Return the API version this plugin was built with. TensorRT reserves the upper byte and uses it to
    //! tell this interface apart from IPluginV2 and IPluginV2Ext.
    //!
    //! Do not override this method: the TensorRT library relies on it to stay backwards-compatible with plugins.
    //!
    //! \usage
    //! - Allowed context for the API call
    //!   - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
    //!
    int32_t getTensorRTVersion() const noexcept override
    {
        // Upper byte: interface tag. Lower three bytes: TensorRT version.
        uint32_t const interfaceTag = static_cast<uint32_t>(PluginVersion::kV2_IOEXT) << 24U;
        uint32_t const versionBits = static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU;
        return static_cast<int32_t>(interfaceTag | versionBits);
    }

private:
    // The members below are obsolete base-class methods. They are sealed with 'final' and kept private so that
    // derived plugins can neither implement nor use them.

    //!
    //! \brief Obsolete plugin configuration entry point; does nothing.
    //!
    void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
        bool const*, PluginFormat, int32_t) noexcept final
    {
    }

    //!
    //! \brief Obsolete data type/format query; always reports "not supported".
    //!
    bool supportsFormat(DataType, PluginFormat) const noexcept final
    {
        return false;
    }
};
//!
//! \enum PluginFieldType
//!
//! \brief The possible field types for custom layer.
//!
// Every enumerator carries an explicit value. Note that kUNKNOWN is 8 and is followed by kBF16, kINT64 and kFP8,
// so it is not the largest value in the enumeration.
enum class PluginFieldType : int32_t
{
//! FP16 field type.
kFLOAT16 = 0,
//! FP32 field type.
kFLOAT32 = 1,
//! FP64 field type.
kFLOAT64 = 2,
//! INT8 field type.
kINT8 = 3,
//! INT16 field type.
kINT16 = 4,
//! INT32 field type.
kINT32 = 5,
//! char field type.
kCHAR = 6,
//! nvinfer1::Dims field type.
kDIMS = 7,
//! Unknown field type. This is the default type of a default-constructed PluginField.
kUNKNOWN = 8,
//! BF16 field type.
kBF16 = 9,
//! INT64 field type.
kINT64 = 10,
//! FP8 field type.
kFP8 = 11,
};
//!
//! \class PluginField
//!
//! \brief Structure containing a plugin attribute field name and its associated data.
//! This information can be parsed to decode the necessary plugin metadata.
//!
//!
class PluginField
{
public:
//! Plugin field attribute name
AsciiChar const* name;
//! Plugin field attribute data
void const* data;
//! Plugin field attribute type
PluginFieldType type;
//! Number of data entries in the Plugin attribute
int32_t length;
PluginField(AsciiChar const* const name_ = nullptr, void const* const data_ = nullptr,
PluginFieldType const type_ = PluginFieldType::kUNKNOWN, int32_t const length_ = 0) noexcept
: name(name_)
, data(data_)
, type(type_)
, length(length_)
{
}
};
//!
//! \struct PluginFieldCollection
//!
//! \brief Plugin field collection struct.
//!
// Plain aggregate with no constructors and no default member initializers: both members hold indeterminate
// values unless the caller initializes them (e.g. with braces).
struct PluginFieldCollection
{
//! Number of PluginField entries.
int32_t nbFields;
//! Pointer to PluginField entries.
// NOTE(review): presumably points to an array of nbFields elements that this struct does not own; confirm.
PluginField const* fields;
};
//!
//! \enum PluginCapabilityType
//!
//! \brief Enumerates the different capability types an IPluginV3 object may have
//!
// Values are explicit. kBUILD and kRUNTIME here are 1 and 2, which differ from the identically named
// TensorRTPhase enumerators (0 and 1), so the two enums are not interchangeable by value.
enum class PluginCapabilityType : int32_t
{
//! Core capability. Every IPluginV3 object must have this.
kCORE = 0,
//! Build capability. IPluginV3 objects provided to TensorRT build phase must have this.
kBUILD = 1,
//! Runtime capability. IPluginV3 objects provided to TensorRT build and execution phases must have this.
kRUNTIME = 2
};
//!
//! \enum TensorRTPhase
//!
//! \brief Indicates a phase of operation of TensorRT
//!
// Values are explicit. kBUILD and kRUNTIME here are 0 and 1, which differ from the identically named
// PluginCapabilityType enumerators (1 and 2), so the two enums are not interchangeable by value.
enum class TensorRTPhase : int32_t
{
//! Build phase of TensorRT
kBUILD = 0,
//! Execution phase of TensorRT
kRUNTIME = 1
};
namespace v_1_0
{
// Base class for plugin creator interfaces (IPluginCreator derives from it). It adds no members of its own on
// top of IVersionedInterface; it only fixes how objects may be created, copied and destroyed.
class IPluginCreatorInterface : public IVersionedInterface
{
public:
// Public destructor overriding the base-class destructor.
~IPluginCreatorInterface() noexcept override = default;
protected:
// Construction, copy and move are protected, so they are reachable only from within the class hierarchy.
// The trailing '&' ref-qualifier restricts the assignment operators to lvalue targets.
IPluginCreatorInterface() = default;
IPluginCreatorInterface(IPluginCreatorInterface const&) = default;
IPluginCreatorInterface(IPluginCreatorInterface&&) = default;
IPluginCreatorInterface& operator=(IPluginCreatorInterface const&) & = default;
IPluginCreatorInterface& operator=(IPluginCreatorInterface&&) & = default;
};
class TRT_DEPRECATED IPluginCreator : public IPluginCreatorInterface
{
public:
//!
//! \brief Return the plugin name.
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
//! the NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual AsciiChar const* getPluginName() const noexcept = 0;
//!
//! \brief Return the plugin version.
//!
//! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
//! the NULL terminator.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual AsciiChar const* getPluginVersion() const noexcept = 0;
//!
//! \brief Return a list of fields that need to be passed to createPlugin.
//!
//! \see PluginFieldCollection
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
//!
//! \brief Return a plugin object. Return nullptr in case of error.
//!
//! \param name A NULL-terminated name string of length 1024 or less, including the NULL terminator.
//! \param fc A pointer to a collection of fields needed for constructing the plugin.
//!
//! \usage
//! - Allowed context for the API call
//! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
//! when building networks on multiple devices sharing the same plugin or when deserializing
//! multiple engines concurrently sharing plugins.
//!
virtual IPluginV2* createPlugin(AsciiChar const* name, PluginFieldCollection const* fc) noexcept = 0;