/
onnx2trt.py
121 lines (104 loc) · 4.78 KB
/
onnx2trt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import tensorrt as trt
from .base import TRTModel
from ..calibrators import EntropyCalibrator2
from ..datasets import CustomDataset
from .. import utils
def onnx2trt(
        model,
        log_level='ERROR',
        max_batch_size=1,
        min_input_shapes=None,
        max_input_shapes=None,
        max_workspace_size=1,
        fp16_mode=False,
        strict_type_constraints=False,
        int8_mode=False,
        int8_calibrator=None):
    """Build a TensorRT model from an ONNX model.

    Args:
        model (string or io object): ONNX model file name, or a readable
            binary file-like object containing the serialized model.
        log_level (string, default is ERROR): TensorRT logger level; one of
            INTERNAL_ERROR, ERROR, WARNING, INFO, VERBOSE.
        max_batch_size (int, default=1): The maximum batch size which can be
            used at execution time, and also the batch size for which the
            ICudaEngine will be optimized.
        min_input_shapes (list, default is None): Minimum input shapes
            (without the batch dimension), should be provided when shape is
            dynamic. For example, [(3, 224, 224)] for a single input. Must be
            given together with max_input_shapes.
        max_input_shapes (list, default is None): Maximum input shapes
            (without the batch dimension), should be provided when shape is
            dynamic. For example, [(3, 224, 224)] for a single input. Must be
            given together with min_input_shapes.
        max_workspace_size (int, default is 1): The maximum GPU temporary
            memory, in GB, which the ICudaEngine can use at execution time.
        fp16_mode (bool, default is False): Whether or not 16-bit kernels are
            permitted. During engine build fp16 kernels will also be tried
            when this mode is enabled.
        strict_type_constraints (bool, default is False): When strict type
            constraints is set, TensorRT will choose the type constraints
            that conforms to type constraints. If the flag is not enabled
            higher precision implementation may be chosen if it results in
            higher performance.
        int8_mode (bool, default is False): Whether Int8 mode is used.
        int8_calibrator (volksdep.calibrators.base.BaseCalibrator,
            default is None): calibrator for int8 mode; if None, a default
            entropy calibrator fed with all-ones dummy data is used.

    Returns:
        TRTModel: wrapper around the built ICudaEngine.

    Raises:
        RuntimeError: if the ONNX model cannot be parsed or the engine
            cannot be built.
        ValueError: if only one of min_input_shapes / max_input_shapes is
            given, or their lengths do not match the network's input count.
    """
    logger = trt.Logger(getattr(trt.Logger, log_level))
    builder = trt.Builder(logger)
    # The ONNX parser requires an explicit-batch network definition.
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)

    if isinstance(model, str):
        with open(model, 'rb') as f:
            parsed = parser.parse(f.read())
    else:
        parsed = parser.parse(model.read())
    if not parsed:
        # Fail fast with every parser error instead of letting build_engine
        # crash later with a less informative message.
        errors = '\n'.join(str(parser.get_error(i))
                           for i in range(parser.num_errors))
        raise RuntimeError('Failed to parse ONNX model:\n{}'.format(errors))

    # Re-order output tensors: unmark all original outputs, then re-mark them
    # through identity layers so the engine's output order is deterministic.
    output_tensors = [network.get_output(i)
                      for i in range(network.num_outputs)]
    for tensor in output_tensors:
        network.unmark_output(tensor)
    for tensor in output_tensors:
        identity_out_tensor = network.add_identity(tensor).get_output(0)
        identity_out_tensor.name = 'identity_{}'.format(tensor.name)
        network.mark_output(tensor=identity_out_tensor)

    builder.max_batch_size = max_batch_size
    config = builder.create_builder_config()
    # max_workspace_size is documented in GB: 1 << 30 bytes == 1 GiB.
    # (The previous multiplier, 1 << 25, was only 32 MB per unit and
    # contradicted the documented default of 1 GB.)
    config.max_workspace_size = max_workspace_size * (1 << 30)
    if fp16_mode:
        config.set_flag(trt.BuilderFlag.FP16)
    if strict_type_constraints:
        config.set_flag(trt.BuilderFlag.STRICT_TYPES)
    if int8_mode:
        config.set_flag(trt.BuilderFlag.INT8)
        if int8_calibrator is None:
            # No calibrator supplied: calibrate with all-ones dummy data
            # shaped like the network inputs (batch size 1).
            shapes = [(1,) + tuple(network.get_input(i).shape[1:])
                      for i in range(network.num_inputs)]
            dummy_data = utils.gen_ones(shapes)
            int8_calibrator = EntropyCalibrator2(CustomDataset(dummy_data))
        config.int8_calibrator = int8_calibrator

    # Dynamic-shape optimization profile: min/max shapes must come in a pair.
    if bool(min_input_shapes) ^ bool(max_input_shapes):
        raise ValueError('min_input_shapes and max_input_shapes must be '
                         'provided together')
    profile = builder.create_optimization_profile()
    input_shapes = [tuple(network.get_input(i).shape[1:])
                    for i in range(network.num_inputs)]
    if not min_input_shapes:
        min_input_shapes = input_shapes
    if not max_input_shapes:
        max_input_shapes = input_shapes
    if not (len(min_input_shapes) == len(max_input_shapes)
            == len(input_shapes)):
        raise ValueError('min_input_shapes and max_input_shapes must have '
                         'one entry per network input '
                         '({} expected)'.format(len(input_shapes)))
    for i in range(network.num_inputs):
        tensor = network.get_input(i)
        # tuple(...) tolerates callers passing lists instead of tuples.
        min_shape = (1,) + tuple(min_input_shapes[i])
        max_shape = (max_batch_size,) + tuple(max_input_shapes[i])
        # Optimal shape: element-wise midpoint between min and max.
        opt_shape = [(min_ + max_) // 2
                     for min_, max_ in zip(min_shape, max_shape)]
        profile.set_shape(tensor.name, min_shape, opt_shape, max_shape)
    config.add_optimization_profile(profile)

    engine = builder.build_engine(network, config)
    if engine is None:
        # build_engine returns None (rather than raising) on failure.
        raise RuntimeError('Failed to build TensorRT engine')
    return TRTModel(engine)