[Paddle-TRT] upgrade EnqueueV2 to EnqueueV3 (#60807)
lizexu123 committed Jan 22, 2024
1 parent 8d219cb commit 94c63de
Showing 7 changed files with 105 additions and 36 deletions.
40 changes: 28 additions & 12 deletions paddle/fluid/inference/tensorrt/engine.cc
@@ -13,10 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/engine.h"

#include <NvInfer.h>
#include <glog/logging.h>

#include <string>

#include "NvInferRuntimeCommon.h"
@@ -174,11 +172,27 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context,
return cuda_graph_.Launch(stream);
}

#if IS_TRT_VERSION_GE(8500)
for (size_t j = 0; j < buffers->size(); ++j) {
auto name = context->getEngine().getBindingName(j);
if (context->getEngine().isShapeBinding(j) &&
context->getEngine().bindingIsInput(j)) {
continue;
} else {
context->setTensorAddress(name, (*buffers)[j]);
}
}
#endif

bool ret;
if (!with_dynamic_shape()) {
ret = context->enqueue(batch_size, buffers->data(), stream, nullptr);
} else {
#if IS_TRT_VERSION_GE(8500)
ret = context->enqueueV3(stream);
#else
ret = context->enqueueV2(buffers->data(), stream, nullptr);
#endif
}
return ret;
}
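For context on the core change above: TensorRT 8.5 replaced the bindings-array launch (enqueueV2) with a named-tensor launch (enqueueV3), where each I/O address is registered via setTensorAddress and the launch itself takes only the stream. Below is a minimal sketch of that migration pattern, assuming TensorRT >= 8.5; the function and variable names here are hypothetical, not Paddle's.

// Sketch of the enqueueV2 -> enqueueV3 migration (hypothetical names).
// Assumes TensorRT >= 8.5 headers; not the actual Paddle implementation.
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <vector>

bool Launch(nvinfer1::ICudaEngine *engine,
            nvinfer1::IExecutionContext *context,
            std::vector<void *> *buffers,
            cudaStream_t stream) {
#if NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 >= 8500
  // New style: register each binding's device address by tensor name,
  // then launch with only the stream.
  for (int i = 0; i < engine->getNbBindings(); ++i) {
    context->setTensorAddress(engine->getBindingName(i), (*buffers)[i]);
  }
  return context->enqueueV3(stream);
#else
  // Old style: the full bindings array is passed on every launch.
  return context->enqueueV2(buffers->data(), stream, nullptr);
#endif
}

Note that the Paddle change above additionally skips bindings that are input shape tensors in this loop: those must be fed host-side addresses, which is handled in tensorrt_engine_op.h further down.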
@@ -469,12 +483,12 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer,
"of the network at the same time.",
name));
network()->markOutput(*output);
- PADDLE_ENFORCE_EQ(
-     output->isNetworkOutput(),
-     true,
-     platform::errors::InvalidArgument(
-         "The output %s of TRT engine should be the output of the network.",
-         name));
+ PADDLE_ENFORCE_EQ(output->isNetworkOutput(),
+                   true,
+                   platform::errors::InvalidArgument(
+                       "The output %s of TRT engine should be the output "
+                       "of the network.",
+                       name));
}

void TensorRTEngine::DeclareOutput(const std::string &name) {
@@ -567,8 +581,8 @@ nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
trt_in_shape.nbDims = 1;
trt_in_shape.d[0] = 1;
}
- // In fact , this is not always right, because we can't determine if the 0th
- // dimension is batch. Just for run chenqu's model
+ // In fact, this is not always right, because we can't determine if the
+ // 0th dimension is batch. Just for running chenqu's model
if (!with_dynamic_shape()) {
trt_in_shape.nbDims--;
for (int i = 0; i < trt_in_shape.nbDims; i++) {
@@ -626,8 +640,10 @@ void TensorRTEngine::Deserialize(const std::string &engine_serialized_data) {
infer_engine_,
platform::errors::Fatal(
"Building TRT cuda engine failed when deserializing engine info. "
"Please check:\n1. Your TRT serialization is generated and loaded "
"on the same GPU architecture;\n2. The Paddle Inference version of "
"Please check:\n1. Your TRT serialization is generated and "
"loaded "
"on the same GPU architecture;\n2. The Paddle Inference version "
"of "
"generating serialization file and doing inference are "
"consistent."));

22 changes: 21 additions & 1 deletion paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
@@ -131,25 +131,45 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
std::vector<int> shape_v = {8, 8, 4};
PrepareInputOutput(x_v, {8, 8, 4});
PrepareShapeInput(shape_v);
#if IS_TRT_VERSION_GE(8500)
engine_->context()->setInputShape("input", nvinfer1::Dims2{8, 32});
#else
engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
engine_->context()->setBindingDimensions(1, shape_dim);
engine_->context()->setInputShapeBinding(1, shape_v.data());

#endif
auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

buffers[0] = reinterpret_cast<void *>(x_gpu_data);
buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
buffers[2] = reinterpret_cast<void *>(y_gpu_data);
#if IS_TRT_VERSION_GE(8500)
for (size_t i = 0; i < buffers.size(); i++) {
auto name = engine_->engine()->getBindingName(i);
if (engine_->engine()->isShapeBinding(i) &&
engine_->engine()->bindingIsInput(i)) {
engine_->context()->setTensorAddress(name, shape_v.data());
} else {
engine_->context()->setTensorAddress(name, buffers[i]);
}
}
#endif

engine_->Execute(-1, &buffers, ctx_->stream());
cudaStreamSynchronize(ctx_->stream());

std::vector<float> y_cpu;
GetOutput(&y_cpu);
ASSERT_EQ(y_cpu[0], 0);
ASSERT_EQ(y_cpu[1], 1);
#if IS_TRT_VERSION_GE(8500)
const char *name1 = engine_->engine()->getBindingName(2);
auto dims = engine_->context()->getTensorShape(name1);
#else
auto dims = engine_->context()->getBindingDimensions(2);
#endif
ASSERT_EQ(dims.nbDims, 3);
ASSERT_EQ(dims.d[0], 8);
ASSERT_EQ(dims.d[1], 8);
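The test exercises the same version split when reading back output shapes: under 8.5+ bindings are addressed by name, while older releases use binding indices. A condensed sketch of the idiom, assuming TensorRT headers; `engine` and `context` are hypothetical handles rather than the test fixture's members.

// Version-gated output-shape query (sketch with hypothetical names).
nvinfer1::Dims QueryOutputDims(const nvinfer1::ICudaEngine *engine,
                               nvinfer1::IExecutionContext *context,
                               int binding_index) {
#if NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 >= 8500
  // 8.5+: look the tensor up by name.
  const char *name = engine->getBindingName(binding_index);
  return context->getTensorShape(name);
#else
  // Pre-8.5: query by binding index.
  return context->getBindingDimensions(binding_index);
#endif
}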
55 changes: 53 additions & 2 deletions paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -611,6 +611,41 @@ class TensorRTEngineOp : public framework::OperatorBase {
}
} else {
#if IS_TRT_VERSION_GE(6000)
#if IS_TRT_VERSION_GE(8500)
if (engine->engine()->isShapeBinding(bind_index) &&
engine->engine()->bindingIsInput(bind_index)) {
std::vector<int> shape_v(t.numel());
if (t.dtype() == phi::DataType::INT32) {
paddle::memory::Copy(platform::CPUPlace(),
shape_v.data(),
t.place(),
t.data<int32_t>(),
t.numel() * sizeof(int),
nullptr);
} else if (t.dtype() == phi::DataType::INT64) {
std::string x_t = x + "_cast_to_INT32";
if (scope.FindVar(x_t) == nullptr) {
const_cast<framework::Scope *>(&scope)->Var(x_t);
}
auto int32_tensor =
scope.FindVar(x_t)->GetMutable<phi::DenseTensor>();
*int32_tensor = phi::Cast<int64_t>(
reinterpret_cast<const phi::GPUContext &>(dev_ctx),
t,
phi::DataType::INT32);
paddle::memory::Copy(platform::CPUPlace(),
shape_v.data(),
int32_tensor->place(),
int32_tensor->data<int32_t>(),
int32_tensor->numel() * sizeof(int),
nullptr);
}
trt_context->setTensorAddress(x.c_str(), shape_v.data());
} else {
trt_context->setInputShape(
x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
}
#else
trt_context->setBindingDimensions(
bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
// If this x is a shape tensor, we need to call setInputShapeBinding
@@ -644,6 +679,7 @@ }
}
trt_context->setInputShapeBinding(bind_index, shape_v.data());
}
#endif
#endif
}
runtime_batch = t_shape[0];
@@ -718,7 +754,20 @@
ddim.push_back(dims.d[i]);
}
} else {
#if IS_TRT_VERSION_GE(6000)
#if IS_TRT_VERSION_GE(8500)
auto x_name = engine->engine()->getBindingName(bind_index);
auto dims = trt_context->getTensorShape(x_name);
int nb_dims = dims.nbDims;
for (; nb_dims > 0; nb_dims--) {
// some trailing 'x 1' dims are legitimate; stop stripping once the
// original output rank is reached
if (dims.d[nb_dims - 1] != 1 ||
nb_dims == origin_output_rank[output_index])
break;
}
for (int i = 0; i < nb_dims; i++) {
ddim.push_back(dims.d[i]);
}
#else
auto dims = trt_context->getBindingDimensions(bind_index);
int nb_dims = dims.nbDims;
for (; nb_dims > 0; nb_dims--) {
@@ -727,7 +776,9 @@
nb_dims == origin_output_rank[output_index])
break;
}
- for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]);
+ for (int i = 0; i < nb_dims; i++) {
+   ddim.push_back(dims.d[i]);
+ }
#endif
}
auto *fluid_v = scope.FindVar(y);
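The rule this operator implements is the subtle part of the 8.5 API: an input that is a shape tensor is consumed on the host, so setTensorAddress must be given a host pointer (hence the copy into shape_v, with an INT64-to-INT32 cast first when needed), whereas ordinary execution tensors get explicit dimensions via setInputShape plus a device address. A sketch of that dispatch under those assumptions; the names are hypothetical rather than Paddle's, and the sketch folds both steps together, while the Paddle change registers device addresses for non-shape tensors later, in TensorRTEngine::Enqueue.

// Sketch of the 8.5+ input-binding dispatch (hypothetical names).
#include <NvInfer.h>
#include <vector>

void BindInput(const nvinfer1::ICudaEngine *engine,
               nvinfer1::IExecutionContext *context,
               int index,
               const char *name,
               void *device_ptr,
               const nvinfer1::Dims &dims,
               std::vector<int32_t> *host_shape_values) {
  if (engine->isShapeBinding(index) && engine->bindingIsInput(index)) {
    // Shape tensors are read on the host; the registered address must be
    // host memory, and it must stay alive until enqueueV3 has run.
    context->setTensorAddress(name, host_shape_values->data());
  } else {
    // Execution tensors: set dimensions by name, then the device address.
    context->setInputShape(name, dims);
    context->setTensorAddress(name, device_ptr);
  }
}

One caveat worth flagging: TensorRT dereferences the registered addresses at enqueue time, not at setTensorAddress time, so the host buffer backing a shape tensor has to outlive the launch.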
2 changes: 0 additions & 2 deletions test/ir/inference/test_trt_convert_bitwise_and.py
@@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
- program_config.set_input_type(np.float32)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
- program_config.set_input_type(np.float16)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-3
18 changes: 3 additions & 15 deletions test/ir/inference/test_trt_convert_bitwise_not.py
@@ -33,17 +33,13 @@ def sample_program_configs(self):
def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
if dims == 0:
return np.random.random([]).astype(np.bool8)
- elif dims == 1:
+ if dims == 1:
return np.random.random([32]).astype(np.bool8)
- elif dims == 2:
-     return np.random.random([3, 32]).astype(np.int8)
- elif dims == 3:
-     return np.random.random([3, 32, 32]).astype(np.int32)
else:
return np.random.random([batch, 3, 32, 32]).astype(np.int64)

- for dims in [0, 1, 2, 3, 4]:
-     for batch in [1, 4]:
+ for dims in [0, 1, 4]:
+     for batch in [1]:
self.dims = dims
dics = [{}]

@@ -84,14 +80,6 @@ def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1]}
self.dynamic_shape.max_input_shape = {"input_data": [64]}
self.dynamic_shape.opt_input_shape = {"input_data": [32]}
- elif self.dims == 2:
-     self.dynamic_shape.min_input_shape = {"input_data": [1, 16]}
-     self.dynamic_shape.max_input_shape = {"input_data": [4, 32]}
-     self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]}
- elif self.dims == 3:
-     self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]}
-     self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]}
-     self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 3, 16, 16]
2 changes: 0 additions & 2 deletions test/ir/inference/test_trt_convert_bitwise_or.py
@@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
- program_config.set_input_type(np.float32)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
- program_config.set_input_type(np.float16)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-3
2 changes: 0 additions & 2 deletions test/ir/inference/test_trt_convert_solve.py
@@ -87,10 +87,8 @@ def clear_dynamic_shape():
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
- program_config.set_input_type(np.float32)
yield self.create_inference_config(), (1, 3), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
- program_config.set_input_type(np.float16)
yield self.create_inference_config(), (1, 3), 1e-3

def test(self):
