From 94c63ded4b7b061d260c309b962c15180783afb0 Mon Sep 17 00:00:00 2001 From: lizexu123 <39205361+lizexu123@users.noreply.github.com> Date: Mon, 22 Jan 2024 11:05:07 +0800 Subject: [PATCH] [Paddle-TRT] upgrade EnqueueV2 to EnqueueV3 (#60807) --- paddle/fluid/inference/tensorrt/engine.cc | 40 ++++++++++---- .../inference/tensorrt/test_dynamic_engine.cc | 22 +++++++- .../operators/tensorrt/tensorrt_engine_op.h | 55 ++++++++++++++++++- .../inference/test_trt_convert_bitwise_and.py | 2 - .../inference/test_trt_convert_bitwise_not.py | 18 +----- .../inference/test_trt_convert_bitwise_or.py | 2 - test/ir/inference/test_trt_convert_solve.py | 2 - 7 files changed, 105 insertions(+), 36 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index c91bb59aee823..c934b6175489f 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -13,10 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/inference/tensorrt/engine.h" - #include #include - #include #include "NvInferRuntimeCommon.h" @@ -174,11 +172,27 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context, return cuda_graph_.Launch(stream); } +#if IS_TRT_VERSION_GE(8500) + for (size_t j = 0; j < buffers->size(); ++j) { + auto name = context->getEngine().getBindingName(j); + if (context->getEngine().isShapeBinding(j) && + context->getEngine().bindingIsInput(j)) { + continue; + } else { + context->setTensorAddress(name, (*buffers)[j]); + } + } +#endif + bool ret; if (!with_dynamic_shape()) { ret = context->enqueue(batch_size, buffers->data(), stream, nullptr); } else { +#if IS_TRT_VERSION_GE(8500) + ret = context->enqueueV3(stream); +#else ret = context->enqueueV2(buffers->data(), stream, nullptr); +#endif } return ret; } @@ -469,12 +483,12 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, "of the network at the same time.", name)); network()->markOutput(*output); - PADDLE_ENFORCE_EQ( - output->isNetworkOutput(), - true, - platform::errors::InvalidArgument( - "The output %s of TRT engine should be the output of the network.", - name)); + PADDLE_ENFORCE_EQ(output->isNetworkOutput(), + true, + platform::errors::InvalidArgument( + "The output %s of TRT engine should be the output " + "of the network.", + name)); } void TensorRTEngine::DeclareOutput(const std::string &name) { @@ -567,8 +581,8 @@ nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor( trt_in_shape.nbDims = 1; trt_in_shape.d[0] = 1; } - // In fact , this is not always right, because we can't determine if the 0th - // dimension is batch. Just for run chenqu's model + // In fact , this is not always right, because we can't determine if the + // 0th dimension is batch. Just for run chenqu's model if (!with_dynamic_shape()) { trt_in_shape.nbDims--; for (int i = 0; i < trt_in_shape.nbDims; i++) { @@ -626,8 +640,10 @@ void TensorRTEngine::Deserialize(const std::string &engine_serialized_data) { infer_engine_, platform::errors::Fatal( "Building TRT cuda engine failed when deserializing engine info. " - "Please check:\n1. Your TRT serialization is generated and loaded " - "on the same GPU architecture;\n2. The Paddle Inference version of " + "Please check:\n1. Your TRT serialization is generated and " + "loaded " + "on the same GPU architecture;\n2. The Paddle Inference version " + "of " "generating serialization file and doing inference are " "consistent.")); diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index b565df0ec3d8c..5e44b2c7c5b4f 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -131,10 +131,13 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { std::vector shape_v = {8, 8, 4}; PrepareInputOutput(x_v, {8, 8, 4}); PrepareShapeInput(shape_v); +#if IS_TRT_VERSION_GE(8500) + engine_->context()->setInputShape("input", nvinfer1::Dims2{8, 32}); +#else engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32}); engine_->context()->setBindingDimensions(1, shape_dim); engine_->context()->setInputShapeBinding(1, shape_v.data()); - +#endif auto *x_gpu_data = input_.mutable_data(ctx_->GetPlace()); auto *shape_gpu_data = shape_.mutable_data(ctx_->GetPlace()); auto *y_gpu_data = output_.mutable_data(ctx_->GetPlace()); @@ -142,14 +145,31 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) { buffers[0] = reinterpret_cast(x_gpu_data); buffers[1] = reinterpret_cast(shape_gpu_data); buffers[2] = reinterpret_cast(y_gpu_data); +#if IS_TRT_VERSION_GE(8500) + for (size_t i = 0; i < buffers.size(); i++) { + auto name = engine_->engine()->getBindingName(i); + if (engine_->engine()->isShapeBinding(i) && + engine_->engine()->bindingIsInput(i)) { + engine_->context()->setTensorAddress(name, shape_v.data()); + } else { + engine_->context()->setTensorAddress(name, buffers[i]); + } + } +#endif engine_->Execute(-1, &buffers, ctx_->stream()); cudaStreamSynchronize(ctx_->stream()); + std::vector y_cpu; GetOutput(&y_cpu); ASSERT_EQ(y_cpu[0], 0); ASSERT_EQ(y_cpu[1], 1); +#if IS_TRT_VERSION_GE(8500) + const char *name1 = engine_->engine()->getBindingName(2); + auto dims = engine_->context()->getTensorShape(name1); +#else auto dims = engine_->context()->getBindingDimensions(2); +#endif ASSERT_EQ(dims.nbDims, 3); ASSERT_EQ(dims.d[0], 8); ASSERT_EQ(dims.d[1], 8); diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 8c75a7bc00f1c..8f67c25e4641f 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -611,6 +611,41 @@ class TensorRTEngineOp : public framework::OperatorBase { } } else { #if IS_TRT_VERSION_GE(6000) +#if IS_TRT_VERSION_GE(8500) + if (engine->engine()->isShapeBinding(bind_index) && + engine->engine()->bindingIsInput(bind_index)) { + std::vector shape_v(t.numel()); + if (t.dtype() == phi::DataType::INT32) { + paddle::memory::Copy(platform::CPUPlace(), + shape_v.data(), + t.place(), + t.data(), + t.numel() * sizeof(int), + nullptr); + } else if (t.dtype() == phi::DataType::INT64) { + std::string x_t = x + "_cast_to_INT32"; + if (scope.FindVar(x_t) == nullptr) { + const_cast(&scope)->Var(x_t); + } + auto int32_tensor = + scope.FindVar(x_t)->GetMutable(); + *int32_tensor = phi::Cast( + reinterpret_cast(dev_ctx), + t, + phi::DataType::INT32); + paddle::memory::Copy(platform::CPUPlace(), + shape_v.data(), + int32_tensor->place(), + int32_tensor->data(), + int32_tensor->numel() * sizeof(int), + nullptr); + } + trt_context->setTensorAddress(x.c_str(), shape_v.data()); + } else { + trt_context->setInputShape( + x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true)); + } +#else trt_context->setBindingDimensions( bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true)); // If this x is a shape tensor, we need call setInputShapeBinding @@ -644,6 +679,7 @@ class TensorRTEngineOp : public framework::OperatorBase { } trt_context->setInputShapeBinding(bind_index, shape_v.data()); } +#endif #endif } runtime_batch = t_shape[0]; @@ -718,7 +754,20 @@ class TensorRTEngineOp : public framework::OperatorBase { ddim.push_back(dims.d[i]); } } else { -#if IS_TRT_VERSION_GE(6000) +#if IS_TRT_VERSION_GE(8500) + auto x_name = engine->engine()->getBindingName(bind_index); + auto dims = trt_context->getTensorShape(x_name); + int nb_dims = dims.nbDims; + for (; nb_dims > 0; nb_dims--) { + // some 'x 1' of shape is normal, no need to remove it + if (dims.d[nb_dims - 1] != 1 || + nb_dims == origin_output_rank[output_index]) + break; + } + for (int i = 0; i < nb_dims; i++) { + ddim.push_back(dims.d[i]); + } +#else auto dims = trt_context->getBindingDimensions(bind_index); int nb_dims = dims.nbDims; for (; nb_dims > 0; nb_dims--) { @@ -727,7 +776,9 @@ class TensorRTEngineOp : public framework::OperatorBase { nb_dims == origin_output_rank[output_index]) break; } - for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]); + for (int i = 0; i < nb_dims; i++) { + ddim.push_back(dims.d[i]); + } #endif } auto *fluid_v = scope.FindVar(y); diff --git a/test/ir/inference/test_trt_convert_bitwise_and.py b/test/ir/inference/test_trt_convert_bitwise_and.py index 0bfa21b5a36de..015977b438765 100644 --- a/test/ir/inference/test_trt_convert_bitwise_and.py +++ b/test/ir/inference/test_trt_convert_bitwise_and.py @@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - program_config.set_input_type(np.float32) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - program_config.set_input_type(np.float16) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-3 diff --git a/test/ir/inference/test_trt_convert_bitwise_not.py b/test/ir/inference/test_trt_convert_bitwise_not.py index 8d19425011ed4..56971f3d36dfc 100644 --- a/test/ir/inference/test_trt_convert_bitwise_not.py +++ b/test/ir/inference/test_trt_convert_bitwise_not.py @@ -33,17 +33,13 @@ def sample_program_configs(self): def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): if dims == 0: return np.random.random([]).astype(np.bool8) - elif dims == 1: + if dims == 1: return np.random.random([32]).astype(np.bool8) - elif dims == 2: - return np.random.random([3, 32]).astype(np.int8) - elif dims == 3: - return np.random.random([3, 32, 32]).astype(np.int32) else: return np.random.random([batch, 3, 32, 32]).astype(np.int64) - for dims in [0, 1, 2, 3, 4]: - for batch in [1, 4]: + for dims in [0, 1, 4]: + for batch in [1]: self.dims = dims dics = [{}] @@ -84,14 +80,6 @@ def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1]} self.dynamic_shape.max_input_shape = {"input_data": [64]} self.dynamic_shape.opt_input_shape = {"input_data": [32]} - elif self.dims == 2: - self.dynamic_shape.min_input_shape = {"input_data": [1, 16]} - self.dynamic_shape.max_input_shape = {"input_data": [4, 32]} - self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]} - elif self.dims == 3: - self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]} - self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]} - self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]} else: self.dynamic_shape.min_input_shape = { "input_data": [1, 3, 16, 16] diff --git a/test/ir/inference/test_trt_convert_bitwise_or.py b/test/ir/inference/test_trt_convert_bitwise_or.py index fae933c0cb185..84cef306b4b55 100644 --- a/test/ir/inference/test_trt_convert_bitwise_or.py +++ b/test/ir/inference/test_trt_convert_bitwise_or.py @@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - program_config.set_input_type(np.float32) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - program_config.set_input_type(np.float16) yield self.create_inference_config(), generate_trt_nodes_num( attrs, True ), 1e-3 diff --git a/test/ir/inference/test_trt_convert_solve.py b/test/ir/inference/test_trt_convert_solve.py index c3f9b51d0d05c..c3117ee335740 100644 --- a/test/ir/inference/test_trt_convert_solve.py +++ b/test/ir/inference/test_trt_convert_solve.py @@ -87,10 +87,8 @@ def clear_dynamic_shape(): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - program_config.set_input_type(np.float32) yield self.create_inference_config(), (1, 3), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - program_config.set_input_type(np.float16) yield self.create_inference_config(), (1, 3), 1e-3 def test(self):