diff --git a/csrc/fastdeploy/backends/paddle/paddle_backend.cc b/csrc/fastdeploy/backends/paddle/paddle_backend.cc
index 7210225b90c..85369e6d78c 100644
--- a/csrc/fastdeploy/backends/paddle/paddle_backend.cc
+++ b/csrc/fastdeploy/backends/paddle/paddle_backend.cc
@@ -19,6 +19,32 @@ namespace fastdeploy {
 
 void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
   if (option.use_gpu) {
     config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
+    if (option.enable_trt) {
+#ifdef ENABLE_TRT_BACKEND
+      auto precision = paddle_infer::PrecisionType::kFloat32;
+      if (option.trt_option.enable_fp16) {
+        precision = paddle_infer::PrecisionType::kHalf;
+      }
+      config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, precision, false);
+      std::map<std::string, std::vector<int>> max_shape;
+      std::map<std::string, std::vector<int>> min_shape;
+      std::map<std::string, std::vector<int>> opt_shape;
+      for (auto iter = option.trt_option.max_shape.begin(); iter != option.trt_option.max_shape.end(); ++iter) {
+        auto min_iter = option.trt_option.min_shape.find(iter->first);
+        auto opt_iter = option.trt_option.opt_shape.find(iter->first);
+        FDASSERT(min_iter != option.trt_option.min_shape.end(), "Cannot find " + iter->first + " in TrtBackendOption::min_shape.");
+        FDASSERT(opt_iter != option.trt_option.opt_shape.end(), "Cannot find " + iter->first + " in TrtBackendOption::opt_shape.");
+        max_shape[iter->first].assign(iter->second.begin(), iter->second.end());
+        min_shape[iter->first].assign(min_iter->second.begin(), min_iter->second.end());
+        opt_shape[iter->first].assign(opt_iter->second.begin(), opt_iter->second.end());
+      }
+      if (max_shape.size() > 0) {
+        config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
+      }
+#else
+      FDWARNING << "FastDeploy is not compiled with the TensorRT backend, so it will fall back to GPU with the Paddle Inference backend."
+                << std::endl;
+#endif
+    }
   } else {
     config_.DisableGpu();
     if (option.enable_mkldnn) {
diff --git a/csrc/fastdeploy/backends/paddle/paddle_backend.h b/csrc/fastdeploy/backends/paddle/paddle_backend.h
index e634fd7b66d..3f0d9637542 100644
--- a/csrc/fastdeploy/backends/paddle/paddle_backend.h
+++ b/csrc/fastdeploy/backends/paddle/paddle_backend.h
@@ -22,6 +22,10 @@
 #include "fastdeploy/backends/backend.h"
 #include "paddle_inference_api.h"  // NOLINT
 
+#ifdef ENABLE_TRT_BACKEND
+#include "fastdeploy/backends/tensorrt/trt_backend.h"
+#endif
+
 namespace fastdeploy {
 
 struct PaddleBackendOption {
@@ -34,6 +38,12 @@ struct PaddleBackendOption {
 
   bool enable_log_info = false;
 
+  // Enable the TensorRT backend via Paddle2TRT
+  bool enable_trt = false;
+#ifdef ENABLE_TRT_BACKEND
+  TrtBackendOption trt_option;
+#endif
+
   int mkldnn_cache_size = 1;
   int cpu_thread_num = 8;
   // initialize memory size(MB) for GPU
diff --git a/csrc/fastdeploy/fastdeploy_runtime.cc b/csrc/fastdeploy/fastdeploy_runtime.cc
index daf7a239b87..b6121211b44 100644
--- a/csrc/fastdeploy/fastdeploy_runtime.cc
+++ b/csrc/fastdeploy/fastdeploy_runtime.cc
@@ -209,6 +209,16 @@ void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
   pd_mkldnn_cache_size = size;
 }
 
+void RuntimeOption::EnablePaddleToTrt() {
+  FDASSERT(backend == Backend::TRT, "Should call UseTrtBackend() before calling EnablePaddleToTrt().");
+#ifdef ENABLE_PADDLE_BACKEND
+  backend = Backend::PDINFER;
+  pd_enable_trt = true;
+#else
+  FDASSERT(false, "FastDeploy is not compiled with the Paddle Inference backend, so EnablePaddleToTrt() is unavailable.");
+#endif
+}
+
 void RuntimeOption::SetTrtInputShape(const std::string& input_name,
                                      const std::vector<int32_t>& min_shape,
                                      const std::vector<int32_t>& opt_shape,
@@ -312,6 +322,22 @@ void Runtime::CreatePaddleBackend() {
   pd_option.gpu_id = option.device_id;
   pd_option.delete_pass_names = option.pd_delete_pass_names;
   pd_option.cpu_thread_num = option.cpu_thread_num;
+#ifdef ENABLE_TRT_BACKEND
+  if (option.pd_enable_trt) {
+    pd_option.enable_trt = true;
+    auto trt_option = TrtBackendOption();
+    trt_option.gpu_id = option.device_id;
+    trt_option.enable_fp16 = option.trt_enable_fp16;
+    trt_option.enable_int8 = option.trt_enable_int8;
+    trt_option.max_batch_size = option.trt_max_batch_size;
+    trt_option.max_workspace_size = option.trt_max_workspace_size;
+    trt_option.max_shape = option.trt_max_shape;
+    trt_option.min_shape = option.trt_min_shape;
+    trt_option.opt_shape = option.trt_opt_shape;
+    trt_option.serialize_file = option.trt_serialize_file;
+    pd_option.trt_option = trt_option;
+  }
+#endif
   FDASSERT(option.model_format == Frontend::PADDLE,
            "PaddleBackend only support model format of Frontend::PADDLE.");
   backend_ = utils::make_unique<PaddleBackend>();
diff --git a/csrc/fastdeploy/fastdeploy_runtime.h b/csrc/fastdeploy/fastdeploy_runtime.h
index a64ac375d7d..1b2aa44afb2 100644
--- a/csrc/fastdeploy/fastdeploy_runtime.h
+++ b/csrc/fastdeploy/fastdeploy_runtime.h
@@ -63,6 +63,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
   // use tensorrt backend
   void UseTrtBackend();
 
+  void EnablePaddleToTrt();
+
   // use openvino backend
   void UseOpenVINOBackend();
 
@@ -119,8 +121,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
   // ======Only for Paddle Backend=====
   bool pd_enable_mkldnn = true;
-  bool pd_enable_log_info = false;
   int pd_mkldnn_cache_size = 1;
+  bool pd_enable_trt = false;
+  bool pd_enable_log_info = false;
   std::vector<std::string> pd_delete_pass_names;
 
   // ======Only for Trt Backend=======
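
For reviewers, a minimal usage sketch of the new Paddle2TRT path. It relies on the RuntimeOption/Runtime APIs already present in this repo (SetModelPath, UseGpu, UseTrtBackend, Runtime::Init); the model file paths, the input name "x", and the shape ranges are hypothetical placeholders, not values from this patch.

#include "fastdeploy/fastdeploy_runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Hypothetical model files; substitute a real exported Paddle model.
  option.SetModelPath("model.pdmodel", "model.pdiparams");
  option.UseGpu(0);
  // UseTrtBackend() must come first: EnablePaddleToTrt() asserts that
  // backend == Backend::TRT, then switches it to Backend::PDINFER with
  // pd_enable_trt = true so CreatePaddleBackend() fills pd_option.trt_option.
  option.UseTrtBackend();
  option.EnablePaddleToTrt();
  // Dynamic-shape ranges in (min, opt, max) order. Every input registered
  // in max_shape must also have min/opt entries, otherwise the FDASSERTs
  // added in PaddleBackend::BuildOption() fire.
  option.SetTrtInputShape("x", {1, 3, 224, 224}, {4, 3, 224, 224},
                          {8, 3, 224, 224});

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;  // Init reports the failing backend via FDERROR logging.
  }
  return 0;
}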