diff --git a/paddle/fluid/jit/CMakeLists.txt b/paddle/fluid/jit/CMakeLists.txt
index febfe84f4dfb4..69d2d63880c6a 100644
--- a/paddle/fluid/jit/CMakeLists.txt
+++ b/paddle/fluid/jit/CMakeLists.txt
@@ -37,6 +37,7 @@ if(WITH_TESTING AND NOT WIN32)
     COMMAND tar zxf multi_program_load.tar.gz)
   set(JIT_DEPS
       phi
+      phi_api
       elementwise_add_op
       matmul_v2_op
       activation_op
diff --git a/paddle/fluid/jit/executor_function.h b/paddle/fluid/jit/executor_function.h
index a9b9d59d21bf4..a1245a647096b 100644
--- a/paddle/fluid/jit/executor_function.h
+++ b/paddle/fluid/jit/executor_function.h
@@ -38,6 +38,7 @@ class ExecutorFunction : public BaseFunction {
       : info_(info), place_(place), inner_exe_(place_) {
     utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_);
     VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
+    info_->RemoveDescFeedFetch();
   }
 
   ~ExecutorFunction() noexcept {}
diff --git a/paddle/fluid/jit/function_schema.cc b/paddle/fluid/jit/function_schema.cc
index 9fb2f11e7d051..20cbcfdbd1c88 100644
--- a/paddle/fluid/jit/function_schema.cc
+++ b/paddle/fluid/jit/function_schema.cc
@@ -62,8 +62,6 @@ FunctionInfo::FunctionInfo(const std::string& func_name,
   for (auto& out_name : program_desc_.GetFetchTargetNames()) {
     schema_.AddOutputArg(out_name);
   }
-  // remove feed fetch op
-  utils::RemoveFeedFetch(&program_desc_);
 }
 
 const std::string& FunctionInfo::FunctionName() const { return func_name_; }
@@ -84,5 +82,9 @@ const std::vector<std::string> FunctionInfo::OutputArgNames() const {
   return schema_.OutputArgNames();
 }
 
+void FunctionInfo::RemoveDescFeedFetch() {
+  utils::RemoveFeedFetch(&program_desc_);
+}
+
 }  // namespace jit
 }  // namespace paddle
diff --git a/paddle/fluid/jit/function_schema.h b/paddle/fluid/jit/function_schema.h
index 5995fce3e3379..5dcea8517e40e 100644
--- a/paddle/fluid/jit/function_schema.h
+++ b/paddle/fluid/jit/function_schema.h
@@ -70,6 +70,8 @@ class FunctionInfo {
 
   const std::vector<std::string> OutputArgNames() const;
 
+  void RemoveDescFeedFetch();
+
  private:
   std::string func_name_;
   std::vector<std::string> param_names_;
diff --git a/paddle/fluid/jit/layer_test.cc b/paddle/fluid/jit/layer_test.cc
index 793afacb79dc7..9ac99b50e40e1 100644
--- a/paddle/fluid/jit/layer_test.cc
+++ b/paddle/fluid/jit/layer_test.cc
@@ -12,17 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <cmath>
 #include <string>
 
 #include "gtest/gtest.h"
 
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/phi/api/include/api.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
+#include "paddle/fluid/jit/function_utils.h"
 #include "paddle/fluid/jit/layer.h"
 #include "paddle/fluid/jit/serializer.h"
@@ -52,7 +55,7 @@ namespace paddle {
 namespace jit {
 using DenseTensor = phi::DenseTensor;
 
-std::vector<DenseTensor> PrepareInputs(const phi::Place& place) {
+std::vector<Tensor> PrepareInputs(const phi::Place& place) {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   auto& dev_ctx = *pool.Get(place);
 
@@ -61,7 +64,7 @@ std::vector<DenseTensor> PrepareInputs(const phi::Place& place) {
   t.mutable_data<float>(place);
   phi::funcs::set_constant(dev_ctx, &t, 2.);
 
-  return {t};
+  return utils::ToTensors({t});
 }
 
 TEST(CpuLayerTest, Construct) {
@@ -78,34 +81,38 @@ TEST(CpuLayerTest, Construct) {
   outs = (*func)(inputs);
   out_data = outs[0].data<float>();
   EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
+  auto pow_out =
+      paddle::experimental::pow(outs[0], paddle::experimental::Scalar(2));
+  out_data = pow_out.data<float>();
+  EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6);
 }
 
 #if defined(PADDLE_WITH_CUDA)
 TEST(GpuLayerTest, Construct) {
   auto place = phi::GPUPlace();
-  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-  auto& dev_ctx = *pool.Get(place);
-  const auto* dev_ctx_gpu = static_cast<const phi::GPUContext*>(&dev_ctx);
-  DenseTensor cpu_dense_tensor;
 
   std::string path = "./multi_program_load/export";
   auto layer = jit::Load(path, place);
   auto inputs = PrepareInputs(place);
 
   auto outs = layer.forward(inputs);
-  auto out_dense_tensor = outs[0];
-  phi::Copy(
-      *dev_ctx_gpu, out_dense_tensor, phi::CPUPlace(), true, &cpu_dense_tensor);
-  auto out_data = cpu_dense_tensor.data<float>();
+  auto gpu_tensor = outs[0];
+  auto cpu_tensor =
+      paddle::experimental::copy_to(gpu_tensor, phi::CPUPlace(), true);
+  auto out_data = cpu_tensor.data<float>();
   EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
 
   auto func = layer.Function("infer");
   outs = (*func)(inputs);
-  out_dense_tensor = outs[0];
-  phi::Copy(
-      *dev_ctx_gpu, out_dense_tensor, phi::CPUPlace(), true, &cpu_dense_tensor);
-  out_data = cpu_dense_tensor.data<float>();
+  gpu_tensor = outs[0];
+  cpu_tensor = paddle::experimental::copy_to(gpu_tensor, phi::CPUPlace(), true);
+  out_data = cpu_tensor.data<float>();
   EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
+
+  auto sqrt_out = paddle::experimental::sqrt(outs[0]);
+  cpu_tensor = paddle::experimental::copy_to(sqrt_out, phi::CPUPlace(), true);
+  out_data = cpu_tensor.data<float>();
+  EXPECT_NEAR(out_data[0], sqrt(1.41562390), 1e-6);
 }
 #endif
diff --git a/paddle/fluid/jit/pe_function.h b/paddle/fluid/jit/pe_function.h
index f174a0e996467..8dfdfc1bc08a9 100644
--- a/paddle/fluid/jit/pe_function.h
+++ b/paddle/fluid/jit/pe_function.h
@@ -39,6 +39,7 @@ class PEFunction : public BaseFunction {
       : info_(info), place_(place) {
     utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_);
     VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
+    info_->RemoveDescFeedFetch();
   }
 
   ~PEFunction() noexcept {}
@@ -51,13 +52,14 @@ class PEFunction : public BaseFunction {
   std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) {
     std::string prog_string;
     std::hash<std::string> string_hash;
+    auto &program_desc = info_->ProgramDesc();
     // TODO(dev): Serialize is very slow.
-    const_cast<framework::ProgramDesc *>(&(info_->ProgramDesc()))
+    const_cast<framework::ProgramDesc *>(&program_desc)
         ->Proto()
         ->SerializePartialToString(&prog_string);
     int64_t program_id = static_cast<int64_t>(string_hash(prog_string));
-    const framework::BlockDesc &global_block = info_->ProgramDesc().Block(0);
+    const framework::BlockDesc &global_block = program_desc.Block(0);
     int64_t start_op_index = 0;
     int64_t end_op_index = static_cast<int64_t>(global_block.OpSize());
@@ -97,6 +99,8 @@ class PEFunction : public BaseFunction {
     return res;
   }
 
+  const std::shared_ptr<FunctionInfo> &Info() const { return info_; }
+
  private:
   std::shared_ptr<FunctionInfo> info_;
   framework::Scope scope_;
diff --git a/paddle/fluid/jit/serializer.cc b/paddle/fluid/jit/serializer.cc
index a557f9edc6ce3..2dee9ee879a22 100644
--- a/paddle/fluid/jit/serializer.cc
+++ b/paddle/fluid/jit/serializer.cc
@@ -19,8 +19,11 @@
 #include "paddle/fluid/platform/device_context.h"
 
 #include "paddle/fluid/jit/executor_function.h"
+#include "paddle/fluid/jit/pe_function.h"
 #include "paddle/fluid/jit/serializer_utils.h"
 
+DECLARE_string(jit_engine_type);
+
 namespace paddle {
 namespace jit {
@@ -55,9 +58,19 @@ Layer Deserializer::operator()(const std::string& path,
   Layer layer = Layer(infos, params_dict, place);
 
   for (auto& info : infos) {
-    layer.SetFunction(
-        info->FunctionName(),
-        utils::MakeFunction<ExecutorFunction>(info, params_dict, place));
+    if (FLAGS_jit_engine_type == "Executor") {
+      VLOG(3) << "Add function type: ExecutorFunction.";
+      layer.SetFunction(
+          info->FunctionName(),
+          utils::MakeFunction<ExecutorFunction>(info, params_dict, place));
+    } else if (FLAGS_jit_engine_type == "PE") {
+      VLOG(3) << "Add function type: PEFunction.";
+      layer.SetFunction(
+          info->FunctionName(),
+          utils::MakeFunction<PEFunction>(info, params_dict, place));
+    } else {
+      PD_THROW("Invalid JitLayer function type.");
+    }
   }
 
   return layer;
@@ -85,7 +98,7 @@ void Deserializer::ReadAttributeData(const std::string& file_path,
                                      Name2VariableMap* attrs_dict) const {}
 
 framework::ProgramDesc Deserializer::LoadProgram(const std::string& file_name) {
-  VLOG(3) << "LoadProgram " << file_name;
+  VLOG(3) << "LoadProgram from: " << file_name;
   std::ifstream fin(file_name, std::ios::in | std::ios::binary);
   fin.seekg(0, std::ios::end);
   std::string buffer(fin.tellg(), ' ');
diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc
index f4d395a2afaa1..d5a93817354a6 100644
--- a/paddle/fluid/platform/flags.cc
+++ b/paddle/fluid/platform/flags.cc
@@ -916,3 +916,18 @@ PADDLE_DEFINE_EXPORTED_bool(
     einsum_opt,
     false,
     "EinsumOp backward will be speedup at the expense of more gpu memory.");
+
+/**
+ * JitLayer related FLAG
+ * Name: FLAGS_jit_engine_type
+ * Since Version: 2.3.0
+ * Value Range: string, {Executor, PE},
+ * default=PE
+ * Example:
+ * Note:
+ * FLAGS_jit_engine_type == Executor, wrap functions as ExecutorFunction
+ * FLAGS_jit_engine_type == PE, wrap functions as PEFunction
+ */
+PADDLE_DEFINE_EXPORTED_string(jit_engine_type,
+                              "PE",
+                              "Choose the default function type in JitLayer.");
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 185b81677125d..ccb1dae58010f 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -21,6 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/scope_guard.h"
+#include "paddle/fluid/jit/executor_function.h"
+#include "paddle/fluid/jit/pe_function.h"
 #include "paddle/fluid/memory/allocation/allocator.h"
 #include "paddle/fluid/operators/py_func_op.h"
 #include "paddle/fluid/operators/utils.h"
@@ -52,6 +54,7 @@ extern PyTypeObject* g_framework_tensor_pytype;
 extern PyTypeObject* g_framework_lodtensorarray_pytype;
 extern PyTypeObject* g_custom_op_kernel_ctx_pytype;
 extern PyTypeObject* g_executor_function_pytype;
+extern PyTypeObject* g_pe_function_pytype;
 
 int TensorDtype2NumpyDtype(phi::DataType dtype) {
   switch (dtype) {
@@ -234,6 +237,9 @@ std::shared_ptr<jit::BaseFunction> CastPyArg2BaseFunction(PyObject* obj,
           obj, reinterpret_cast<PyObject*>(g_executor_function_pytype))) {
     return ::pybind11::handle(obj)
         .cast<std::shared_ptr<jit::ExecutorFunction>>();
+  } else if (PyObject_IsInstance(
+                 obj, reinterpret_cast<PyObject*>(g_pe_function_pytype))) {
+    return ::pybind11::handle(obj).cast<std::shared_ptr<jit::PEFunction>>();
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "argument (position %d) must be "
diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h
index eb1bbdb303418..2b0793be487e4 100644
--- a/paddle/fluid/pybind/eager_utils.h
+++ b/paddle/fluid/pybind/eager_utils.h
@@ -19,7 +19,7 @@ typedef SSIZE_T ssize_t;
 
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/jit/executor_function.h"
+#include "paddle/fluid/jit/base_function.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/common/backend.h"
 #include "paddle/phi/common/data_type.h"
diff --git a/paddle/fluid/pybind/jit.cc b/paddle/fluid/pybind/jit.cc
index be2ad50400c77..79576e6547f9a 100644
--- a/paddle/fluid/pybind/jit.cc
+++ b/paddle/fluid/pybind/jit.cc
@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/jit/executor_function.h"
 #include "paddle/fluid/jit/function_schema.h"
 #include "paddle/fluid/jit/layer.h"
+#include "paddle/fluid/jit/pe_function.h"
 #include "paddle/fluid/jit/serializer.h"
 
 namespace py = pybind11;
@@ -29,6 +30,7 @@ namespace paddle {
 namespace pybind {
 
 PyTypeObject *g_executor_function_pytype = nullptr;
+PyTypeObject *g_pe_function_pytype = nullptr;
 using Variable = paddle::framework::Variable;
 
 void BindJit(pybind11::module *m) {
@@ -44,6 +46,11 @@ void BindJit(pybind11::module *m) {
       reinterpret_cast<PyTypeObject *>(executor_function.ptr());
   executor_function.def("info", &jit::ExecutorFunction::Info);
 
+  py::class_<jit::PEFunction, std::shared_ptr<jit::PEFunction>> pe_function(
+      *m, "PEFunction", R"DOC(PEFunction Class.)DOC");
+  g_pe_function_pytype = reinterpret_cast<PyTypeObject *>(pe_function.ptr());
+  pe_function.def("info", &jit::PEFunction::Info);
+
   py::class_<jit::FunctionInfo, std::shared_ptr<jit::FunctionInfo>>(
       *m, "FunctionInfo", R"DOC(FunctionInfo Class.)DOC")
       .def("name", &jit::FunctionInfo::FunctionName)
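
Usage sketch (not part of the patch): the new FLAGS_jit_engine_type flag is read in Deserializer::operator(), so the engine is fixed at Load() time. This minimal sketch assumes a model exported to ./multi_program_load/export (the path used by layer_test.cc above) and a host binary that has already parsed gflags (e.g. passing --jit_engine_type=Executor); with no flag given, the "PE" default wraps every function in a PEFunction.

  #include "paddle/fluid/jit/layer.h"
  #include "paddle/fluid/jit/serializer.h"

  void RunInfer() {
    auto place = phi::CPUPlace();
    // Load() deserializes the Layer; each FunctionInfo is wrapped in the
    // engine selected by FLAGS_jit_engine_type ("Executor" -> ExecutorFunction,
    // "PE" -> PEFunction, anything else -> PD_THROW).
    auto layer = paddle::jit::Load("./multi_program_load/export", place);
    auto func = layer.Function("infer");  // std::shared_ptr<BaseFunction>
    auto inputs = PrepareInputs(place);   // prepared as in layer_test.cc above
    auto outs = (*func)(inputs);          // runs through the selected engine
  }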