Enable custom devices to save the model in device memory

PaddlePaddle · Dec 8, 2022 · 809c678 · 809c678
1 parent 47e7b7a
commit 809c678
Show file tree

Hide file tree

Showing 4 changed files with 97 additions and 9 deletions.
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
@@ -365,6 +365,11 @@ struct Argument {
                       MixedBlackList,
                       std::unordered_set<std::string>);
 
+  // custom device
+  DECL_ARGUMENT_FIELD(use_custom_device, UseCustomDevice, bool);
+  DECL_ARGUMENT_FIELD(custom_device_type, CustomDeviceType, std::string);
+  DECL_ARGUMENT_FIELD(custom_device_id, CustomDeviceId, int);
+
  private:
   std::unordered_set<std::string> valid_fields_;
 };

diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 
+#include <cstdlib>
 #include <string>
 #include <unordered_set>
 
@@ -26,6 +27,11 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/phi/common/data_type.h"
 
+DEFINE_bool(
+    custom_model_save_cpu,
+    false,  // default: CopyParamsToCustomDevice moves params to the device
+    "Keep old mode for developers, the model is saved on cpu not device.");
+
 namespace paddle {
 namespace inference {
 namespace analysis {
@@ -71,9 +77,9 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToNpu(Argument *argument) {
     }
   }
 }
+#endif
 
-#else
-
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
   // The parameters are on the cpu, therefore, synchronization is not necessary.
   if (!argument->use_gpu()) return;
@@ -148,21 +154,83 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
     }
   }
 }
+#endif
+
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+// Moves every phi::DenseTensor found among the scope's local variables onto
+// the CustomPlace given by custom_device_type()/custom_device_id(). Returns
+// early if use_custom_device() is false or FLAGS_custom_model_save_cpu is set.
+void IrParamsSyncAmongDevicesPass::CopyParamsToCustomDevice(
+    Argument *argument) {
+  if (!argument->use_custom_device()) return;
+  // The keep-on-CPU mode is rejected for the "OpenCL" device type.
+  if (argument->custom_device_type() == "OpenCL") {
+    PADDLE_ENFORCE_EQ(
+        FLAGS_custom_model_save_cpu,
+        false,
+        phi::errors::InvalidArgument(
+            "'FLAGS_custom_model_save_cpu = true' is only for the developers "
+            "who have not completed custom device memory settings. Setting to "
+            "true will make "
+            "model memory reserve on the cpu, and make inference slower."));
+  }
+  // Old mode: the model stays on the CPU, so there is nothing to copy.
+  if (FLAGS_custom_model_save_cpu) return;
+
+  auto &graph = argument->main_graph();
+  // NOTE(review): filled in but not consulted by the loop below -- confirm.
+  std::vector<std::string> repetitive_params;
+  if (graph.Has(framework::ir::kRepetitiveParamAttr))
+    repetitive_params = graph.Get<std::vector<std::string>>(
+        framework::ir::kRepetitiveParamAttr);
 
+  LOG(INFO) << "Sync params from CPU to CustomDevice "
+            << argument->custom_device_type() << "/"
+            << argument->custom_device_id();
+
+  platform::Place place = platform::CustomPlace(argument->custom_device_type(),
+                                                argument->custom_device_id());
+  auto *scope = argument->scope_ptr();
+  std::vector<std::string> all_vars = scope->LocalVarNames();
+
+  for (auto &var_name : all_vars) {
+    auto *var = scope->FindLocalVar(var_name);
+    PADDLE_ENFORCE_NOT_NULL(
+        var,
+        platform::errors::PreconditionNotMet("The var should not be nullptr"));
+    if (var->IsType<phi::DenseTensor>()) {
+      auto *t = var->GetMutable<phi::DenseTensor>();
+      // Stage the data in a CPU tensor, clear t, then copy it to the device.
+      platform::CPUPlace cpu_place;
+      phi::DenseTensor temp_tensor;
+      temp_tensor.Resize(t->dims());
+      paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
+      t->clear();
+      paddle::framework::TensorCopySync(temp_tensor, place, t);
+    }
+  }
+}
 #endif
 
 void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
   PADDLE_ENFORCE_EQ(
       argument->scope_valid(),
       true,
       platform::errors::PreconditionNotMet("The scope field should be valid"));
-
 #ifdef PADDLE_WITH_ASCEND_CL
-  if (!argument->use_npu_valid()) return;
-  CopyParamsToNpu(argument);
-#else
-  if (!argument->use_gpu_valid()) return;
-  CopyParamsToGpu(argument);
+  if (argument->use_npu_valid()) {  // independent of the other backends
+    CopyParamsToNpu(argument);
+  }
+#endif  // PADDLE_WITH_ASCEND_CL
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (argument->use_gpu_valid()) {  // no early return when the field is unset
+    CopyParamsToGpu(argument);
+  }
+#endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+  if (argument->use_custom_device_valid()) {  // the copy checks the flag itself
+    CopyParamsToCustomDevice(argument);
+  }
 #endif
 }
 

diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
@@ -37,9 +37,15 @@ class IrParamsSyncAmongDevicesPass : public AnalysisPass {
  private:
 #ifdef PADDLE_WITH_ASCEND_CL
   void CopyParamsToNpu(Argument *argument);
-#else
+#endif
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   void CopyParamsToGpu(Argument *argument);
 #endif
+
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+  void CopyParamsToCustomDevice(Argument *argument);
+#endif
 };
 
 }  // namespace analysis

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1223,6 +1223,15 @@ void AnalysisPredictor::PrepareArgument() {
   }
 #endif
 
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+  argument_.SetUseCustomDevice(config_.use_custom_device());
+  if (config_.use_custom_device()) {
+    LOG(INFO) << "CustomDevice is enabled";
+    argument_.SetCustomDeviceType(config_.custom_device_type());
+    argument_.SetCustomDeviceId(config_.custom_device_id());
+  }
+#endif
+
   auto *pass_builder = config_.pass_builder();
   if (model_precision_ != phi::DataType::FLOAT32) {
     LOG(INFO) << "Model is mixed precision type with " << model_precision_