Skip to content

Commit

Permalink
enable custom device save model on device memory
Browse files Browse the repository at this point in the history
  • Loading branch information
engineer1109 committed Dec 8, 2022
1 parent 47e7b7a commit 809c678
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 9 deletions.
5 changes: 5 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,11 @@ struct Argument {
MixedBlackList,
std::unordered_set<std::string>);

// custom device
// Settings describing the custom (plug-in) device used for inference.
// use_custom_device: whether a custom device is enabled for this run.
DECL_ARGUMENT_FIELD(use_custom_device, UseCustomDevice, bool);
// custom_device_type: device type name; together with the id below it
// identifies the target device (presumably the name the plug-in registered
// under — TODO confirm).
DECL_ARGUMENT_FIELD(custom_device_type, CustomDeviceType, std::string);
// custom_device_id: integer id of the device of that type.
DECL_ARGUMENT_FIELD(custom_device_id, CustomDeviceId, int);

private:
std::unordered_set<std::string> valid_fields_;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"

#include <cstdlib>
#include <string>
#include <unordered_set>

Expand All @@ -26,6 +27,11 @@
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/common/data_type.h"

// Command-line flag (default: false) selecting the legacy behaviour for custom
// devices: when it is true, CopyParamsToCustomDevice returns without moving
// the parameters to the device, so they stay where they were loaded.
// For the "OpenCL" device type the flag is required to be false.
DEFINE_bool(
custom_model_save_cpu,
false,
"Keep old mode for developers, the model is saved on cpu not device.");

namespace paddle {
namespace inference {
namespace analysis {
Expand Down Expand Up @@ -71,9 +77,9 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToNpu(Argument *argument) {
}
}
}
#endif

#else

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
// The parameters are on the cpu, therefore, synchronization is not necessary.
if (!argument->use_gpu()) return;
Expand Down Expand Up @@ -148,21 +154,83 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
}
}
}
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Moves every DenseTensor variable found in the argument's local scope onto
// the configured custom device.
//
// The function is a no-op when the custom device is not enabled on `argument`
// or when FLAGS_custom_model_save_cpu is set (legacy mode, nothing is copied).
// For the "OpenCL" device type the legacy flag is rejected with an
// InvalidArgument error. A null variable in the scope raises
// PreconditionNotMet.
void IrParamsSyncAmongDevicesPass::CopyParamsToCustomDevice(
    Argument *argument) {
  if (!argument->use_custom_device()) return;

  // In the old mode the model is kept on the CPU instead of the device.
  // NOTE(review): the check below rejects `true`, yet the message names
  // "= false" as the developer-only setting; the wording looks inverted.
  // Left untouched here — confirm the intended text before changing it.
  if (argument->custom_device_type() == "OpenCL") {
    PADDLE_ENFORCE_EQ(
        FLAGS_custom_model_save_cpu,
        false,
        phi::errors::InvalidArgument(
            "'FLAGS_custom_model_save_cpu = false' is only for the developers "
            "who have not completed custom device memory settings. Setting to "
            "true will make "
            "model memory reserve on the cpu, and make inference slower."));
  }

  if (FLAGS_custom_model_save_cpu) return;

  // NOTE(review): `repetitive_params` is collected but never consulted by the
  // copy loop below. Confirm whether repeated parameters are supposed to be
  // skipped; the lookup is kept so the graph access stays as it was.
  auto &graph = argument->main_graph();
  std::vector<std::string> repetitive_params;

  if (graph.Has(framework::ir::kRepetitiveParamAttr))
    repetitive_params = graph.Get<std::vector<std::string>>(
        framework::ir::kRepetitiveParamAttr);

  LOG(INFO) << "Sync params from CPU to CustomDevice"
            << argument->custom_device_type() << "/"
            << argument->custom_device_id();

  platform::Place place = platform::CustomPlace(argument->custom_device_type(),
                                                argument->custom_device_id());
  auto *scope = argument->scope_ptr();
  const std::vector<std::string> all_vars = scope->LocalVarNames();

  // The staging place is the same for every variable, so build it once.
  platform::CPUPlace cpu_place;

  for (const auto &var_name : all_vars) {
    auto *var = scope->FindLocalVar(var_name);
    PADDLE_ENFORCE_NOT_NULL(
        var,
        platform::errors::PreconditionNotMet("The var should not be nullptr"));

    // Only dense tensors are transferred. The original tested the very same
    // type twice (`A || A`); a single check is equivalent.
    if (!var->IsType<phi::DenseTensor>()) continue;

    auto *t = var->GetMutable<phi::DenseTensor>();

    // Stage the data in a temporary CPU tensor, release the original
    // storage, then copy the staged data into `t` on the custom device.
    phi::DenseTensor temp_tensor;
    temp_tensor.Resize(t->dims());

    paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
    t->clear();
    paddle::framework::TensorCopySync(temp_tensor, place, t);
  }
}
#endif

// Pass entry point: checks that the scope field of `argument` is valid
// (PreconditionNotMet otherwise) and then runs the parameter sync of every
// device backend that was compiled in.
//
// Each backend sits in its own preprocessor section and is guarded only by
// its own `*_valid()` field, so an unset NPU or GPU field no longer returns
// early and skips the backends that follow (in particular the custom device
// sync). CopyParamsToNpu is referenced only under PADDLE_WITH_ASCEND_CL,
// the same macro that guards its definition.
void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
  PADDLE_ENFORCE_EQ(
      argument->scope_valid(),
      true,
      platform::errors::PreconditionNotMet("The scope field should be valid"));

#ifdef PADDLE_WITH_ASCEND_CL
  if (argument->use_npu_valid()) {
    CopyParamsToNpu(argument);
  }
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (argument->use_gpu_valid()) {
    CopyParamsToGpu(argument);
  }
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
  if (argument->use_custom_device_valid()) {
    CopyParamsToCustomDevice(argument);
  }
#endif
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,15 @@ class IrParamsSyncAmongDevicesPass : public AnalysisPass {
private:
#ifdef PADDLE_WITH_ASCEND_CL
void CopyParamsToNpu(Argument *argument);
#else
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void CopyParamsToGpu(Argument *argument);
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
void CopyParamsToCustomDevice(Argument *argument);
#endif
};

} // namespace analysis
Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,15 @@ void AnalysisPredictor::PrepareArgument() {
}
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
argument_.SetUseCustomDevice(config_.use_custom_device());
if (config_.use_custom_device()) {
LOG(INFO) << "CustomDevice is enabled";
argument_.SetCustomDeviceType(config_.custom_device_type());
argument_.SetCustomDeviceId(config_.custom_device_id());
}
#endif

auto *pass_builder = config_.pass_builder();
if (model_precision_ != phi::DataType::FLOAT32) {
LOG(INFO) << "Model is mixed precision type with " << model_precision_
Expand Down

0 comments on commit 809c678

Please sign in to comment.