simplify inference api (#11104)
Superjomn committed Jun 1, 2018
1 parent 86d8659 commit 18d6402
Showing 3 changed files with 36 additions and 27 deletions.
40 changes: 23 additions & 17 deletions paddle/contrib/inference/paddle_inference_api.h
@@ -40,14 +40,23 @@ struct PaddleBuf {
struct PaddleTensor {
std::string name; // variable name.
std::vector<int> shape;
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
PaddleBuf data; // blob of data.
PaddleDType dtype;
};

enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility.
// TODO(Superjomn) support the following engines later.
// kAnakin, // Use Anakin for inference.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
// kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
};

/*
* A simple Inference API for Paddle. Currently this API can be used in
* non-sequence scenarios.
* TODO(Superjomn) Support another API for NLP-related usages.
*/
class PaddlePredictor {
public:
@@ -69,15 +78,6 @@ class PaddlePredictor {
// Destroy the Predictor.
virtual ~PaddlePredictor() {}

enum class EngineKind {
kNative = -1, // Use the native Fluid facility.
// TODO(Superjomn) support latter.
// kAnakin, // Use Anakin for inference.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
// kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
};

// The common configs for all the predictors.
struct Config {
std::string model_dir; // path to the model directory.
@@ -86,18 +86,24 @@
};

struct NativeConfig : public PaddlePredictor::Config {
// GPU related fields.
bool use_gpu{false};
int device;
float fraction_of_gpu_memory;
int device{0};
float fraction_of_gpu_memory{-1.f};  // Negative means not yet set; must be in (0, 1] when use_gpu is true.

std::string prog_file;
std::string param_file;
bool share_variables;
};
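
For reference, a short caller-side sketch (not part of this commit) of how these fields compose; the model path is illustrative, and the use of prog_file/param_file as an alternative to model_dir is an assumption based on the field names:

  paddle::NativeConfig config;
  config.model_dir = "/path/to/model";  // illustrative; prog_file/param_file for a split model (assumption)
  config.use_gpu = false;               // CPU inference; device is ignored in this case
  // For GPU, set use_gpu = true and give fraction_of_gpu_memory a value in
  // (0, 1]; the negative default marks the field as unset.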

// A factory to help create difference predictor.
template <
typename ConfigT,
PaddlePredictor::EngineKind engine = PaddlePredictor::EngineKind::kNative>
// A factory to help create different predictors.
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type and engine kind. Similar
// configs can be merged, but there shouldn't be a huge config containing
// different fields for more than one kind of predictor.
//
// Similarly, each engine kind should map to a unique predictor implementation.
template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);

} // namespace paddle
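
The header above is the whole public surface; a hedged end-to-end sketch of using it follows (not part of this commit). The PaddleBuf member names and the PaddleDType enumerator are assumptions, since their definitions are collapsed in this diff, and Run's output parameter is inferred from the implementation file below:

  #include <vector>
  #include "paddle/contrib/inference/paddle_inference_api.h"

  int main() {
    paddle::NativeConfig config;
    config.model_dir = "/path/to/model";  // illustrative

    auto predictor = paddle::CreatePaddlePredictor<
        paddle::NativeConfig, paddle::PaddleEngineKind::kNative>(config);

    std::vector<float> input(4, 0.f);  // illustrative 1x4 feed

    paddle::PaddleTensor tensor;
    tensor.name = "x";                            // illustrative feed name
    tensor.shape = {1, 4};
    tensor.data.data = input.data();              // assumed PaddleBuf layout
    tensor.data.length = input.size() * sizeof(float);
    tensor.dtype = paddle::PaddleDType::FLOAT32;  // assumed enumerator

    std::vector<paddle::PaddleTensor> outputs;
    predictor->Run({tensor}, &outputs);  // Run(inputs, &outputs), per the .cc below
    return 0;
  }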
22 changes: 13 additions & 9 deletions paddle/contrib/inference/paddle_inference_api_impl.cc
@@ -57,8 +57,7 @@ std::string num2str(T a) {
bool NativePaddlePredictor::Init() {
VLOG(3) << "Predictor::init()";

// TODO(panyx0718): Should CPU vs GPU device be decided by id?
if (config_.device >= 0) {
if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device);
} else {
place_ = paddle::platform::CPUPlace();
@@ -85,11 +84,13 @@ bool NativePaddlePredictor::Init() {
}
ctx_ = executor_->Prepare(*inference_program_, 0);

// Create variables
// TODO(panyx0718): Why need to test share_variables here?
if (config_.share_variables) {
executor_->CreateVariables(*inference_program_, scope_.get(), 0);
}
// Create temporary variables first, so that the first batch does not need to
// create variables at runtime. This is the logic of the old inference API.
// TODO(Superjomn) this should be modified when `Clone` is valid for
// multi-threaded applications.
executor_->CreateVariables(*inference_program_, scope_.get(), 0);

// Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames();
fetch_target_names_ = inference_program_->GetFetchTargetNames();
@@ -124,7 +125,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
scope_.get(),
&feed_targets,
&fetch_targets,
!config_.share_variables);
false /* don't create variables each time */);
if (!GetFetch(fetchs, output_data)) {
LOG(ERROR) << "fail to get fetchs";
return false;
@@ -242,11 +243,14 @@ bool NativePaddlePredictor::GetFetch(

template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddlePredictor::EngineKind::kNative>(
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE(
config.fraction_of_gpu_memory > 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f &&
config.fraction_of_gpu_memory <= 0.95f) {
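
From the caller's perspective, this check means a GPU configuration now has to set the memory fraction explicitly; a small sketch, using the same 0.15 value as the test below:

  paddle::NativeConfig config;
  config.use_gpu = true;
  config.device = 0;
  config.fraction_of_gpu_memory = 0.15;  // must be in (0, 1]; the -1 default now trips the enforce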
1 change: 0 additions & 1 deletion paddle/contrib/inference/test_paddle_inference_api_impl.cc
@@ -47,7 +47,6 @@ NativeConfig GetConfig() {
config.fraction_of_gpu_memory = 0.15;
config.use_gpu = true;
config.device = 0;
config.share_variables = true;
return config;
}

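A hedged sketch of how a test might drive GetConfig() through the simplified factory; the test name and the smoke-style assertion are assumptions, not taken from this commit:

  TEST(paddle_inference_api_impl, create_native_predictor) {  // hypothetical test name
    NativeConfig config = GetConfig();
    auto predictor =
        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
    ASSERT_NE(predictor, nullptr);
    // Feeds/fetches would be built as std::vector<PaddleTensor> and passed to
    // predictor->Run(...), as in the implementation above.
  }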