Skip to content

Commit

Permalink
[cherry-pick] Add FC padding, ernie test unit and layernorm parallel (#…
Browse files Browse the repository at this point in the history
…22198)

* Optimize the kernel implementation of layernorm with openmp (#20895)

* Add ernie c++ inference test (#21015)

* Add ernie unit test
test=develop

* Add ernie unit test
test=develop

* Add ernie unit test
test=develop

* remove ngraph

* optimize gpu test
test=develop

* optimize codes
test=develop

* fix cmake fails on inference_download_and_uncompress (#21185)

* solve cmake fails on inference_download_and_uncompress
test=develop

* solve cmake fails on inference_download_and_uncompress
test=develop

* Add fc padding to improve mkl GEMM's performance when N and K are multiples of 128. (#20972)

* Add fc padding to solve mkl performance
test=develop

* fix gpu pass and error information
test=develop

* fix fc_fuse_pass_test
test=develop

* fix error information
test=develop

* fix error information
test=develop

* fix name and add fc op padding test
test=develop

* fix attributes
test=develop

* optimize fc padding
test=develop

* fix test
test=develop

* Polish the fc code for the case where padding is needed (#21378)

test=develop

* Add ernie large c++ inference test (#21365)

* add ernie-large test
test=develop

* add ernie large c++ inference test
test=develop

* Modify padding strategy: remove weight copy in fc padding (#21650)

test=develop

* optimize fc jit (#21878)

test=develop

Co-authored-by: Yihua Xu <yihuaxu@hotmail.com>
  • Loading branch information
2 people authored and Xreki committed Jan 10, 2020
1 parent e8e1249 commit 3df38f5
Show file tree
Hide file tree
Showing 14 changed files with 545 additions and 156 deletions.
29 changes: 29 additions & 0 deletions paddle/fluid/framework/ir/fc_fuse_pass.cc
Expand Up @@ -89,6 +89,35 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const {
std::string activation_type = with_relu ? "relu" : "";
desc.SetAttr("activation_type", activation_type);

// This is to add padding for dimension 128 on concern of MKL performance
auto* scope = param_scope();
auto* weight = scope->FindVar(w->Name())->GetMutable<LoDTensor>();
auto place = weight->place();
bool use_gpu = Get<bool>("use_gpu");
auto* weight_data = weight->data<float>();
auto weight_dims = weight->dims();
int weight_num = product(weight_dims);
int w_h = weight_dims[0];
int w_w = weight_dims[1];
if (!use_gpu) {
if (w_h % 128 == 0 && w_w % 128 == 0) {
auto* weight_data_tmp = new float[weight_num];
for (int i = 0; i < w_h; i++) {
memcpy(weight_data_tmp + i * w_w, weight_data + i * w_w,
w_w * sizeof(float));
}
weight->Resize(DDim{weight_dims[0] + 4, weight_dims[1] + 4});
auto* weight_data_new =
weight->mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < w_h; i++) {
memcpy(weight_data_new + i * (w_w + 4), weight_data_tmp + i * w_w,
w_w * sizeof(float));
}
delete[] weight_data_tmp;
desc.SetAttr("padding_weights", true);
}
}

// For anakin subgraph int8
// When in anakin subgraph int8 mode, the pattern like "fake_quant + mul +
// fake_dequant" can be detected by the quant_dequant_fuse_pass. This pass
Expand Down
20 changes: 20 additions & 0 deletions paddle/fluid/framework/ir/fc_fuse_pass_tester.cc
Expand Up @@ -21,6 +21,24 @@ namespace paddle {
namespace framework {
namespace ir {

// Test helper: obtains the variable `name` from `param_scope` via Var(),
// resizes its LoDTensor to `dims`, and requests float storage for it on
// platform::CPUPlace. The tensor contents are not written here.
void AddVarToScope(Scope* param_scope, const std::string& name,
                   const DDim& dims) {
  auto* variable = param_scope->Var(name);
  auto* lod_tensor = variable->GetMutable<LoDTensor>();
  lod_tensor->Resize(dims);
  // Called only for the allocation; the returned data pointer is unused.
  lod_tensor->mutable_data<float>(platform::CPUPlace());
}

// Test helper: allocates a new Scope and registers the parameter variables
// listed below through AddVarToScope, each with empty dims. The raw pointer
// is handed back to the caller; nothing in this function releases it.
Scope* CreateParamScope() {
  // Registration order matches the order of this table.
  const char* const param_names[] = {"conv2d_filters_0", "conv2d_bias_0",
                                     "weights_0",        "weights_1",
                                     "bias_1",           "bias_2"};
  Scope* scope = new Scope();
  for (const char* param_name : param_names) {
    AddVarToScope(scope, param_name, {});
  }
  return scope;
}

TEST(FCFusePass, basic) {
// inputs operator output
// --------------------------------------------------------
Expand Down Expand Up @@ -50,6 +68,8 @@ TEST(FCFusePass, basic) {

std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
auto pass = PassRegistry::Instance().Get("fc_fuse_pass");
pass->Set("use_gpu", new bool(true));
graph->Set("__param_scope__", CreateParamScope());
int num_nodes_before = graph->Nodes().size();
int num_mul_nodes_before = GetNumOpNodes(graph, "mul");
VLOG(3) << DebugString(graph);
Expand Down
37 changes: 22 additions & 15 deletions paddle/fluid/inference/tests/api/CMakeLists.txt
Expand Up @@ -27,10 +27,14 @@ function(download_model_and_data install_dir model_name data_name)
download_data(${install_dir} ${data_name})
endfunction()

# Forwards to download_data with the given install directory and the name of
# the reference-result archive (e.g. "Ernie_result.txt.tar.gz").
#   install_dir - directory passed through to download_data
#   result_name - archive file name passed through to download_data
function(download_result install_dir result_name)
download_data(${install_dir} ${result_name})
endfunction()

function(inference_analysis_api_test target install_dir filename)
inference_analysis_test(${target} SRCS ${filename}
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt)
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt --refer_result=${install_dir}/result.txt)
endfunction()

function(inference_analysis_api_test_build TARGET_NAME filename)
Expand Down Expand Up @@ -72,13 +76,6 @@ function(inference_analysis_api_test_with_fake_data_run TARGET_NAME test_binary
--disable_mkldnn_fc=${disable_fc})
endfunction()

function(inference_analysis_api_test_with_refer_result target install_dir filename)
inference_analysis_test(${target} SRCS ${filename}
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt
--refer_result=${install_dir}/result.txt)
endfunction()

function(inference_analysis_api_qat_test_run TARGET_NAME test_binary fp32_model_dir int8_model_dir data_path)
inference_analysis_test_run(${TARGET_NAME}
COMMAND ${test_binary}
Expand Down Expand Up @@ -147,6 +144,20 @@ set(PYRAMID_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/pyramid_dnn")
download_model_and_data(${PYRAMID_DNN_INSTALL_DIR} "PyramidDNN_model.tar.gz" "PyramidDNN_data.txt.tar.gz")
inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR} analyzer_pyramid_dnn_tester.cc)

# Ernie
# Registers test_analyzer_ernie against the model, data and reference result
# placed under ${INFERENCE_DEMO_INSTALL_DIR}/Ernie.
# NOTE(review): download_model_and_data is declared with three parameters
# (install_dir, model_name, data_name); the fourth argument passed below looks
# redundant with the explicit download_result call that follows - confirm.
set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie")
download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" "Ernie_data.txt.tar.gz" "Ernie_result.txt.tar.gz")
download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz")
inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)

# Ernie large
# Reassigns ERNIE_INSTALL_DIR, so from here on the variable refers to the
# Ernie_Large directory. The test is built from analyzer_ernie_tester.cc and
# run with --ernie_large=true.
# NOTE(review): download_model_and_data is declared with three parameters
# (install_dir, model_name, data_name); the fourth argument passed below looks
# redundant with the explicit download_result call that follows - confirm.
set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_Large")
download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_large_model.tar.gz" "Ernie_large_data.txt.tar.gz" "Ernie_large_result.txt.tar.gz")
download_result(${ERNIE_INSTALL_DIR} "Ernie_large_result.txt.tar.gz")
inference_analysis_test(test_analyzer_ernie_large SRCS analyzer_ernie_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
ARGS --infer_model=${ERNIE_INSTALL_DIR}/model --infer_data=${ERNIE_INSTALL_DIR}/data.txt --refer_result=${ERNIE_INSTALL_DIR}/result.txt --ernie_large=true)

# text_classification
set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification")
download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
Expand All @@ -170,14 +181,14 @@ set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
if (NOT EXISTS ${OCR_INSTALL_DIR})
inference_download_and_uncompress(${OCR_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Focr.tar.gz")
endif()
inference_analysis_api_test_with_refer_result(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc)
inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc)

# mobilenet with transpose op
set(MOBILENET_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet")
if (NOT EXISTS ${MOBILENET_INSTALL_DIR})
inference_download_and_uncompress(${MOBILENET_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Fmobilenet.tar.gz")
endif()
inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc)
inference_analysis_api_test(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc)

### Image classification tests with fake data
set(IMG_CLASS_TEST_APP "test_analyzer_image_classification")
Expand Down Expand Up @@ -334,13 +345,9 @@ inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${RESNET50_MODEL_DIR}/model)

set(CAPI_MODEL_INSTALL_PD_DIR "${INFERENCE_DEMO_INSTALL_DIR}/capi_mobilenet")
if (NOT EXISTS ${CAPI_MODEL_INSTALL_PD_DIR})
inference_download_and_uncompress(${CAPI_MODEL_INSTALL_PD_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Fmobilenet.tar.gz")
endif()
inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${CAPI_MODEL_INSTALL_PD_DIR}/model)
ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model)

if(WITH_MKLDNN)
inference_analysis_test(test_analyzer_capi_int SRCS analyzer_capi_int_tester.cc
Expand Down
2 changes: 0 additions & 2 deletions paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
Expand Up @@ -153,7 +153,6 @@ void profile(bool use_mkldnn = false, bool use_ngraph = false) {

if (use_mkldnn) {
config.EnableMKLDNN();
config.pass_builder()->AppendPass("fc_mkldnn_pass");
}

if (use_ngraph) {
Expand Down Expand Up @@ -193,7 +192,6 @@ void compare(bool use_mkldnn = false, bool use_ngraph = false) {
SetConfig(&cfg);
if (use_mkldnn) {
cfg.EnableMKLDNN();
cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
}

if (use_ngraph) {
Expand Down

0 comments on commit 3df38f5

Please sign in to comment.