From 28942cddcc23d27df15b1009305d4b92c8a89337 Mon Sep 17 00:00:00 2001
From: wangliu
Date: Tue, 29 May 2018 13:48:06 +0800
Subject: [PATCH 01/26] refine unit test

---
 src/framework/op_registry.h              |  8 ------
 src/io.cpp                               | 36 ++++--------------------
 src/operators/kernel/arm/conv_kernel.cpp | 11 ++------
 test/executor_for_test.h                 |  6 ++--
 test/operators/test_sigmoid_op.cpp       |  4 ---
 test/operators/test_softmax_op.cpp       |  2 +-
 6 files changed, 11 insertions(+), 56 deletions(-)

diff --git a/src/framework/op_registry.h b/src/framework/op_registry.h
index 233de642be7..62398dcb15d 100644
--- a/src/framework/op_registry.h
+++ b/src/framework/op_registry.h
@@ -90,14 +90,6 @@ class OpRegistry {
       const std::string& type, const VariableNameMap& inputs,
       const VariableNameMap& outputs, const AttributeMap attrs,
       std::shared_ptr<Scope> scope) {
-    LOG(paddle_mobile::kLOG_DEBUG1) << " type: " << type;
-    LOG(paddle_mobile::kLOG_DEBUG1) << " input size: " << inputs.size();
-    LOG(paddle_mobile::kLOG_DEBUG1) << " output size: " << outputs.size();
-    LOG(paddle_mobile::kLOG_DEBUG1) << " attr size: " << attrs.size();
-    LOG(paddle_mobile::kLOG_DEBUG1)
-        << " OpInfoMap size: " << OpInfoMap<Dtype>::Instance()->map().size();
-    LOG(paddle_mobile::kLOG_DEBUG1) << " has type: " << type << " "
-                                    << OpInfoMap<Dtype>::Instance()->Has(type);
     auto& info = OpInfoMap<Dtype>::Instance()->Get(type);
     auto op = info.Creator()(type, inputs, outputs, attrs, scope);
     return std::shared_ptr<OperatorBase<Dtype>>(op);

diff --git a/src/io.cpp b/src/io.cpp
index f03e9a56e2d..d92eec510e3 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -45,7 +45,7 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
   printf("%s \n", file_name);
   FILE *fp;
   fp = fopen(file_name, "rb");
-  PADDLE_MOBILE_ENFORCE(fp != NULL, "open failed !");
+  PADDLE_MOBILE_ENFORCE(fp != NULL, " %s open failed !", file_name);

   fseek(fp, 0, SEEK_END);
   size_t size = ftell(fp);
@@ -210,7 +210,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
           tensor->Resize(framework::make_ddim(dim));
         } else {
           auto dim = var_desc->Tensor_desc().Dims();
-          PADDLE_MOBILE_ENFORCE(dim.size() > 1, "dim size is 0");
+          PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
           dim[0] = 1;
           auto tensor = var->GetMutable<framework::LoDTensor>();
           tensor->Resize(framework::make_ddim(dim));
@@ -380,7 +380,8 @@ void Executor<Dtype, P>::InitMemory() {
                    program_.model_path + "/" + var_desc->Name());
       } else {
         if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
-          auto tensor = var->template GetMutable<framework::Tensor>();
+          auto tensor = var->template GetMutable<framework::LoDTensor>();
+          tensor->template mutable_data<Ptype>();
         }
       }
     }
   }
 }

@@ -388,44 +389,17 @@
-template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> Executor<Dtype, P>::predict(
-    framework::Tensor &t) {
-  // feed
-  auto scope = program_.scope;
-  framework::Variable *g_feed_value = scope->Var("pixel");
-  auto tensor = g_feed_value->GetMutable<framework::Tensor>();
-  tensor->ShareDataWith(t);
-
-  framework::Variable *con_output = scope->Var("conv2d_0.tmp_0");
-  framework::Tensor *output_tensor =
-      con_output->GetMutable<framework::Tensor>();
-  output_tensor->mutable_data<float>({1, 16, 32, 32});
-  // std::cout << typeid(output_tensor).name() << std::endl;
-  // std::cout << "output_tensor dims: " << output_tensor->dims() <<
-  // std::endl;
-
-  std::shared_ptr<framework::Tensor> out_tensor =
-      std::make_shared<framework::Tensor>();
-  out_tensor.reset(output_tensor);
-
-  predict(t, 0);
-  return out_tensor;
-}
-
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::predict(const framework::Tensor &t, int block_id) {
   framework::Variable *g_feed_value = program_.scope->Var("feed");
   auto feed_tensor = g_feed_value->GetMutable<framework::LoDTensor>();
   feed_tensor->Resize(t.dims());
-  feed_tensor->ShareDataWith(t);
-  std::shared_ptr<framework::BlockDesc> to_predict_block =
+  feed_tensor->ShareDataWith(t);
+  std::shared_ptr<framework::BlockDesc> to_predict_block =
       to_predict_program_->Block(block_id);
   for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
     auto op = ops_of_block_[*to_predict_block.get()][j];
-    op->Run();
+    op->Run();
   }
 }

diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp
index c8ac141f9ca..51d99605774 100644
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -44,13 +44,13 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   std::vector<int> paddings = param.Paddings();
   std::vector<int> dilations = param.Dilations();

-  DLOG << " compute end get Attrs " << strides[0];
+// DLOG << " compute end get Attrs " << strides[0];

   const int batch_size = static_cast<int>(input->dims()[0]);

   std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
+  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
   size_t data_dim = filter_shape_vec.size() - 2;
   std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
   col_shape_vec[0] = input->dims()[1] / groups;
@@ -71,8 +71,6 @@
     col_matrix.ShareDataWith(col);
     col_matrix.Resize(col_matrix_shape);
   }
-  DLOG << " col_shape = " << col_shape;
-  DLOG << " col_matrix_shape = " << col_matrix_shape;

   framework::DDim input_shape = framework::slice_ddim(
       input->dims(), 1, static_cast<int>(input->dims().size()));
@@ -80,8 +78,6 @@
   framework::DDim filter_matrix_shape = {filter.dims()[0],
                                          filter.numel() / filter.dims()[0]};
   filter.Resize(filter_matrix_shape);
-  DLOG << " filter.deims() = " << filter.dims();
-
   framework::DDim output_matrix_shape = {
       output->dims()[1],
       output->numel() / (output->dims()[0] * output->dims()[1])};
@@ -118,9 +114,6 @@
       // gemm
       Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
       Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      DLOG << " out_slice " << out_slice.dims();
-      DLOG << " filter_slice " << filter_slice.dims();
-      DLOG << " col_matrix " << col_matrix.dims();
       math::matmul<float>(filter_slice, false, col_matrix, false,
                           static_cast<float>(1), &out_slice,
                           static_cast<float>(0));

diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index 35bc71f1101..045658cbfc8 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -77,13 +77,13 @@ class Executor4Test : public Executor<DeviceType> {
                                   const DDim &dDim) {
     auto scope = this->program_.scope;
     Variable *g_feed_value = scope->Var(input);
-    auto tensor = g_feed_value->GetMutable<Tensor>();
+    auto tensor = g_feed_value->GetMutable<LoDTensor>();
     tensor->ShareDataWith(t);

     Variable *con_output = scope->Var(output);
-    auto *output_tensor = con_output->GetMutable<Tensor>();
+    auto *output_tensor = con_output->GetMutable<LoDTensor>();
     output_tensor->mutable_data<float>(dDim);
-    std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
     out_tensor.reset(output_tensor);

     std::shared_ptr<BlockDesc> to_predict_block =

diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp
index e053ca1e904..adf03761327 100644
--- a/test/operators/test_sigmoid_op.cpp
+++ b/test/operators/test_sigmoid_op.cpp
@@ -19,16 +19,12 @@ limitations under the License. */
 int main() {
   paddle_mobile::framework::Tensor input;
   paddle_mobile::framework::Tensor output;
-  DLOG << 1;
   SetupTensor<float>(&input, {1, 4, 60, 60}, static_cast<float>(0),
                      static_cast<float>(1));
-  DLOG << 2;
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 4, 60, 60});
   output.Resize(out_ddim);
-  DLOG << 3;
   paddle_mobile::operators::sigmoid(&input, &output);
-  DLOG << 4;
   auto *output_ptr = output.data<float>();
   for (int j = 0; j < output.numel(); ++j) {
     DLOG << " value of output: " << output_ptr[j];

diff --git a/test/operators/test_softmax_op.cpp b/test/operators/test_softmax_op.cpp
index 5dd42e83e3c..ed5a1a49f55 100644
--- a/test/operators/test_softmax_op.cpp
+++ b/test/operators/test_softmax_op.cpp
@@ -18,7 +18,7 @@ limitations under the License. */
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("models/mobilenet"));
+  auto program = loader.Load(std::string("../models/mobilenet"));
   if (program.originProgram == nullptr) {
     DLOG << "program read file";
   }
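[Editor's note] Patch 01 switches ReadBuffer over to the variadic, printf-style form of
PADDLE_MOBILE_ENFORCE so the failing file name shows up in the message. A minimal sketch
of that calling pattern, assuming the macro keeps its (condition, format, args...) shape;
the FileSize helper below is hypothetical and only illustrates the macro usage:

    #include <cstdio>
    #include "common/enforce.h"

    static size_t FileSize(const char *file_name) {
      FILE *fp = fopen(file_name, "rb");
      // condition first, then a format string plus its arguments
      PADDLE_MOBILE_ENFORCE(fp != NULL, " %s open failed !", file_name);
      fseek(fp, 0, SEEK_END);
      size_t size = ftell(fp);
      fclose(fp);
      return size;
    }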
From d5230003971cdabae4c36521b389e1c93fe1e222 Mon Sep 17 00:00:00 2001
From: wangliu
Date: Tue, 29 May 2018 13:55:25 +0800
Subject: [PATCH 02/26] modify code style

---
 src/io.cpp                               | 2 +-
 src/operators/kernel/arm/conv_kernel.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/io.cpp b/src/io.cpp
index d92eec510e3..e0df6e732ec 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -399,7 +399,7 @@ void Executor<Dtype, P>::predict(const framework::Tensor &t, int block_id) {
       to_predict_program_->Block(block_id);
   for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
     auto op = ops_of_block_[*to_predict_block.get()][j];
-    op->Run();
+    op->Run();
   }
 }

diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp
index 51d99605774..35ee1950329 100644
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -44,7 +44,7 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   std::vector<int> paddings = param.Paddings();
   std::vector<int> dilations = param.Dilations();

-// DLOG << " compute end get Attrs " << strides[0];
+  // DLOG << " compute end get Attrs " << strides[0];

   const int batch_size = static_cast<int>(input->dims()[0]);
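[Editor's note] Patch 03 below generalizes Executor4Test::predict from a single
input/output pair to parallel vectors of tensors, variable names, and output dims.
A minimal sketch of the calling pattern it enables, assuming the signatures shown in
the diff; the tensor shapes and variable names are illustrative (taken from the mul test):

    // Build two inputs, name the graph variables to feed/fetch, then run.
    vector<Tensor> input_tensors;
    Tensor a, b;
    CreateInput<float>(&a, {3, 2, 1, 1}, 0, 1);  // x: shape {3,2,1,1}
    CreateInput<float>(&b, {2, 3}, 0, 1);        // y: shape {2,3}
    input_tensors.push_back(a);
    input_tensors.push_back(b);

    vector<string> input_names({"pool2d_0.tmp_0", "fc_0.w_0"});
    vector<string> output_names({"fc_0.tmp_0"});
    vector<DDim> out_ddims({paddle_mobile::framework::make_ddim({3, 3})});

    // One shared_ptr<Tensor> per requested output, in output_names order.
    auto outs = executor.predict<LoDTensor>(input_tensors, input_names,
                                            output_names, out_ddims);
    auto *out_data = outs[0]->data<float>();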
From 030daef95b2c2cae0efc44c065f27366eae158ab Mon Sep 17 00:00:00 2001
From: eclipsess
Date: Tue, 29 May 2018 15:56:32 +0800
Subject: [PATCH 03/26] update excutor for test

---
 src/framework/tensor.h                     |   3 +-
 src/io.cpp                                 |  11 +-
 src/operators/kernel/arm/conv_kernel.cpp   |   3 +-
 src/operators/op_param.h                   |  12 +-
 test/CMakeLists.txt                        |   4 +-
 test/executor_for_test.h                   |  40 +++--
 test/operators/test_batchnorm_op.cpp       |  12 +-
 test/operators/test_box_coder_op.cpp       |   8 +-
 test/operators/test_concat_op.cpp          | 188 ++++++--------------
 test/operators/test_elementwise_add_op.cpp | 151 ++++------------
 test/operators/test_fushion_fc_op.cpp      |  16 +-
 test/operators/test_lrn_op.cpp             | 131 ++++----------
 test/operators/test_mul_op.cpp             | 189 ++++++---------------
 test/operators/test_multiclass_nms_op.cpp  |   6 +-
 test/operators/test_prior_box_op.cpp       |   8 +-
 test/operators/test_relu_op.cpp            |  37 ++--
 test/test_helper.h                         |   6 +
 17 files changed, 278 insertions(+), 547 deletions(-)

diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 7fdb52c435c..b6a7c724ad1 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -219,7 +219,8 @@ class Tensor {
   inline void check_memory_size() const {
     PADDLE_MOBILE_ENFORCE(
-        holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
+        holder_ != nullptr,
+        "Tensor holds no memory. Call Tensor::mutable_data first.");
     PADDLE_MOBILE_ENFORCE(
         numel() * SizeOfType(type()) <= memory_size(),
         "Tensor's dims_ is out of bound. CallTensor::mutable_data "

diff --git a/src/io.cpp b/src/io.cpp
index f03e9a56e2d..271a3190ae0 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -210,7 +210,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
           tensor->Resize(framework::make_ddim(dim));
         } else {
           auto dim = var_desc->Tensor_desc().Dims();
-          PADDLE_MOBILE_ENFORCE(dim.size() > 1, "dim size is 0");
+          PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
           dim[0] = 1;
           auto tensor = var->GetMutable<framework::LoDTensor>();
           tensor->Resize(framework::make_ddim(dim));
@@ -221,7 +221,7 @@
     }
   }

-  // originProgramDesc->Description("program: ");
+  originProgramDesc->Description("program: ");

   paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
   return program;
@@ -380,7 +380,7 @@ void Executor<Dtype, P>::InitMemory() {
                    program_.model_path + "/" + var_desc->Name());
       } else {
         if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
-          auto tensor = var->template GetMutable<framework::Tensor>();
+          auto tensor = var->template GetMutable<framework::LoDTensor>();
           tensor->template mutable_data<Ptype>();
         }
       }
@@ -416,7 +416,8 @@
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::predict(const framework::Tensor &t, int block_id) {
   framework::Variable *g_feed_value = program_.scope->Var("feed");
-  auto feed_tensor = g_feed_value->GetMutable<framework::LoDTensor>();
+  framework::Tensor *feed_tensor =
+      g_feed_value->GetMutable<framework::LoDTensor>();
   feed_tensor->Resize(t.dims());
   feed_tensor->ShareDataWith(t);
@@ -434,7 +435,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::predict(
     const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
   DLOG << "start predict: ";

-  framework::Tensor tensor;
+  framework::LoDTensor tensor;
   auto ddim = framework::make_ddim(dims);

   auto input_ptr = tensor.mutable_data<Ptype>(ddim);

diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp
index c8ac141f9ca..7a566b6ac8f 100644
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -38,7 +38,6 @@ void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   Tensor filter = *param.Filter();
   Tensor *output = param.Output();
   output->mutable_data<float>();
-
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
   std::vector<int> paddings = param.Paddings();
   std::vector<int> dilations = param.Dilations();
@@ -80,7 +79,7 @@
   framework::DDim filter_matrix_shape = {filter.dims()[0],
                                          filter.numel() / filter.dims()[0]};
   filter.Resize(filter_matrix_shape);
-  DLOG << " filter.deims() = " << filter.dims();
+  DLOG << " filter.dims() = " << filter.dims();

   framework::DDim output_matrix_shape = {
       output->dims()[1],

diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 5ac6fc67af5..02bda7147aa 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -207,7 +207,7 @@ class ConvParam : OpParam {
   const Tensor *Input() const { return input_; }

-  const LoDTensor *Filter() const { return filter_; }
+  const Tensor *Filter() const { return filter_; }

   Tensor *Output() const { return output_; }
@@ -222,7 +222,7 @@
  private:
   Tensor *input_;
   Tensor *output_;
-  LoDTensor *filter_;
+  Tensor *filter_;
   vector<int> strides_;
   vector<int> paddings_;
   vector<int> dilations_;
@@ -717,10 +717,10 @@ class FushionFcParam : public OpParam {
  public:
  FushionFcParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
                 const AttributeMap &attrs, const Scope &scope) {
-    input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
-    input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
-    input_z_ = InputZFrom<framework::Tensor>(inputs, scope);
-    out_ = OutFrom<framework::Tensor>(outputs, scope);
+    input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
+    input_y_ = InputYFrom<framework::LoDTensor>(inputs, scope);
+    input_z_ = InputZFrom<framework::LoDTensor>(inputs, scope);
+    out_ = OutFrom<framework::LoDTensor>(outputs, scope);
     x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
     y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
     axis_ = GetAttr<int>("axis", attrs);

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 20d6cfe7a78..f464c3bd94f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -11,11 +11,11 @@ ADD_EXECUTABLE(test-mul-op operators/test_mul_op.cpp test_helper.h test_include.h)
 target_link_libraries(test-mul-op paddle-mobile)

 # gen test
-ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h)
+ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h)
 target_link_libraries(test-elementwiseadd-op paddle-mobile)

 # gen test
-ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h)
+ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h)
 target_link_libraries(test-concat-op paddle-mobile)

 # gen test

diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index 35bc71f1101..a54a8bb191a 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -21,6 +21,7 @@ limitations under the License. */
 #include "common/log.h"
 #include "framework/op_registry.h"
 #include "operators/conv_op.h"
+#include "operators/elementwise_add_op.h"
 #include "operators/pool_op.h"
 #include "operators/relu_op.h"
 #include "operators/reshape_op.h"
@@ -37,6 +38,7 @@
 using paddle_mobile::framework::Program;
 using paddle_mobile::framework::Tensor;
 using paddle_mobile::framework::Variable;
 using std::string;
+using std::vector;

 template <typename DeviceType, typename OpType>
 class Executor4Test : public Executor<DeviceType> {
  public:
@@ -73,18 +75,34 @@
     }
   }

-  std::shared_ptr<Tensor> predict(const Tensor &t, string input, string output,
-                                  const DDim &dDim) {
+  template <typename T = LoDTensor>
+  vector<std::shared_ptr<Tensor>> predict(const vector<Tensor> &ts,
+                                          const vector<string> &input_names,
+                                          const vector<string> &output_names,
+                                          const vector<DDim> &ddims) {
     auto scope = this->program_.scope;
-    Variable *g_feed_value = scope->Var(input);
-    auto tensor = g_feed_value->GetMutable<LoDTensor>();
-    tensor->ShareDataWith(t);
+    size_t input_size = input_names.size();
+    size_t out_size = output_names.size();

-    Variable *con_output = scope->Var(output);
-    auto *output_tensor = con_output->GetMutable<LoDTensor>();
-    output_tensor->mutable_data<float>(dDim);
-    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-    out_tensor.reset(output_tensor);
+    vector<Variable *> input_vars(input_size);
+    vector<LoDTensor *> input_tensors(input_size);
+    for (int i = 0; i < input_size; i++) {
+      input_vars[i] = scope->Var(input_names[i]);
+      input_tensors[i] = input_vars[i]->GetMutable<T>();
+      input_tensors[i]->ShareDataWith(ts[i]);
+    }
+
+    vector<Variable *> output_vars(out_size);
+    vector<LoDTensor *> output_tensors(out_size);
+    vector<std::shared_ptr<Tensor>> output_tensor_sptrs(out_size);
+
+    for (int i = 0; i < out_size; i++) {
+      output_vars[i] = scope->Var(output_names[i]);
+      output_tensors[i] = output_vars[i]->GetMutable<T>();
+      output_tensors[i]->mutable_data<float>(ddims[i]);
+      output_tensor_sptrs[i] = std::make_shared<Tensor>();
+      output_tensor_sptrs[i].reset(output_tensors[i]);
+    }

     std::shared_ptr<BlockDesc> to_predict_block =
         this->to_predict_program_->Block(0);
@@ -94,6 +112,6 @@
       op->Run();
     }

-    return out_tensor;
+    return output_tensor_sptrs;
   }
 };

diff --git a/test/operators/test_batchnorm_op.cpp b/test/operators/test_batchnorm_op.cpp
index 385617317df..ba2e06b80b4 100644
--- a/test/operators/test_batchnorm_op.cpp
+++ b/test/operators/test_batchnorm_op.cpp
@@ -68,27 +68,27 @@ class TestBatchNormOp {
     // feed
     auto scope = program_.scope;
     Variable *x1_feed_value = scope->Var("conv2d_0.tmp_0");
-    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
+    auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
     tensor_x1->ShareDataWith(t1);

     Variable *mean_feed_value = scope->Var("batch_norm_0.w_1");
-    auto tensor_mean = mean_feed_value->GetMutable<Tensor>();
+    auto tensor_mean = mean_feed_value->GetMutable<LoDTensor>();
     tensor_mean->ShareDataWith(t2);

     Variable *scale_feed_value = scope->Var("batch_norm_0.w_0");
-    auto tensor_scale = scale_feed_value->GetMutable<Tensor>();
+    auto tensor_scale = scale_feed_value->GetMutable<LoDTensor>();
     tensor_scale->ShareDataWith(t3);

     Variable *variance_feed_value = scope->Var("batch_norm_0.w_2");
-    auto tensor_variance = variance_feed_value->GetMutable<Tensor>();
+    auto tensor_variance = variance_feed_value->GetMutable<LoDTensor>();
     tensor_variance->ShareDataWith(t4);

     Variable *bias_feed_value = scope->Var("batch_norm_0.b_0");
-    auto tensor_bias = bias_feed_value->GetMutable<Tensor>();
+    auto tensor_bias = bias_feed_value->GetMutable<LoDTensor>();
     tensor_bias->ShareDataWith(t5);

     Variable *output = scope->Var("batch_norm_0.tmp_2");
-    auto *output_tensor = output->GetMutable<Tensor>();
+    auto *output_tensor = output->GetMutable<LoDTensor>();
     output_tensor->mutable_data<float>({4, 10, 2, 2});
     // DLOG << typeid(output_tensor).name();
     // DLOG << "output_tensor dims: " << output_tensor->dims();

diff --git a/test/operators/test_box_coder_op.cpp b/test/operators/test_box_coder_op.cpp
index dea59e8bf2c..b7695c91dfb 100644
--- a/test/operators/test_box_coder_op.cpp
+++ b/test/operators/test_box_coder_op.cpp
@@ -62,19 +62,19 @@ class TestBoxCoderOp {
     // feed
     auto scope = program_.scope;
     Variable *prior_box = scope->Var("concat_0.tmp_0");
-    auto tensor_x1 = prior_box->GetMutable<Tensor>();
+    auto tensor_x1 = prior_box->GetMutable<LoDTensor>();
     tensor_x1->ShareDataWith(t1);

     Variable *prior_box_var = scope->Var("concat_1.tmp_0");
-    auto tensor_x2 = prior_box_var->GetMutable<Tensor>();
+    auto tensor_x2 = prior_box_var->GetMutable<LoDTensor>();
     tensor_x2->ShareDataWith(t2);

     Variable *target_box = scope->Var("concat_2.tmp_0");
-    auto tensor_x3 = target_box->GetMutable<Tensor>();
+    auto tensor_x3 = target_box->GetMutable<LoDTensor>();
     tensor_x3->ShareDataWith(t3);

     Variable *boxes_output = scope->Var("box_coder_0.tmp_0");
-    auto *boxes_output_tensor = boxes_output->GetMutable<Tensor>();
+    auto *boxes_output_tensor = boxes_output->GetMutable<LoDTensor>();
     boxes_output_tensor->mutable_data<float>({1, 1917, 4});

     // DLOG << typeid(output_tensor).name();

diff --git a/test/operators/test_concat_op.cpp b/test/operators/test_concat_op.cpp
index 205274ea7ab..a9bb072f1e9 100644
--- a/test/operators/test_concat_op.cpp
+++ b/test/operators/test_concat_op.cpp
@@ -12,148 +12,64 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#pragma once
+#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/concat_op.h"

-namespace paddle_mobile {
-namespace framework {
-
-template <typename Dtype>
-class TestConcatOp {
- public:
-  explicit TestConcatOp(const Program<Dtype> p) : program_(p) {
-    if (use_optimize_) {
-      to_predict_program_ = program_.optimizeProgram;
-    } else {
-      to_predict_program_ = program_.originProgram;
-    }
-
-    const std::vector<std::shared_ptr<BlockDesc>> blocks =
-        to_predict_program_->Blocks();
-    // DLOG << " **block size " << blocks.size();
-    for (int i = 0; i < blocks.size(); ++i) {
-      std::shared_ptr<BlockDesc> block_desc = blocks[i];
-      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-      // DLOG << " ops " << ops.size();
-      for (int j = 0; j < ops.size(); ++j) {
-        std::shared_ptr<OpDesc> op = ops[j];
-        if (op->Type() == "concat" && op->Input("X")[0] == "conv2d_3.tmp_1") {
-          DLOG << " mul attr size: " << op->GetAttrMap().size();
-          DLOG << " inputs size: " << op->GetInputs().size();
-          DLOG << " outputs size: " << op->GetOutputs().size();
-          DLOG << " Input X is : " << op->Input("X")[0];
-          DLOG << " Output Out is : " << op->Output("Out")[0];
-          DLOG << " axis : " << op->GetAttrMap().at("axis").Get<int>();
-
-          std::shared_ptr<operators::ConcatOp<Dtype, float>> concat =
-              std::make_shared<operators::ConcatOp<Dtype, float>>(
-                  op->Type(), op->GetInputs(), op->GetOutputs(),
-                  op->GetAttrMap(), program_.scope);
-          ops_of_block_[*block_desc.get()].push_back(concat);
-        }
-      }
-    }
-  }
-
-  std::shared_ptr<Tensor> predict_concat(const Tensor &t1, const Tensor &t2,
-                                         const Tensor &t3, const Tensor &t4) {
-    // feed
-    auto scope = program_.scope;
-    Variable *x1_feed_value = scope->Var("conv2d_3.tmp_1");
-    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
-    tensor_x1->ShareDataWith(t1);
-
-    Variable *x2_feed_value = scope->Var("conv2d_5.tmp_1");
-    auto tensor_x2 = x2_feed_value->GetMutable<Tensor>();
-    tensor_x2->ShareDataWith(t2);
-
-    Variable *x3_feed_value = scope->Var("conv2d_7.tmp_1");
-    auto tensor_x3 = x3_feed_value->GetMutable<Tensor>();
-    tensor_x3->ShareDataWith(t3);
-
-    Variable *x4_feed_value = scope->Var("conv2d_8.tmp_1");
-    auto tensor_x4 = x4_feed_value->GetMutable<Tensor>();
-    tensor_x4->ShareDataWith(t4);
-
-    Variable *con_output = scope->Var("concat_0.tmp_0");
-    auto *output_tensor = con_output->GetMutable<Tensor>();
-    output_tensor->mutable_data<float>({4, 100, 2, 2});
-    // DLOG << typeid(output_tensor).name();
-    // DLOG << "output_tensor dims: " << output_tensor->dims();
-
-    std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
-    out_tensor.reset(output_tensor);
-
-    predict_concat(t1, t2, t3, t4, 0);
-    return out_tensor;
-  }
-
- private:
-  const framework::Program<Dtype> program_;
-  std::shared_ptr<ProgramDesc> to_predict_program_;
-  std::map<framework::BlockDesc,
-           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-      ops_of_block_;
-  bool use_optimize_ = false;
-
-  void predict_concat(const Tensor &t1, const Tensor &t2, const Tensor &t3,
-                      const Tensor &t4, int block_id) {
-    std::shared_ptr<BlockDesc> to_predict_block =
-        to_predict_program_->Block(block_id);
-    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
-      auto op = ops_of_block_[*to_predict_block.get()][j];
-      DLOG << "op -> run()";
-      op->Run();
-    }
-  }
-};
-
-template class TestConcatOp<CPU>;
-}  // namespace framework
-}  // namespace paddle_mobile
-
 int main() {
-  DLOG << "----------**********----------";
-  DLOG << "begin to run ConcatOp Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../../test/models/googlenet"));
-
-  /// input x (4,10,2,2)
-  paddle_mobile::framework::Tensor inputx1;
-  SetupTensor<float>(&inputx1, {4, 10, 2, 2}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputx1_ptr = inputx1.data<float>();
-  /// input x (4,20,2,2)
-  paddle_mobile::framework::Tensor inputx2;
-  SetupTensor<float>(&inputx2, {4, 20, 2, 2}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputx2_ptr = inputx2.data<float>();
-  /// input x (4,30,2,2)
-  paddle_mobile::framework::Tensor inputx3;
-  SetupTensor<float>(&inputx3, {4, 30, 2, 2}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputx3_ptr = inputx3.data<float>();
-  /// input x (4,40,2,2)
-  paddle_mobile::framework::Tensor inputx4;
-  SetupTensor<float>(&inputx4, {4, 40, 2, 2}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputx4_ptr = inputx4.data<float>();
-
-  paddle_mobile::framework::TestConcatOp<paddle_mobile::CPU> testConcatOp(
-      program);
-
-  auto output_concat =
-      testConcatOp.predict_concat(inputx1, inputx2, inputx3, inputx4);
-  auto *output_concat_ptr = output_concat->data<float>();
-
+  auto program = loader.Load(g_googlenet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ConcatOp<paddle_mobile::CPU, float>>
+      executor(program, "concat");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {4, 10, 2, 2}, 0, 1);
+  input_tensors.push_back(input1);
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {4, 20, 2, 2}, 0, 1);
+  input_tensors.push_back(input2);
+  Tensor input3;
+  auto input3_data = CreateInput<float>(&input3, {4, 30, 2, 2}, 0, 1);
+  input_tensors.push_back(input3);
+  Tensor input4;
+  auto input4_data = CreateInput<float>(&input4, {4, 40, 2, 2}, 0, 1);
+  input_tensors.push_back(input4);
+  // 2. input_names
+  vector<string> input_names({
+      "conv2d_3.tmp_1",
+      "conv2d_5.tmp_1",
+      "conv2d_7.tmp_1",
+      "conv2d_8.tmp_1",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"concat_0.tmp_0"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({3, 100, 2, 2});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  // 5. test one example.
   int input_n = 1;
   int input_c = 2;
   int input_h = 0;
   int input_w = 1;
-  int stride0 = inputx3.numel() / inputx3.dims()[0];
-  int stride1 = inputx3.numel() / inputx3.dims()[0] / inputx3.dims()[1];
-  int stride2 = inputx3.dims()[3];
+  int stride0 = input3.numel() / input3.dims()[0];
+  int stride1 = input3.numel() / input3.dims()[0] / input3.dims()[1];
+  int stride2 = input3.dims()[3];
   /// inputx1 (4,10,2,2),
   /// inputx2 (4,20,2,2),
   /// inputx3 (4,30,2,2),
@@ -163,10 +79,10 @@
   int input_index =
       input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
   int output_index = input_n * 100 * 2 * 2 +
-                     (input_c + inputx1.dims()[1] + inputx2.dims()[1]) * 2 * 2 +
+                     (input_c + input1.dims()[1] + input2.dims()[1]) * 2 * 2 +
                      input_h * 2 + input_w;
-  DLOG << " inputx3[1,2,0,1] = " << inputx3_ptr[input_index];
-  DLOG << " output[1,12,0,1] = " << output_concat_ptr[output_index];
+  DLOG << " input3 [1, 2,0,1] = " << input3_data[input_index];
+  DLOG << " output [1,32,0,1] = " << output0_data[output_index];
   return 0;
 }
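[Editor's note] The index arithmetic at the end of the concat test can be read as plain
channel-offset math; a standalone sketch, assuming NCHW layout and concatenation along
axis 1 (all names here are illustrative, not part of the patch):

    // For inputs with channel counts c0, c1, c2, ... concatenated on axis 1,
    // element (n, c, h, w) of input k lands at channel c + c0 + ... + c(k-1)
    // of the output. With shapes {4,10,2,2}, {4,20,2,2}, {4,30,2,2}, {4,40,2,2}
    // the output is {4,100,2,2}, so input3's (1,2,0,1) maps to output (1,32,0,1).
    int n = 1, c = 2, h = 0, w = 1;
    int channel_offset = 10 + 20;              // channels before input3
    int out_c = c + channel_offset;            // 32
    int out_index = ((n * 100 + out_c) * 2 + h) * 2 + w;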
diff --git a/test/operators/test_elementwise_add_op.cpp b/test/operators/test_elementwise_add_op.cpp
index eeb642a3f48..1b4bf457a2c 100644
--- a/test/operators/test_elementwise_add_op.cpp
+++ b/test/operators/test_elementwise_add_op.cpp
@@ -12,133 +12,52 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#pragma once
+#include "../executor_for_test.h"
 #include "../test_include.h"
-#include "operators/elementwise_add_op.h"

-namespace paddle_mobile {
-namespace framework {
-
-template <typename Dtype>
-class TestElementwiseAddOp {
- public:
-  explicit TestElementwiseAddOp(const Program<Dtype> p) : program_(p) {
-    if (use_optimize_) {
-      to_predict_program_ = program_.optimizeProgram;
-    } else {
-      to_predict_program_ = program_.originProgram;
-    }
-
-    const std::vector<std::shared_ptr<BlockDesc>> blocks =
-        to_predict_program_->Blocks();
-    // DLOG << " **block size " << blocks.size();
-    for (int i = 0; i < blocks.size(); ++i) {
-      std::shared_ptr<BlockDesc> block_desc = blocks[i];
-      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-      // DLOG << " ops " << ops.size();
-      for (int j = 0; j < ops.size(); ++j) {
-        std::shared_ptr<OpDesc> op = ops[j];
-        if (op->Type() == "elementwise_add" &&
-            op->Input("X")[0] == "batch_norm_2.tmp_2") {
-          DLOG << " elementwise_add attr size: " << op->GetAttrMap().size();
-          DLOG << " inputs size: " << op->GetInputs().size();
-          DLOG << " outputs size: " << op->GetOutputs().size();
-          DLOG << " Input X is : " << op->Input("X")[0];
-          DLOG << " Input Y is : " << op->Input("Y")[0];
-          DLOG << " Output Out is : " << op->Output("Out")[0];
-          Attribute axis_attr = op->GetAttrMap().at("axis");
-          int axis = axis_attr.Get<int>();
-          DLOG << " Attr axis is : " << axis;
-
-          std::shared_ptr<operators::ElementwiseAddOp<Dtype, float>> add =
-              std::make_shared<operators::ElementwiseAddOp<Dtype, float>>(
-                  op->Type(), op->GetInputs(), op->GetOutputs(),
-                  op->GetAttrMap(), program_.scope);
-          ops_of_block_[*block_desc.get()].push_back(add);
-        }
-      }
-    }
-  }
-
-  std::shared_ptr<Tensor> predict_add(const Tensor &t1, const Tensor &t2) {
-    // feed
-    auto scope = program_.scope;
-    Variable *x_feed_value = scope->Var("batch_norm_2.tmp_2");
-    auto tensor_x = x_feed_value->GetMutable<Tensor>();
-    tensor_x->ShareDataWith(t1);
-
-    Variable *y_feed_value = scope->Var("batch_norm_0.tmp_3");
-    auto tensor_y = y_feed_value->GetMutable<Tensor>();
-    tensor_y->ShareDataWith(t2);
-
-    Variable *con_output = scope->Var("elementwise_add_0.tmp_0");
-    auto *output_tensor = con_output->GetMutable<Tensor>();
-    output_tensor->mutable_data<float>({1, 3, 224, 224});
-    // DLOG << typeid(output_tensor).name();
-    // DLOG << "output_tensor dims: " << output_tensor->dims();
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_resnet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");

-    std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
-    out_tensor.reset(output_tensor);
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ElementwiseAddOp<paddle_mobile::CPU,
+                                                           float>>
+      executor(program, "elementwise_add");

-    predict_add(t1, t2, 0);
-    return out_tensor;
-  }
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;

- private:
-  const framework::Program<Dtype> program_;
-  std::shared_ptr<ProgramDesc> to_predict_program_;
-  std::map<framework::BlockDesc,
-           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-      ops_of_block_;
-  bool use_optimize_ = false;
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {1, 3, 224, 224}, 0, 1);
+  input_tensors.push_back(input1);

-  void predict_add(const Tensor &t1, const Tensor &t2, int block_id) {
-    std::shared_ptr<BlockDesc> to_predict_block =
-        to_predict_program_->Block(block_id);
-    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
-      auto op = ops_of_block_[*to_predict_block.get()][j];
-      DLOG << "op -> run()";
-      op->Run();
-    }
-  }
-};
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {224}, 0, 1);
+  input_tensors.push_back(input2);

-template class TestElementwiseAddOp<CPU>;
-}  // namespace framework
-}  // namespace paddle_mobile
-int main() {
-  DLOG << "----------**********----------";
-  DLOG << "begin to run ElementAddOp Test";
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program =
-      loader.Load(std::string("../models/"
-                              "image_classification_resnet.inference.model"));
+  // 2. input_names
+  vector<string> input_names({
+      "batch_norm_2.tmp_2",
+      "batch_norm_0.tmp_3",
+  });

-  /// input x (1,3,224,224)
-  paddle_mobile::framework::Tensor inputx;
-  SetupTensor<float>(&inputx, {1, 3, 224, 224}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputx_ptr = inputx.data<float>();
-  /// input y (224,)
-  paddle_mobile::framework::Tensor inputy;
-  SetupTensor<float>(&inputy, {224}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputy_ptr = inputy.data<float>();
+  // 3. output_names
+  vector<string> output_names({"elementwise_add_0.tmp_0"});

-  paddle_mobile::framework::TestElementwiseAddOp<paddle_mobile::CPU>
-      testElementwiseAddOp(program);
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 224, 224});
+  out_ddims.push_back(out_ddim);

-  auto output_add = testElementwiseAddOp.predict_add(inputx, inputy);
-  auto *output_add_ptr = output_add->data<float>();
-  //  for (int j = 0; j < output_add->numel(); ++j) {
-  //    DLOG << "value of output: " << output_add_ptr[j];
-  //  }
+  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);

+  auto output0_data = output[0]->data<float>();
   /// output (1,3,224,224)
-  DLOG << "output memory size : " << output_add->memory_size();
-  DLOG << "output numel : " << output_add->numel();
+  DLOG << "output memory size : " << output[0]->memory_size();
+  DLOG << "output numel : " << output[0]->numel();

-  DLOG << inputx_ptr[226] << " + " << inputy_ptr[2] << " = "
-       << output_add_ptr[226];
-  return 0;
+  DLOG << input1_data[226] << " + " << input2_data[2] << " = "
+       << output0_data[226];
 }

diff --git a/test/operators/test_fushion_fc_op.cpp b/test/operators/test_fushion_fc_op.cpp
index b52989b2e8b..6063772d85a 100644
--- a/test/operators/test_fushion_fc_op.cpp
+++ b/test/operators/test_fushion_fc_op.cpp
@@ -64,24 +64,24 @@ class TestFcOp {
     // feed
     auto scope = program_.scope;
     Variable *x_feed_value = scope->Var("pool2d_13.tmp_0");
-    auto tensor_x = x_feed_value->GetMutable<Tensor>();
+    auto tensor_x = x_feed_value->GetMutable<LoDTensor>();
     tensor_x->ShareDataWith(t1);

     Variable *y_feed_value = scope->Var("loss3_classifier-loc_weights");
-    auto tensor_y = y_feed_value->GetMutable<Tensor>();
+    auto tensor_y = y_feed_value->GetMutable<LoDTensor>();
     tensor_y->ShareDataWith(t2);

     Variable *z_feed_value = scope->Var("loss3_classifier-loc_biases");
-    auto tensor_z = z_feed_value->GetMutable<Tensor>();
+    auto tensor_z = z_feed_value->GetMutable<LoDTensor>();
     tensor_z->ShareDataWith(t3);

     Variable *con_output = scope->Var("loss3_classifier-loc.tmp_1");
-    auto *output_tensor = con_output->GetMutable<Tensor>();
+    auto *output_tensor = con_output->GetMutable<LoDTensor>();
     output_tensor->mutable_data<float>({3, 10});
     // DLOG << typeid(output_tensor).name();
     // DLOG << "output_tensor dims: " << output_tensor->dims();

-    std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
     out_tensor.reset(output_tensor);

     predict(t1, t2, t3, 0);
@@ -130,17 +130,17 @@ int main() {
   }

   /// input x (1,3,224,224)
-  paddle_mobile::framework::Tensor inputx;
+  paddle_mobile::framework::LoDTensor inputx;
   SetupTensor<float>(&inputx, {3, 64, 1, 1}, static_cast<float>(1),
                      static_cast<float>(1));
   auto *inputx_ptr = inputx.data<float>();
   /// input y (224,)
-  paddle_mobile::framework::Tensor inputy;
+  paddle_mobile::framework::LoDTensor inputy;
   SetupTensor<float>(&inputy, {64, 10}, static_cast<float>(1.5),
                      static_cast<float>(1.5));
   auto *inputy_ptr = inputy.data<float>();

-  paddle_mobile::framework::Tensor inputz;
+  paddle_mobile::framework::LoDTensor inputz;
   SetupTensor<float>(&inputz, {10}, static_cast<float>(0),
                      static_cast<float>(1));
   auto *inputz_ptr = inputz.data<float>();

diff --git a/test/operators/test_lrn_op.cpp b/test/operators/test_lrn_op.cpp
index 2284b38abc3..ba35639fb71 100644
--- a/test/operators/test_lrn_op.cpp
+++ b/test/operators/test_lrn_op.cpp
@@ -12,118 +12,51 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#pragma once
+#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/lrn_op.h"

-namespace paddle_mobile {
-namespace framework {
-
-template <typename Dtype>
-class TestLrnOp {
- public:
-  explicit TestLrnOp(const Program<Dtype> p) : program_(p) {
-    if (use_optimize_) {
-      to_predict_program_ = program_.optimizeProgram;
-    } else {
-      to_predict_program_ = program_.originProgram;
-    }
-
-    const std::vector<std::shared_ptr<BlockDesc>> blocks =
-        to_predict_program_->Blocks();
-    // DLOG << " **block size " << blocks.size();
-    for (int i = 0; i < blocks.size(); ++i) {
-      std::shared_ptr<BlockDesc> block_desc = blocks[i];
-      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-      // DLOG << " ops " << ops.size();
-      for (int j = 0; j < ops.size(); ++j) {
-        std::shared_ptr<OpDesc> op = ops[j];
-        if (op->Type() == "lrn" && op->Input("X")[0] == "pool2d_0.tmp_0") {
-          DLOG << " mul attr size: " << op->GetAttrMap().size();
-          DLOG << " inputs size: " << op->GetInputs().size();
-          DLOG << " outputs size: " << op->GetOutputs().size();
-          DLOG << " Input X is : " << op->Input("X")[0];
-          DLOG << " Output Out is : " << op->Output("Out")[0];
-          DLOG << " n : " << op->GetAttrMap().at("n").Get<int>();
-          DLOG << " alpha : " << op->GetAttrMap().at("alpha").Get<float>();
-          DLOG << " beta : " << op->GetAttrMap().at("beta").Get<float>();
-          DLOG << " k : " << op->GetAttrMap().at("k").Get<float>();
-          std::shared_ptr<operators::LrnOp<Dtype, float>> lrn =
-              std::make_shared<operators::LrnOp<Dtype, float>>(
-                  op->Type(), op->GetInputs(), op->GetOutputs(),
-                  op->GetAttrMap(), program_.scope);
-          ops_of_block_[*block_desc.get()].push_back(lrn);
-        }
-      }
-    }
-  }
-
-  std::shared_ptr<Tensor> predict_lrn(const Tensor &t1) {
-    // feed
-    auto scope = program_.scope;
-    Variable *x1_feed_value = scope->Var("pool2d_0.tmp_0");
-    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
-    tensor_x1->ShareDataWith(t1);
-
-    Variable *con_output = scope->Var("pool1_norm1.tmp_1");
-    auto *output_tensor = con_output->GetMutable<Tensor>();
-    output_tensor->mutable_data<float>({3, 4, 2, 2});
-    // DLOG << typeid(output_tensor).name();
-    // DLOG << "output_tensor dims: " << output_tensor->dims();
-
-    std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
-    out_tensor.reset(output_tensor);
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_googlenet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");

-    predict_lrn(t1, 0);
-    return out_tensor;
-  }
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::LrnOp<paddle_mobile::CPU, float>>
+      executor(program, "lrn");

- private:
-  const framework::Program<Dtype> program_;
-  std::shared_ptr<ProgramDesc> to_predict_program_;
-  std::map<framework::BlockDesc,
-           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-      ops_of_block_;
-  bool use_optimize_ = false;
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;

-  void predict_lrn(const Tensor &t1, int block_id) {
-    std::shared_ptr<BlockDesc> to_predict_block =
-        to_predict_program_->Block(block_id);
-    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
-      auto op = ops_of_block_[*to_predict_block.get()][j];
-      DLOG << "op -> run()";
-      op->Run();
-    }
-  }
-};
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {3, 4, 2, 2}, 0, 1);
+  input_tensors.push_back(input1);

-template class TestLrnOp<CPU>;
-}  // namespace framework
-}  // namespace paddle_mobile
+  // 2. input_names
+  vector<string> input_names({
+      "pool2d_0.tmp_0",
+  });

-int main() {
-  DLOG << "----------**********----------";
-  DLOG << "begin to run LrnOp Test";
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../../test/models/googlenet"));
+  // 3. output_names
+  vector<string> output_names({"pool1_norm1.tmp_1"});

-  /// input x (3,4,2,2)
-  paddle_mobile::framework::Tensor inputx1;
-  SetupTensor<float>(&inputx1, {3, 4, 2, 2}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputx1_ptr = inputx1.data<float>();
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({3, 4, 2, 2});
+  out_ddims.push_back(out_ddim);

-  paddle_mobile::framework::TestLrnOp<paddle_mobile::CPU> testLrnOp(program);
+  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);

-  auto output_lrn = testLrnOp.predict_lrn(inputx1);
-  auto *output_lrn_ptr = output_lrn->data<float>();
+  auto output0_data = output[0]->data<float>();

   DLOG << " LrnOp input: ";
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 4; j++) {
       for (int c = 0; c < 2; c++) {
         for (int d = 0; d < 2; d++) {
-          DLOGF("%f ", inputx1_ptr[i * 16 + j * 4 + c * 2 + d]);
+          DLOGF("%f ", input1_data[i * 16 + j * 4 + c * 2 + d]);
         }
         DLOGF("\n");
       }
@@ -136,7 +69,7 @@
     for (int j = 0; j < 4; j++) {
       for (int c = 0; c < 2; c++) {
         for (int d = 0; d < 2; d++) {
-          DLOGF("%f ", output_lrn_ptr[i * 16 + j * 4 + c * 2 + d]);
+          DLOGF("%f ", output0_data[i * 16 + j * 4 + c * 2 + d]);
         }
         DLOGF("\n");
       }
@@ -144,8 +77,8 @@
   }
   DLOGF("\n");
-  DLOG << inputx1_ptr[0] << " / ((1 + 0.00002 * ( " << inputx1_ptr[0] << "^2 + "
-       << inputx1_ptr[4] << "^2 + " << inputx1_ptr[8] << "^2 ))^0.75) = ";
-  DLOG << output_lrn_ptr[0];
+  DLOG << input1_data[0] << " / ((1 + 0.00002 * ( " << input1_data[0] << "^2 + "
+       << input1_data[4] << "^2 + " << input1_data[8] << "^2 ))^0.75) = ";
+  DLOG << output0_data[0];
   return 0;
 }

diff --git a/test/operators/test_mul_op.cpp b/test/operators/test_mul_op.cpp
index 061a942ed85..8acd4a99470 100644
--- a/test/operators/test_mul_op.cpp
+++ b/test/operators/test_mul_op.cpp
@@ -12,158 +12,81 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#pragma once
+#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/mul_op.h"

-namespace paddle_mobile {
-namespace framework {
-
-template <typename Dtype>
-class TestMulOp {
- public:
-  explicit TestMulOp(const Program<Dtype> p) : program_(p) {
-    if (use_optimize_) {
-      to_predict_program_ = program_.optimizeProgram;
-    } else {
-      to_predict_program_ = program_.originProgram;
-    }
-
-    const std::vector<std::shared_ptr<BlockDesc>> blocks =
-        to_predict_program_->Blocks();
-    // DLOG << " **block size " << blocks.size();
-    for (int i = 0; i < blocks.size(); ++i) {
-      std::shared_ptr<BlockDesc> block_desc = blocks[i];
-      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-      // DLOG << " ops " << ops.size();
-      for (int j = 0; j < ops.size(); ++j) {
-        std::shared_ptr<OpDesc> op = ops[j];
-        if (op->Type() == "mul" && op->Input("X")[0] == "pool2d_0.tmp_0") {
-          DLOG << " mul attr size: " << op->GetAttrMap().size();
-          DLOG << " inputs size: " << op->GetInputs().size();
-          DLOG << " outputs size: " << op->GetOutputs().size();
-          DLOG << " Input X is : " << op->Input("X")[0];
-          DLOG << " Input Y is : " << op->Input("Y")[0];
-          DLOG << " Output Out is : " << op->Output("Out")[0];
-          DLOG << "x_num_col_dims : "
-               << op->GetAttrMap().at("x_num_col_dims").Get<int>();
-          DLOG << "y_num_col_dims : "
-               << op->GetAttrMap().at("y_num_col_dims").Get<int>();
-
-          std::shared_ptr<operators::MulOp<Dtype, float>> mul =
-              std::make_shared<operators::MulOp<Dtype, float>>(
-                  op->Type(), op->GetInputs(), op->GetOutputs(),
-                  op->GetAttrMap(), program_.scope);
-          ops_of_block_[*block_desc.get()].push_back(mul);
-        }
-      }
-    }
-  }
-
-  std::shared_ptr<Tensor> predict_mul(const Tensor &t1, const Tensor &t2) {
-    // feed
-    auto scope = program_.scope;
-    Variable *x_feed_value = scope->Var("pool2d_0.tmp_0");
-    auto tensor_x = x_feed_value->GetMutable<Tensor>();
-    tensor_x->ShareDataWith(t1);
-
-    Variable *y_feed_value = scope->Var("fc_0.w_0");
-    auto tensor_y = y_feed_value->GetMutable<Tensor>();
-    tensor_y->ShareDataWith(t2);
-
-    Variable *con_output = scope->Var("fc_0.tmp_0");
-    auto *output_tensor = con_output->GetMutable<Tensor>();
-    output_tensor->mutable_data<float>({3, 3});
-    // DLOG << typeid(output_tensor).name();
-    // DLOG << "output_tensor dims: " << output_tensor->dims();
-
-    std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
-    out_tensor.reset(output_tensor);
-
-    predict_mul(t1, t2, 0);
-    return out_tensor;
-  }
-
- private:
-  const framework::Program<Dtype> program_;
-  std::shared_ptr<ProgramDesc> to_predict_program_;
-  std::map<framework::BlockDesc,
-           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-      ops_of_block_;
-  bool use_optimize_ = false;
-
-  void predict_mul(const Tensor &t1, const Tensor &t2, int block_id) {
-    std::shared_ptr<BlockDesc> to_predict_block =
-        to_predict_program_->Block(block_id);
-    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
-      auto op = ops_of_block_[*to_predict_block.get()][j];
-      DLOG << "op -> run()";
-      op->Run();
-    }
-  }
-};
-
-template class TestMulOp<CPU>;
-}  // namespace framework
-}  // namespace paddle_mobile
-
 int main() {
-  DLOG << "----------**********----------";
-  DLOG << "begin to run MulOp Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program =
-      loader.Load(std::string("../../test/models/"
-                              "image_classification_resnet.inference.model"));
-
-  /// input x (3,2,1,1)
-  paddle_mobile::framework::Tensor inputx;
-  SetupTensor<float>(&inputx, {3, 2, 1, 1}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputx_ptr = inputx.data<float>();
-
-  /// input y (2,3)
-  paddle_mobile::framework::Tensor inputy;
-  SetupTensor<float>(&inputy, {2, 3}, static_cast<float>(0),
-                     static_cast<float>(1));
-  auto *inputy_ptr = inputy.data<float>();
-
-  paddle_mobile::framework::TestMulOp<paddle_mobile::CPU> testMulOp(program);
-
-  auto output_mul = testMulOp.predict_mul(inputx, inputy);
-  auto *output_mul_ptr = output_mul->data<float>();
-
-  auto dimx_1 = inputx.numel() / inputx.dims()[0];
-  DLOG << " inputx : ";
-  for (int i = 0; i < inputx.dims()[0]; ++i) {
-    for (int j = 0; j < dimx_1; ++j) {
-      DLOGF("%f ", inputx_ptr[i * dimx_1 + j]);
+  auto program = loader.Load(g_resnet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::MulOp<paddle_mobile::CPU, float>>
+      executor(program, "mul");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {3, 2, 1, 1}, 0, 1);
+  input_tensors.push_back(input1);
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {2, 3}, 0, 1);
+  input_tensors.push_back(input2);
+
+  // 2. input_names
+  vector<string> input_names({
+      "pool2d_0.tmp_0",
+      "fc_0.w_0",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"fc_0.tmp_0"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({3, 3});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  auto dim_1 = input1.numel() / input1.dims()[0];
+  DLOG << " input1 : ";
+  for (int i = 0; i < input1.dims()[0]; ++i) {
+    for (int j = 0; j < dim_1; ++j) {
+      DLOGF("%f ", input1_data[i * dim_1 + j]);
     }
     DLOGF("\n");
   }

-  auto dimy_1 = inputy.numel() / inputy.dims()[0];
-  DLOG << " inputy : ";
-  for (int i = 0; i < inputy.dims()[0]; ++i) {
-    for (int j = 0; j < dimy_1; ++j) {
-      DLOGF("%f ", inputy_ptr[i * dimx_1 + j]);
+  auto dim_2 = input2.numel() / input2.dims()[0];
+  DLOG << " input2 : ";
+  for (int i = 0; i < input2.dims()[0]; ++i) {
+    for (int j = 0; j < dim_2; ++j) {
+      DLOGF("%f ", input2_data[i * dim_2 + j]);
     }
     DLOGF("\n");
   }

-  auto dim_output_1 = output_mul->numel() / output_mul->dims()[0];
+  auto dim_output0 = output[0]->numel() / output[0]->dims()[0];
   DLOG << " output : ";
-  for (int i = 0; i < output_mul->dims()[0]; ++i) {
-    for (int j = 0; j < dim_output_1; ++j) {
-      DLOGF("%f ", output_mul_ptr[i * dimy_1 + j]);
+  for (int i = 0; i < output[0]->dims()[0]; ++i) {
+    for (int j = 0; j < dim_output0; ++j) {
+      DLOGF("%f ", output0_data[i * dim_2 + j]);
     }
     DLOGF("\n");
   }

   /// output (3,3)
-  DLOG << "output memory size : " << output_mul->memory_size();
-  DLOG << "output numel : " << output_mul->numel();
+  DLOG << "output memory size : " << output[0]->memory_size();
+  DLOG << "output numel : " << output[0]->numel();

-  DLOG << inputx_ptr[0] << " x " << inputy_ptr[0] << " + " << inputx_ptr[1]
-       << " x " << inputy_ptr[0 + 3] << " = " << output_mul_ptr[0];
+  DLOG << input1_data[0] << " x " << input2_data[0] << " + " << input1_data[1]
+       << " x " << input2_data[0 + 3] << " = " << output0_data[0];
   return 0;
 }
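[Editor's note] The single-element check at the end of the mul test generalizes to a full
reference matmul; a sketch under the same {3,2} x {2,3} shapes (the helper is hypothetical
and not part of the patch):

    // Naive reference for out = x * y with x: {3,2} (flattened from {3,2,1,1})
    // and y: {2,3}; compare each out[i*N + j] against output0_data[i*N + j].
    void ReferenceMul(const float *x, const float *y, float *out) {
      const int M = 3, K = 2, N = 3;
      for (int i = 0; i < M; ++i) {
        for (int j = 0; j < N; ++j) {
          float acc = 0;
          for (int k = 0; k < K; ++k) {
            acc += x[i * K + k] * y[k * N + j];  // row of x dot column of y
          }
          out[i * N + j] = acc;
        }
      }
    }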
diff --git a/test/operators/test_multiclass_nms_op.cpp b/test/operators/test_multiclass_nms_op.cpp
index 01ad72b9bbc..e6c41bd4b3b 100644
--- a/test/operators/test_multiclass_nms_op.cpp
+++ b/test/operators/test_multiclass_nms_op.cpp
@@ -77,15 +77,15 @@ class TestMultiClassNMSOp {
     // feed
     auto scope = program_.scope;
     Variable *x1_feed_value = scope->Var("box_coder_0.tmp_0");
-    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
+    auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
     tensor_x1->ShareDataWith(t1);

     Variable *x2_feed_value = scope->Var("transpose_12.tmp_0");
-    auto tensor_x2 = x2_feed_value->GetMutable<Tensor>();
+    auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
     tensor_x2->ShareDataWith(t2);

     Variable *output = scope->Var("detection_output_0.tmp_0");
-    auto *output_tensor = output->GetMutable<Tensor>();
+    auto *output_tensor = output->GetMutable<LoDTensor>();
     output_tensor->mutable_data<float>({1917, 6});

     // DLOG << typeid(output_tensor).name();

diff --git a/test/operators/test_prior_box_op.cpp b/test/operators/test_prior_box_op.cpp
index e365c4ed851..80ede944936 100644
--- a/test/operators/test_prior_box_op.cpp
+++ b/test/operators/test_prior_box_op.cpp
@@ -72,19 +72,19 @@ class TestPriorBoxOp {
     // feed
     auto scope = program_.scope;
     Variable *x1_feed_value = scope->Var("image");
-    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
+    auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
     tensor_x1->ShareDataWith(t1);

     Variable *x2_feed_value = scope->Var("batch_norm_26.tmp_3");
-    auto tensor_x2 = x2_feed_value->GetMutable<Tensor>();
+    auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
     tensor_x2->ShareDataWith(t2);

     Variable *boxes_output = scope->Var("prior_box_1.tmp_0");
-    auto *boxes_output_tensor = boxes_output->GetMutable<Tensor>();
+    auto *boxes_output_tensor = boxes_output->GetMutable<LoDTensor>();
     boxes_output_tensor->mutable_data<float>({10, 10, 6, 4});

     Variable *variances_output = scope->Var("prior_box_1.tmp_1");
-    auto *variances_output_tesnor = variances_output->GetMutable<Tensor>();
+    auto *variances_output_tesnor = variances_output->GetMutable<LoDTensor>();
     variances_output_tesnor->mutable_data<float>({10, 10, 6, 4});
     // DLOG << typeid(output_tensor).name();
     // DLOG << "output_tensor dims: " << output_tensor->dims();

diff --git a/test/operators/test_relu_op.cpp b/test/operators/test_relu_op.cpp
index 6fefb0368be..fb68b921113 100644
--- a/test/operators/test_relu_op.cpp
+++ b/test/operators/test_relu_op.cpp
@@ -14,12 +14,11 @@ limitations under the License. */

 #include "../executor_for_test.h"
 #include "../test_include.h"
+#include "operators/relu_op.h"

 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  // ../models/image_classification_resnet.inference.model
-  auto program = loader.Load(g_mobilenet_ssd);
-
+  auto program = loader.Load(g_resnet);
   PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
                         "program file read fail");

@@ -27,17 +26,33 @@
                 paddle_mobile::operators::ReluOp<paddle_mobile::CPU, float>>
       executor(program, "relu");

-  paddle_mobile::framework::Tensor input;
-  SetupTensor<float>(&input, {1, 2, 3, 4}, static_cast<float>(-1),
-                     static_cast<float>(1));
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {1, 2, 3, 4}, -1, 1);
+  input_tensors.push_back(input1);
+
+  // 2. input_names
+  vector<string> input_names({
+      "batch_norm_0.tmp_2",
+  });

+  // 3. output_names
+  vector<string> output_names({"batch_norm_0.tmp_3"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4});
-  auto output = executor.predict(input, "batch_norm_0.tmp_2",
-                                 "batch_norm_0.tmp_3", out_ddim);
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();

-  auto output_ptr = output->data<float>();
-  for (int j = 0; j < output->numel(); ++j) {
-    DLOG << " value of output: " << output_ptr[j];
+  for (int j = 0; j < output[0]->numel(); ++j) {
+    DLOG << " value of output: " << output0_data[j];
   }
   return 0;
 }

diff --git a/test/test_helper.h b/test/test_helper.h
index e2d6a183cb7..c0c301840fa 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -43,6 +43,12 @@ void SetupTensor(paddle_mobile::framework::Tensor *input,
   }
 }

+template <typename T>
+T *CreateInput(Tensor *input, DDim dims, T low, T up) {
+  SetupTensor<T>(input, dims, static_cast<T>(low), static_cast<T>(up));
+  return input->data<T>();
+}
+
 template <typename T>
 void GetInput(const std::string &input_name, std::vector<T> *input,
               const std::vector<int64_t> &dims) {

From 0d837d7adb7c95e6f46ccd8f93682bb33d90fece Mon Sep 17 00:00:00 2001
From: eclipsess
Date: Tue, 29 May 2018 16:13:38 +0800
Subject: [PATCH 04/26] code style

---
 test/test_helper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_helper.h b/test/test_helper.h
index 6b5d8335db0..029ed9742f6 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -29,8 +29,8 @@ static const std::string g_resnet =
     "../models/image_classification_resnet.inference.model";
 static const std::string g_test_image_1x3x224x224 =
     "../images/test_image_1x3x224x224_float";
-using paddle_mobile::framework::Tensor;
 using paddle_mobile::framework::DDim;
+using paddle_mobile::framework::Tensor;
 template <typename T>
 void SetupTensor(paddle_mobile::framework::Tensor *input,
                  paddle_mobile::framework::DDim dims, T lower, T upper) {

From 9f94c0e916958fa2a950496705612d288ea2f5f7 Mon Sep 17 00:00:00 2001
From: eclipsess
Date: Tue, 29 May 2018 18:14:50 +0800
Subject: [PATCH 05/26] restore test for single input and output

---
 test/executor_for_test.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index a54a8bb191a..89b54617826 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -114,4 +114,29 @@ class Executor4Test : public Executor<DeviceType> {

     return output_tensor_sptrs;
   }
+
+  std::shared_ptr<Tensor> predict(const Tensor &t, string input, string output,
+                                  const DDim &dDim) {
+    auto scope = this->program_.scope;
+    Variable *g_feed_value = scope->Var(input);
+    auto tensor = g_feed_value->GetMutable<LoDTensor>();
+    tensor->ShareDataWith(t);
+
+    Variable *con_output = scope->Var(output);
+    auto *output_tensor = con_output->GetMutable<LoDTensor>();
+    output_tensor->mutable_data<float>(dDim);
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    std::shared_ptr<BlockDesc> to_predict_block =
+        this->to_predict_program_->Block(0);
+    for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
+         ++j) {
+      auto op = this->ops_of_block_[*to_predict_block.get()][j];
+      op->Run();
+    }
+
+    return out_tensor;
+  }
 };
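[Editor's note] With patch 05 applied, Executor4Test exposes both entry points side by
side; a minimal sketch of how a single-tensor test keeps working next to the vector form,
assuming the signatures above (variable names are illustrative, taken from the relu test):

    // Single input/output path restored by patch 05.
    Tensor input;
    SetupTensor<float>(&input, {1, 2, 3, 4}, -1.f, 1.f);
    auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4});
    auto out = executor.predict(input, "batch_norm_0.tmp_2",
                                "batch_norm_0.tmp_3", out_ddim);
    auto *out_data = out->data<float>();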
src/operators/batchnorm_op.h | 11 +-- src/operators/box_coder_op.h | 2 +- src/operators/concat_op.h | 10 +-- src/operators/conv_op.h | 7 +- src/operators/elementwise_add_op.h | 13 ++-- src/operators/feed_op.h | 7 +- src/operators/fetch_op.h | 11 ++- src/operators/fusion_fc_op.h | 12 ++-- src/operators/lrn_op.h | 10 +-- src/operators/mul_op.h | 6 +- src/operators/multiclass_nms_op.h | 2 +- src/operators/pool_op.h | 22 +++--- src/operators/prior_box_op.h | 2 +- src/operators/relu_op.h | 2 +- src/operators/reshape_op.h | 2 +- src/operators/sigmoid_op.h | 2 +- src/operators/softmax_op.h | 2 +- src/operators/transpose_op.h | 2 +- test/executor_for_test.h | 2 +- test/framework/test_load.cpp | 2 +- test/framework/test_optimize.cpp | 2 +- test/net/test_googlenet.cpp | 2 +- test/operators/test_pool_op.cpp | 2 +- test/operators/test_reshape_op.cpp | 2 +- test/operators/test_sigmoid_op.cpp | 2 +- test/operators/test_softmax_op.cpp | 2 +- test/operators/test_transpose_op.cpp | 2 +- test/test_include.h | 2 +- 32 files changed, 175 insertions(+), 137 deletions(-) diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp index 857e8ea1c77..dfdf0af79ac 100644 --- a/src/framework/operator.cpp +++ b/src/framework/operator.cpp @@ -13,11 +13,32 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "framework/operator.h" -#include "framework/op_info.h" +#include "operators/op_param.h" namespace paddle_mobile { namespace framework { +template +vector OperatorBase::GetOutKeys() const { + auto it = op_input_output_key.find(type_); + if (it == op_input_output_key.end()) { + DLOG << type_ << " has no outputs"; + } + return it->second.second; +} + +template +static T *GetVarValue(const string &key, const VariableNameMap &var_map, + const Scope &scope) { + auto var_vec = var_map.at(key); + if (!var_vec.empty()) { + auto var = scope.FindVar(var_vec[0]); + return var->GetMutable(); + } else { + return nullptr; + } +} + template OperatorBase::OperatorBase(const std::string &type, const VariableNameMap &inputs, @@ -31,9 +52,22 @@ OperatorBase::OperatorBase(const std::string &type, scope_(scope) { CheckAllInputOutputSet(); } + template void OperatorBase::CheckAllInputOutputSet() const {} +template +void OperatorBase::Run() const { + RunImpl(); +#ifdef PADDLE_MOBILE_DEBUG + vector output_keys = GetOutKeys(); + for (const auto key : output_keys) { + Tensor *out_ = GetVarValue(key, outputs_, *scope_); + DLOG << type_ << " output- " << key << "=" << *out_; + } +#endif +} + template class OperatorBase; template class OperatorWithKernel; diff --git a/src/framework/operator.h b/src/framework/operator.h index 5a40a926630..549916b9a38 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -36,6 +36,8 @@ limitations under the License. 
*/ namespace paddle_mobile { namespace framework { +using std::string; +using std::vector; static std::unordered_map< std::string, std::pair, std::vector>> op_input_output_key = {{"conv2d", {{"Input"}, {"Output"}}}, @@ -57,7 +59,9 @@ class OperatorBase : PaddleMobileObject { const VariableNameMap &outputs, const AttributeMap &attrs, std::shared_ptr scope); virtual ~OperatorBase() {} - virtual void Run() const = 0; + void Run() const; + vector GetOutKeys() const; + virtual void RunImpl() const = 0; virtual void InferShape() const = 0; const VariableNameMap &Inputs() const { return inputs_; } @@ -88,7 +92,8 @@ class OperatorWithKernel : public OperatorBase { const VariableNameMap &outputs, const AttributeMap &attrs, std::shared_ptr scope) : OperatorBase(type, inputs, outputs, attrs, scope) {} - virtual void Run() const = 0; + + virtual void RunImpl() const = 0; virtual void InferShape() const = 0; }; @@ -113,7 +118,7 @@ class FusionOpMatcher : PaddleMobileObject { virtual std::string Type() = 0; - virtual void FolderNodes(Node &node) { + virtual void FolderNodes(const Node &node) { node.Folder(node_.Depth(), Type(), {}); } diff --git a/src/framework/tensor.h b/src/framework/tensor.h index b6a7c724ad1..674edd67733 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -18,11 +18,12 @@ limitations under the License. */ #include #include #include +#include #include #include -#include "data_layout.h" -#include "ddim.h" +#include "framework/data_layout.h" +#include "framework/ddim.h" #include "memory/t_malloc.h" namespace paddle_mobile { @@ -62,8 +63,8 @@ struct SizeOfTypeFunctor { static inline size_t SizeOfType(std::type_index type) { SizeOfTypeFunctor functor; size_t size = functor(type); - // PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s", - // type.name()); + + PADDLE_MOBILE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name()); return size; } @@ -72,16 +73,27 @@ class LoDTensor; class Tensor { public: Tensor() : offset_(0) {} + template + Tensor(std::vector input, DDim ddim) : offset_(0) { + PADDLE_MOBILE_ENFORCE( + input.size() == framework::product(ddim), + "input vector's length should be equal to the tensor's length"); + auto input_ptr = mutable_data(ddim); + for (int i = 0; i < input.size(); ++i) { + input_ptr[i] = input[i]; + } + } /*! Return a pointer to mutable memory block. */ template inline T *data() { check_memory_size(); - // PADDLE_ENFORCE(std::is_same::value || - // holder_->type().hash_code() == - // typeid(T).hash_code(), - // "Tensor holds the wrong type, it holds %s", - // this->holder_->type().name()); + PADDLE_MOBILE_ENFORCE( + (std::is_same::value || + holder_->type().hash_code() == typeid(T).hash_code()), + "Tensor holds the wrong type, it holds %s", + this->holder_->type().name()); + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + offset_); } @@ -90,11 +102,11 @@ class Tensor { template inline const T *data() const { check_memory_size(); - // PADDLE_ENFORCE(std::is_same::value || - // holder_->type().hash_code() == - // typeid(T).hash_code(), - // "Tensor holds the wrong type, it holds %s", - // this->holder_->type().name()); + PADDLE_MOBILE_ENFORCE( + (std::is_same::value || + holder_->type().hash_code() == typeid(T).hash_code()), + "Tensor holds the wrong type, it holds %s", + this->holder_->type().name()); return reinterpret_cast( reinterpret_cast(holder_->ptr()) + offset_); @@ -116,17 +128,11 @@ class Tensor { if (holder_ != nullptr) { holder_->set_type(type); } - // PADDLE_ENFORCE_GE(numel(), 0, - // "When calling this method, the Tensor's - // numel must be - // " "equal or larger than zero. " "Please - // check - // Tensor::Resize has been called first."); + PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must be >= 0.") int64_t size = numel() * SizeOfType(type); /* some versions of boost::variant don't have operator!= */ if (holder_ == nullptr || holder_->size() < size + offset_) { holder_.reset(new PlaceholderImpl(size, type)); - offset_ = 0; } return reinterpret_cast( @@ -179,16 +185,13 @@ class Tensor { */ inline Tensor Slice(int begin_idx, int end_idx) const { check_memory_size(); - // PADDLE_ENFORCE_GE(begin_idx, 0, - // "The start row index must be greater than - // 0."); - // PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is - // out of - // bound."); PADDLE_ENFORCE_LT( - // begin_idx, end_idx, - // "The start row index must be lesser than the end row - // index."); - + PADDLE_MOBILE_ENFORCE(begin_idx >= 0, + "The start row index must be greater than or equal to 0.") + PADDLE_MOBILE_ENFORCE(end_idx <= dims_[0], + "The end row index is out of bound.") + PADDLE_MOBILE_ENFORCE( + begin_idx < end_idx, + "The start row index must be less than the end row index") if (dims_[0] == 1) { return *this; } else { @@ -205,10 +208,9 @@ class Tensor { } std::type_index type() const { - // PADDLE_ENFORCE_NOT_NULL( - // holder_, "Tensor not initialized yet - // when - // Tensor::type() is called."); + PADDLE_MOBILE_ENFORCE( + holder_ != nullptr, + "Tensor not initialized yet when Tensor::type() is called.") return holder_->type(); } @@ -221,12 +223,8 @@ class Tensor { PADDLE_MOBILE_ENFORCE( holder_ != nullptr, "Tensor holds no memory. Call Tensor::mutable_data first."); - PADDLE_MOBILE_ENFORCE( - numel() * SizeOfType(type()) <= memory_size(), - "Tensor's dims_ is out of bound. CallTensor::mutable_data " - "first to re-allocate memory.\n" - "or maybe the required data-type mismatches the data\ already stored."); + PADDLE_MOBILE_ENFORCE(numel() * SizeOfType(type()) <= memory_size(), + "Tensor's dims_ is out of bound. "); } inline DataLayout layout() const { return layout_; } @@ -257,13 +255,8 @@ class Tensor { memory::PODDeleter()), size_(size), type_(type) { - // PADDLE_ENFORCE_NOT_NULL(ptr_, - // "Insufficient %s - // memory to allocation.", - // (is_cpu_place(place_) - // ?
- // "CPU" : - // "GPU")); + PADDLE_MOBILE_ENFORCE(ptr_ != nullptr, + "Insufficient memory to allocation"); } virtual size_t size() const { return size_; } @@ -321,6 +314,19 @@ class Tensor { size_t offset_; }; +#ifdef PADDLE_MOBILE_DEBUG +inline Print &operator<<(Print &printer, const Tensor &tensor) { + printer << " dims: " << tensor.dims() << "\n"; + int stride = tensor.numel() / 20; + stride = stride > 0 ? stride : 1; + for (int i = 0; i < tensor.numel(); i += stride) { + printer << tensor.data()[i] << " "; + } + return printer; +} + +#endif + inline Tensor ReshapeToMatrix(const Tensor &src, int num_col_dims) { Tensor res; res.ShareDataWith(src); diff --git a/src/io.cpp b/src/io.cpp index 1c5e97bbb7e..bfb3c5a7e2b 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "io.h" +#include "/io.h" #include #include - #include "common/enforce.h" #include "common/log.h" #include "framework/framework.pb-c.h" @@ -53,7 +52,7 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) { DLOG << "model size: " << size; - *out = (uint8_t *)malloc(size); + *out = reinterpret_cast(size); size_t cur_len = 0; size_t nread; @@ -364,7 +363,7 @@ void Executor::LoadMemory(const framework::VarDesc var_desc, is.read(static_cast(memory), memory_size * type_size); is.close(); -}; +} template void Executor::InitMemory() { @@ -381,6 +380,7 @@ void Executor::InitMemory() { } else { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { auto tensor = var->template GetMutable(); + tensor->template mutable_data(); } } @@ -406,15 +406,7 @@ void Executor::predict(const framework::Tensor &t, int block_id) { template std::vector::Ptype> Executor::predict( const std::vector &input, const std::vector &dims) { - DLOG << "start predict: "; - - framework::LoDTensor tensor; - auto ddim = framework::make_ddim(dims); - - auto input_ptr = tensor.mutable_data(ddim); - for (int i = 0; i < input.size(); ++i) { - input_ptr[i] = input[i]; - } + framework::Tensor tensor(input, framework::make_ddim(dims)); predict(tensor, 0); diff --git a/src/operators/batchnorm_op.h b/src/operators/batchnorm_op.h index 072fbd5f424..760466eeddc 100644 --- a/src/operators/batchnorm_op.h +++ b/src/operators/batchnorm_op.h @@ -12,19 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
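The operator<<(Print &, const Tensor &) added to tensor.h above deliberately prints at most about 20 evenly spaced elements (stride = numel / 20) so that dumping a large tensor stays readable. The same sampling idea in isolation, over a plain float vector (a standalone sketch, not the framework's Print type):

#include <cstdio>
#include <vector>

// Print at most ~20 evenly spaced samples of a buffer, like the Tensor printer.
void DebugPrintSampled(const std::vector<float> &data) {
  size_t stride = data.size() / 20;
  if (stride == 0) stride = 1;  // small buffers: print every element
  for (size_t i = 0; i < data.size(); i += stride) {
    std::printf("%g ", data[i]);
  }
  std::printf("\n");
}

int main() {
  std::vector<float> v(1000);
  for (size_t i = 0; i < v.size(); ++i) v[i] = static_cast<float>(i);
  DebugPrintSampled(v);  // emits 20 values: 0 50 100 ... 950
}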
*/ +#pragma once + +#include #include "framework/operator.h" #include "operators/kernel/batchnorm_kernel.h" #include "operators/op_param.h" namespace paddle_mobile { namespace operators { - -using namespace framework; - +using std::string; template class BatchNormOp : public framework::OperatorWithKernel { public: - BatchNormOp(const std::string &type, const VariableNameMap &inputs, + BatchNormOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap attrs, std::shared_ptr scope) @@ -32,7 +33,7 @@ class BatchNormOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::BatchNormKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/box_coder_op.h b/src/operators/box_coder_op.h index 76f4b151742..a2203e1d89f 100644 --- a/src/operators/box_coder_op.h +++ b/src/operators/box_coder_op.h @@ -36,7 +36,7 @@ class BoxCoderOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::BoxCoderKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/concat_op.h b/src/operators/concat_op.h index 611e46af6a6..15160e20a40 100644 --- a/src/operators/concat_op.h +++ b/src/operators/concat_op.h @@ -13,25 +13,25 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once + +#include #include "framework/operator.h" #include "operators/kernel/concat_kernel.h" #include "operators/op_param.h" namespace paddle_mobile { namespace operators { - -using namespace framework; - +using std::string; template class ConcatOp : public framework::OperatorWithKernel { public: - ConcatOp(const std::string &type, const VariableNameMap &inputs, + ConcatOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap attrs, std::shared_ptr scope) : framework::OperatorWithKernel(type, inputs, outputs, attrs, scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::ConcatKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/conv_op.h b/src/operators/conv_op.h index 047fa1a8e6c..1557f2f06ee 100644 --- a/src/operators/conv_op.h +++ b/src/operators/conv_op.h @@ -14,14 +14,13 @@ limitations under the License. */ #pragma once +#include #include "framework/operator.h" #include "operators/kernel/conv_kernel.h" namespace paddle_mobile { namespace operators { - -using namespace framework; - +using std::string; template class ConvOp : public framework::OperatorWithKernel { public: @@ -35,7 +34,7 @@ class ConvOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape() const override; - void Run() const { + void RunImpl() const { operators::ConvKernel kernel; kernel.Compute(param_); this->ClearVariables({"Filter", "Input"}); diff --git a/src/operators/elementwise_add_op.h b/src/operators/elementwise_add_op.h index 47fa52c4696..7dd7e147a06 100644 --- a/src/operators/elementwise_add_op.h +++ b/src/operators/elementwise_add_op.h @@ -12,19 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
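Each operator body above follows the same three-part shape: a Param object captures the operand pointers once at construction, a stateless Kernel holds only the math, and RunImpl() merely instantiates the kernel and hands it the param. A toy version of that split, with made-up ReluParam/ReluKernel/ReluOp names standing in for the framework's templated types:

#include <iostream>
#include <vector>

// Param gathers the operand pointers once, at construction time.
struct ReluParam {
  const std::vector<float> *input;
  std::vector<float> *output;
};

// Kernel is a stateless functor holding only the math.
struct ReluKernel {
  void Compute(const ReluParam &p) const {
    p.output->resize(p.input->size());
    for (size_t i = 0; i < p.input->size(); ++i)
      (*p.output)[i] = (*p.input)[i] > 0 ? (*p.input)[i] : 0;
  }
};

// The operator's RunImpl just instantiates the kernel and hands it the param.
struct ReluOp {
  ReluParam param;
  void RunImpl() const {
    ReluKernel kernel;
    kernel.Compute(param);
  }
};

int main() {
  std::vector<float> in = {-1.0f, 2.0f, -3.0f, 4.0f};
  std::vector<float> out;
  ReluOp op{{&in, &out}};
  op.RunImpl();
  for (float v : out) std::cout << v << " ";  // 0 2 0 4
  std::cout << "\n";
}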
*/ +#pragma once + +#include #include "framework/operator.h" #include "kernel/elementwise_add_kernel.h" -#include "op_param.h" +#include "operators/op_param.h" namespace paddle_mobile { namespace operators { - -using namespace framework; - +using std::string; template class ElementwiseAddOp : public framework::OperatorWithKernel { public: - ElementwiseAddOp(const std::string &type, const VariableNameMap &inputs, + ElementwiseAddOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap attrs, std::shared_ptr scope) @@ -32,7 +33,7 @@ class ElementwiseAddOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::ElementwiseAddKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index 426d5f6220d..25a82894ea9 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -14,22 +14,23 @@ limitations under the License. */ #pragma once +#include #include "framework/operator.h" #include "operators/op_param.h" namespace paddle_mobile { namespace operators { - +using std::string; template class FeedOp : public framework::OperatorBase { public: - FeedOp(const std::string &type, const VariableNameMap &inputs, + FeedOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap attrs, std::shared_ptr scope) : framework::OperatorBase(type, inputs, outputs, attrs, scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { param_.Out()->ShareDataWith(*param_.InputX()); } + void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); } void InferShape() const { auto out_dims = param_.Out()->dims(); diff --git a/src/operators/fetch_op.h b/src/operators/fetch_op.h index 7dddd679929..31e17f2b562 100644 --- a/src/operators/fetch_op.h +++ b/src/operators/fetch_op.h @@ -14,27 +14,24 @@ limitations under the License. */ #pragma once +#include #include "framework/operator.h" #include "operators/op_param.h" namespace paddle_mobile { namespace operators { +using std::string; template class FetchOp : public framework::OperatorBase { public: - FetchOp(const std::string &type, const VariableNameMap &inputs, + FetchOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap attrs, std::shared_ptr scope) : framework::OperatorBase(type, inputs, outputs, attrs, scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { - param_.Out()->ShareDataWith(*param_.InputX()); - for (int i = 0; i < param_.Out()->numel(); ++i) { - DLOG << param_.Out()->template data()[i]; - } - } + void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); } void InferShape() const { auto x_dims = param_.InputX()->dims(); diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h index 1dd5d2bf535..6e0c50170a1 100644 --- a/src/operators/fusion_fc_op.h +++ b/src/operators/fusion_fc_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include "framework/operator.h" #include "framework/program/program-optimize/fusion_op_register.h" @@ -22,7 +23,8 @@ limitations under the License. 
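FeedOp and FetchOp above now reduce to a single ShareDataWith call, meaning the output tensor aliases the input's buffer rather than copying it (the per-element DLOG loop in FetchOp is gone). A minimal model of that aliasing semantics, assuming nothing beyond the standard library; MiniTensor is an invented name for illustration:

#include <iostream>
#include <memory>
#include <vector>

// Two "tensors" point at one reference-counted buffer, so feed/fetch are O(1)
// and writes through either handle are visible to both.
struct MiniTensor {
  std::shared_ptr<std::vector<float>> buf;
  void ShareDataWith(const MiniTensor &other) { buf = other.buf; }  // alias, no copy
};

int main() {
  MiniTensor in{std::make_shared<std::vector<float>>(4, 1.0f)};
  MiniTensor out;
  out.ShareDataWith(in);               // feed: output aliases input storage
  (*in.buf)[0] = 42.0f;                // mutate through the input handle
  std::cout << (*out.buf)[0] << "\n";  // prints 42: same underlying buffer
}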
*/ namespace paddle_mobile { namespace operators { - +using std::string; +using std::vector; class FusionFcMatcher : public framework::FusionOpMatcher { public: FusionFcMatcher() { @@ -30,8 +32,8 @@ class FusionFcMatcher : public framework::FusionOpMatcher { node_ > std::make_shared("elementwise_add"); } - void FolderNodes(framework::Node &node) { - std::vector> origin_descs = + void FolderNodes(const framework::Node &node) { + vector> origin_descs = node.OpDescs(node_.Depth()); node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}}); } @@ -42,7 +44,7 @@ class FusionFcMatcher : public framework::FusionOpMatcher { template class FushionFcOp : public framework::OperatorWithKernel { public: - FushionFcOp(const std::string &type, const VariableNameMap &inputs, + FushionFcOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap attrs, std::shared_ptr scope) @@ -50,7 +52,7 @@ class FushionFcOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::FushionFcKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/lrn_op.h b/src/operators/lrn_op.h index 112053b97f9..e5d98e1bb10 100644 --- a/src/operators/lrn_op.h +++ b/src/operators/lrn_op.h @@ -11,27 +11,27 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma once +#include #include "framework/operator.h" #include "operators/kernel/lrn_kernel.h" #include "operators/op_param.h" namespace paddle_mobile { namespace operators { - -using namespace framework; - +using std::string; template class LrnOp : public framework::OperatorWithKernel { public: - LrnOp(const std::string &type, const VariableNameMap &inputs, + LrnOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap attrs, std::shared_ptr scope) : framework::OperatorWithKernel(type, inputs, outputs, attrs, scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::LrnKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/mul_op.h b/src/operators/mul_op.h index 8685651ea68..ded618551fc 100644 --- a/src/operators/mul_op.h +++ b/src/operators/mul_op.h @@ -11,7 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma once +#include #include "framework/operator.h" #include "operators/kernel/mul_kernel.h" #include "operators/op_param.h" @@ -19,8 +21,6 @@ limitations under the License. 
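The FusionFcMatcher above folds a mul node followed by an elementwise_add into one fc node, so the fused operator computes y = x·W + b in a single pass without materializing the intermediate product. The arithmetic being fused, shown on hard-coded toy shapes (a sketch only, not the framework's kernel):

#include <cstdio>

int main() {
  const int M = 2, K = 3, N = 2;
  float x[M][K] = {{1, 2, 3}, {4, 5, 6}};
  float w[K][N] = {{1, 0}, {0, 1}, {1, 1}};
  float b[N] = {0.5f, -0.5f};
  float y[M][N];
  for (int i = 0; i < M; ++i)
    for (int j = 0; j < N; ++j) {
      float acc = b[j];  // bias folded into the same accumulation loop
      for (int k = 0; k < K; ++k) acc += x[i][k] * w[k][j];
      y[i][j] = acc;
    }
  for (int i = 0; i < M; ++i) std::printf("%f %f\n", y[i][0], y[i][1]);
}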
*/ namespace paddle_mobile { namespace operators { -using namespace framework; - template class MulOp : public framework::OperatorWithKernel { public: @@ -31,7 +31,7 @@ class MulOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::MulKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/multiclass_nms_op.h b/src/operators/multiclass_nms_op.h index 40466af6074..c424856b8cd 100644 --- a/src/operators/multiclass_nms_op.h +++ b/src/operators/multiclass_nms_op.h @@ -36,7 +36,7 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::MultiClassNMSKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/pool_op.h b/src/operators/pool_op.h index 3cc1facbef4..7195c3b4e17 100644 --- a/src/operators/pool_op.h +++ b/src/operators/pool_op.h @@ -17,25 +17,25 @@ limitations under the License. */ #include #include #include +#include namespace paddle_mobile { namespace operators { -using namespace framework; - +using framework::AttributeMap; +using framework::Scope; +using std::string; template -class PoolOp : public framework::OperatorWithKernel { +class PoolOp : public OperatorWithKernel { public: - PoolOp(const std::string &type, const VariableNameMap &inputs, - const VariableNameMap &outputs, const framework::AttributeMap &attrs, - std::shared_ptr scope) - : framework::OperatorWithKernel(type, inputs, outputs, attrs, - scope), + PoolOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + std::shared_ptr scope) + : OperatorWithKernel(type, inputs, outputs, attrs, scope), param_(inputs, outputs, attrs, *scope) {} - using framework::OperatorWithKernel::OperatorWithKernel; + using OperatorWithKernel::OperatorWithKernel; void InferShape() const override; - void Run() const { - // InferShape(); + void RunImpl() const { operators::PoolKernel kernel; kernel.Compute(param_); this->ClearVariables({"X"}); diff --git a/src/operators/prior_box_op.h b/src/operators/prior_box_op.h index 17a583cac96..84481e602a6 100644 --- a/src/operators/prior_box_op.h +++ b/src/operators/prior_box_op.h @@ -36,7 +36,7 @@ class PriorBoxOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::PriorBoxKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/relu_op.h b/src/operators/relu_op.h index 26bee848c1b..6c3a614a1a0 100644 --- a/src/operators/relu_op.h +++ b/src/operators/relu_op.h @@ -35,7 +35,7 @@ class ReluOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::ReluKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/reshape_op.h b/src/operators/reshape_op.h index 62bcb3a6798..b244e62a930 100644 --- a/src/operators/reshape_op.h +++ b/src/operators/reshape_op.h @@ -35,7 +35,7 @@ class ReshapeOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::ReshapeKernel kernel; kernel.Compute(param_); } diff --git a/src/operators/sigmoid_op.h b/src/operators/sigmoid_op.h index ba5d3d0299f..f631ba51759 100644 --- a/src/operators/sigmoid_op.h +++ b/src/operators/sigmoid_op.h @@ -36,7 +36,7 @@ class SigmoidOp : public 
framework::OperatorWithKernel { void InferShape() const override; - void Run() const { + void RunImpl() const { operators::SigmoidKernel kernel; kernel.Compute(param_); this->ClearVariables({"X"}); diff --git a/src/operators/softmax_op.h b/src/operators/softmax_op.h index 550a7698f96..07fd9b945cb 100644 --- a/src/operators/softmax_op.h +++ b/src/operators/softmax_op.h @@ -36,7 +36,7 @@ class SoftmaxOp : public framework::OperatorWithKernel { void InferShape() const override; - void Run() const { + void RunImpl() const { operators::SoftmaxKernel kernel; kernel.Compute(param_); this->ClearVariables({"X"}); diff --git a/src/operators/transpose_op.h b/src/operators/transpose_op.h index a56771b4c64..0f673395332 100644 --- a/src/operators/transpose_op.h +++ b/src/operators/transpose_op.h @@ -36,7 +36,7 @@ class TransposeOp : public framework::OperatorWithKernel { scope), param_(inputs, outputs, attrs, *scope) {} - void Run() const { + void RunImpl() const { operators::TransposeKernel kernel; kernel.Compute(param_); } diff --git a/test/executor_for_test.h b/test/executor_for_test.h index 89b54617826..1eac6530209 100644 --- a/test/executor_for_test.h +++ b/test/executor_for_test.h @@ -17,9 +17,9 @@ limitations under the License. */ #include #include -#include "./io.h" #include "common/log.h" #include "framework/op_registry.h" +#include "io/io.h" #include "operators/conv_op.h" #include "operators/elementwise_add_op.h" #include "operators/pool_op.h" diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp index 0370e6d946f..cae699b792f 100644 --- a/test/framework/test_load.cpp +++ b/test/framework/test_load.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "io.h" +#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/framework/test_optimize.cpp b/test/framework/test_optimize.cpp index c721c453739..6681ce83bb5 100644 --- a/test/framework/test_optimize.cpp +++ b/test/framework/test_optimize.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/program_optimize.h" -#include "io.h" +#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp index ee03ed0b146..363825fe726 100644 --- a/test/net/test_googlenet.cpp +++ b/test/net/test_googlenet.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include "../test_helper.h" #include "../test_include.h" -#include "io.h" +#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/operators/test_pool_op.cpp b/test/operators/test_pool_op.cpp index 85c4f6106da..d92fb66efd7 100644 --- a/test/operators/test_pool_op.cpp +++ b/test/operators/test_pool_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "io.h" +#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/operators/test_reshape_op.cpp b/test/operators/test_reshape_op.cpp index 7ba2faa47df..d0cb9ac2df0 100644 --- a/test/operators/test_reshape_op.cpp +++ b/test/operators/test_reshape_op.cpp @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include "../executor_for_test.h" #include "../test_helper.h" -#include "./io.h" +#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp index adf03761327..4ed3efaf28a 100644 --- a/test/operators/test_sigmoid_op.cpp +++ b/test/operators/test_sigmoid_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../../src/operators/kernel/sigmoid_kernel.h" #include "../test_helper.h" -#include "./io.h" +#include "io/io.h" int main() { paddle_mobile::framework::Tensor input; diff --git a/test/operators/test_softmax_op.cpp b/test/operators/test_softmax_op.cpp index ed5a1a49f55..e0a616c9a46 100644 --- a/test/operators/test_softmax_op.cpp +++ b/test/operators/test_softmax_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "./io.h" +#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/operators/test_transpose_op.cpp b/test/operators/test_transpose_op.cpp index ffdb34f2f50..4ca05d612b7 100644 --- a/test/operators/test_transpose_op.cpp +++ b/test/operators/test_transpose_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "./io.h" +#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/test_include.h b/test/test_include.h index 25efbb9f4c0..dd4bf5d127d 100644 --- a/test/test_include.h +++ b/test/test_include.h @@ -29,4 +29,4 @@ limitations under the License. */ #include "framework/scope.h" #include "framework/tensor.h" #include "framework/variable.h" -#include "io.h" +#include "io/io.h" From 17f097807a935a992734d3d06ae3ff75803d9f52 Mon Sep 17 00:00:00 2001 From: wangliu Date: Wed, 30 May 2018 10:52:02 +0800 Subject: [PATCH 07/26] modify code style --- src/framework/operator.h | 2 +- src/{ => io}/io.cpp | 2 +- src/{ => io}/io.h | 0 src/operators/fusion_fc_op.h | 2 +- src/operators/pool_op.h | 1 + 5 files changed, 4 insertions(+), 3 deletions(-) rename src/{ => io}/io.cpp (99%) rename src/{ => io}/io.h (100%) diff --git a/src/framework/operator.h b/src/framework/operator.h index 549916b9a38..a44d264a188 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -118,7 +118,7 @@ class FusionOpMatcher : PaddleMobileObject { virtual std::string Type() = 0; - virtual void FolderNodes(const Node &node) { + virtual void FolderNodes(Node &node) { node.Folder(node_.Depth(), Type(), {}); } diff --git a/src/io.cpp b/src/io/io.cpp similarity index 99% rename from src/io.cpp rename to src/io/io.cpp index bfb3c5a7e2b..a773939505d 100644 --- a/src/io.cpp +++ b/src/io/io.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "/io.h" +#include "io/io.h" #include #include #include "common/enforce.h" diff --git a/src/io.h b/src/io/io.h similarity index 100% rename from src/io.h rename to src/io/io.h diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h index 6e0c50170a1..0ed5a2b4d5e 100644 --- a/src/operators/fusion_fc_op.h +++ b/src/operators/fusion_fc_op.h @@ -32,7 +32,7 @@ class FusionFcMatcher : public framework::FusionOpMatcher { node_ > std::make_shared("elementwise_add"); } - void FolderNodes(const framework::Node &node) { + void FolderNodes(framework::Node &node) { vector> origin_descs = node.OpDescs(node_.Depth()); node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}}); diff --git a/src/operators/pool_op.h b/src/operators/pool_op.h index 7195c3b4e17..ff44771c561 100644 --- a/src/operators/pool_op.h +++ b/src/operators/pool_op.h @@ -22,6 +22,7 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { using framework::AttributeMap; +using framework::OperatorWithKernel; using framework::Scope; using std::string; template From 9f48fe1343ed651f71dc6f0ebcbfe790099ac018 Mon Sep 17 00:00:00 2001 From: wangliu Date: Wed, 30 May 2018 11:47:10 +0800 Subject: [PATCH 08/26] add timer for debug --- src/io/io.cpp | 421 --------------------------- src/io/io.h | 71 ----- test/executor_for_test.h | 2 +- test/framework/test_load.cpp | 2 +- test/framework/test_optimize.cpp | 2 +- test/net/test_googlenet.cpp | 12 +- test/operators/test_pool_op.cpp | 2 +- test/operators/test_reshape_op.cpp | 2 +- test/operators/test_sigmoid_op.cpp | 2 +- test/operators/test_softmax_op.cpp | 2 +- test/operators/test_transpose_op.cpp | 2 +- test/test_helper.h | 13 + test/test_include.h | 2 +- 13 files changed, 28 insertions(+), 507 deletions(-) delete mode 100644 src/io/io.cpp delete mode 100644 src/io/io.h diff --git a/src/io/io.cpp b/src/io/io.cpp deleted file mode 100644 index a773939505d..00000000000 --- a/src/io/io.cpp +++ /dev/null @@ -1,421 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "io/io.h" -#include -#include -#include "common/enforce.h" -#include "common/log.h" -#include "framework/framework.pb-c.h" -#include "framework/lod_tensor.h" -#include "framework/operator.h" -#include "framework/program/program_desc.h" -#include "framework/program/var_desc.h" -#include "framework/scope.h" -#include "framework/tensor.h" - -namespace paddle_mobile { -using framework::Variable; - -void ReadBinaryFile(const std::string &filename, std::string *contents) { - std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_MOBILE_ENFORCE(fin.is_open(), "open file: %s failed", - filename.c_str()); - fin.seekg(0, std::ios::end); - contents->clear(); - contents->resize(fin.tellg()); - fin.seekg(0, std::ios::beg); - fin.read(&(contents->at(0)), contents->size()); - fin.close(); -} - -static size_t ReadBuffer(const char *file_name, uint8_t **out) { - printf("%s \n", file_name); - FILE *fp; - fp = fopen(file_name, "rb"); - PADDLE_MOBILE_ENFORCE(fp != NULL, " %s open failed !", file_name); - - fseek(fp, 0, SEEK_END); - size_t size = ftell(fp); - rewind(fp); - - DLOG << "model size: " << size; - - *out = reinterpret_cast(size); - - size_t cur_len = 0; - size_t nread; - while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) { - cur_len += nread; - } - fclose(fp); - return cur_len; -} - -template -void Loader::LoadVar(framework::Variable *variable, - const framework::VarDesc &var_desc, - const std::string &file_path) { - auto tensor = variable->GetMutable(); - std::ifstream is(file_path); - PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed", - file_path.c_str()); - - std::fpos pos; - pos = is.tellg(); // save current position - is.seekg(0, std::ios::end); - is.seekg(pos); // restore saved position - - // 1. version - uint32_t version; - is.read(reinterpret_cast(&version), sizeof(version)); - - // 2 Lod information - uint64_t lod_level; - is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); - auto &lod = *tensor->mutable_lod(); - lod.resize(lod_level); - for (uint64_t i = 0; i < lod_level; ++i) { - uint64_t size; - is.read(reinterpret_cast(&size), sizeof(size)); - std::vector tmp(size / sizeof(size_t)); - is.read(reinterpret_cast(tmp.data()), - static_cast(size)); - for (auto j : tmp) { - LOG(kLOG_DEBUG1) << " lod - " << j; - } - lod[i] = tmp; - } - - // 3. tensor version - uint32_t tensor_version; - is.read(reinterpret_cast(&tensor_version), sizeof(tensor_version)); - - // 4. 
tensor desc - int32_t size; - is.read(reinterpret_cast(&size), sizeof(size)); - std::unique_ptr buf(new char[size]); - is.read(reinterpret_cast(buf.get()), size); - - const framework::TensorDesc &desc = var_desc.Tensor_desc(); - - PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor_desc = NULL; - // void *v; - // PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure()(tensor_desc, - // buf.get()); - - // DLOG << "PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure- " << - // tensor_desc; - - // framework::TensorDesc &tensor_desc = variable-> - // PaddleMobile__Framework__Proto__ProgramDesc *c_program; - // uint8_t *proto_buf = NULL; - // size_t read_size = ReadBuffer(file_path.c_str(), &proto_buf); - // c_program = paddle_mobile__framework__proto__program_desc__unpack(NULL, - // read_size, buf); - - // paddle_mobile__framework__proto__var_type__tensor_desc__init() - - int memory_size = 1; - for (auto l : desc.Dims()) { - memory_size *= l; - } - - tensor->Resize(framework::make_ddim(desc.Dims())); - - void *memory = tensor; - int type_size = 0; - switch (desc.DataType()) { - case framework::VARTYPE_TYPE_FP16: - type_size = 2; - break; - case framework::VARTYPE_TYPE_FP32: - type_size = 4; - memory = tensor->mutable_data(); - break; - case framework::VARTYPE_TYPE_FP64: - type_size = 8; - break; - case framework::VARTYPE_TYPE_INT32: - type_size = 4; - break; - case framework::VARTYPE_TYPE_INT64: - type_size = 8; - break; - case framework::VARTYPE_TYPE_BOOL: - type_size = 1; - break; - default: - break; - } - - is.read(static_cast(memory), memory_size * type_size); - is.close(); -} - -template -const framework::Program Loader::Load( - const std::string &dirname) { - std::string model_filename = dirname + "/__model__"; - PaddleMobile__Framework__Proto__ProgramDesc *c_program; - uint8_t *buf = NULL; - size_t read_size = ReadBuffer(model_filename.c_str(), &buf); - - PADDLE_MOBILE_ENFORCE(buf != NULL, "read from __model__ is null"); - - c_program = paddle_mobile__framework__proto__program_desc__unpack( - NULL, read_size, buf); - - PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null"); - - DLOG << "n_ops: " << (*c_program->blocks)->n_ops; - - std::shared_ptr originProgramDesc = - std::make_shared(c_program); - - framework::Program program; - program.model_path = dirname; - program.originProgram = originProgramDesc; - - std::shared_ptr scope = - std::make_shared(); - program.scope = scope; - originProgramDesc->Block(0); - - for (const auto &block : originProgramDesc->Blocks()) { - for (int i = 0; i < block->Vars().size(); ++i) { - std::shared_ptr var_desc = block->Vars()[i]; - // DLOG << "var name-- " << var_desc->Name(); - auto var = scope->Var(var_desc->Name()); - - if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { - if (var_desc->Persistable() && - var_desc->Type() != framework::VARTYPE_TYPE_FEED_MINIBATCH && - var_desc->Type() != framework::VARTYPE_TYPE_FETCH_LIST) { - // DLOG << "to load var "; - auto dim = var_desc->Tensor_desc().Dims(); - auto tensor = var->GetMutable(); - tensor->Resize(framework::make_ddim(dim)); - } else { - auto dim = var_desc->Tensor_desc().Dims(); - PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0"); - dim[0] = 1; - auto tensor = var->GetMutable(); - tensor->Resize(framework::make_ddim(dim)); - } - } else { - // TODO(codeWorm): some. 
- } - } - } - - originProgramDesc->Description("program: "); - - paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL); - return program; -} - -template class Loader; - -#pragma mark - executor - -template -Executor::Executor(const framework::Program p) : program_(p) { - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - - const std::vector> blocks = - to_predict_program_->Blocks(); - for (int i = 0; i < blocks.size(); ++i) { - std::shared_ptr block_desc = blocks[i]; - std::vector> ops = block_desc->Ops(); - for (int j = 0; j < ops.size(); ++j) { - std::shared_ptr op = ops[j]; - auto op_base = framework::OpRegistry::CreateOp( - op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), - program_.scope); - op_base->InferShape(); - ops_of_block_[*block_desc.get()].push_back(op_base); - } - } - InitMemory(); -} - -template -Executor::Executor(const framework::Program p, int batch_size) - : program_(p), batch_size_(batch_size) { - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - Variable *variable_ptr = program_.scope->Var("batch_size"); - variable_ptr[0].SetValue(batch_size); - const std::vector> blocks = - to_predict_program_->Blocks(); - for (int i = 0; i < blocks.size(); ++i) { - std::shared_ptr block_desc = blocks[i]; - std::vector> ops = block_desc->Ops(); - for (int j = 0; j < ops.size(); ++j) { - std::shared_ptr op = ops[j]; - auto op_base = framework::OpRegistry::CreateOp( - op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), - program_.scope); - op_base->InferShape(); - - ops_of_block_[*block_desc.get()].push_back(op_base); - } - } - InitMemory(); -} - -template -void Executor::LoadMemory(const framework::VarDesc var_desc, - framework::LoDTensor *tensor, - const std::string &file_path) { - std::ifstream is(file_path); - PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed", - file_path.c_str()); - std::fpos pos; - pos = is.tellg(); // save current position - is.seekg(0, std::ios::end); - is.seekg(pos); // restore saved position - - // 1. version - uint32_t version; - is.read(reinterpret_cast(&version), sizeof(version)); - - // 2 Lod information - uint64_t lod_level; - is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); - auto &lod = *tensor->mutable_lod(); - lod.resize(lod_level); - for (uint64_t i = 0; i < lod_level; ++i) { - uint64_t size; - is.read(reinterpret_cast(&size), sizeof(size)); - std::vector tmp(size / sizeof(size_t)); - is.read(reinterpret_cast(tmp.data()), - static_cast(size)); - for (auto j : tmp) { - LOG(kLOG_DEBUG1) << " lod - " << j; - } - lod[i] = tmp; - } - - // 3. tensor version - uint32_t tensor_version; - is.read(reinterpret_cast(&tensor_version), sizeof(tensor_version)); - - // 4. 
tensor desc - int32_t size; - is.read(reinterpret_cast(&size), sizeof(size)); - std::unique_ptr buf(new char[size]); - is.read(reinterpret_cast(buf.get()), size); - - const framework::TensorDesc &desc = var_desc.Tensor_desc(); - - int memory_size = 1; - for (auto l : desc.Dims()) { - memory_size *= l; - } - - tensor->Resize(framework::make_ddim(desc.Dims())); - - void *memory = tensor; - int type_size = 0; - switch (desc.DataType()) { - case framework::VARTYPE_TYPE_FP16: - type_size = 2; - break; - case framework::VARTYPE_TYPE_FP32: - type_size = 4; - memory = tensor->mutable_data(); - break; - case framework::VARTYPE_TYPE_FP64: - type_size = 8; - break; - case framework::VARTYPE_TYPE_INT32: - type_size = 4; - break; - case framework::VARTYPE_TYPE_INT64: - type_size = 8; - break; - case framework::VARTYPE_TYPE_BOOL: - type_size = 1; - break; - default: - break; - } - - is.read(static_cast(memory), memory_size * type_size); - is.close(); -} - -template -void Executor::InitMemory() { - for (const auto &block : to_predict_program_->Blocks()) { - for (const auto &var_desc : block->Vars()) { - auto var = program_.scope->Var(var_desc->Name()); - if (var_desc->Persistable()) { - auto tensor = var->template GetMutable(); - if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { - continue; - } - LoadMemory(*var_desc, tensor, - program_.model_path + "/" + var_desc->Name()); - } else { - if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { - auto tensor = var->template GetMutable(); - - tensor->template mutable_data(); - } - } - } - } -} - -template -void Executor::predict(const framework::Tensor &t, int block_id) { - framework::Variable *g_feed_value = program_.scope->Var("feed"); - framework::Tensor *feed_tensor = - g_feed_value->GetMutable(); - feed_tensor->Resize(t.dims()); - feed_tensor->ShareDataWith(t); - std::shared_ptr to_predict_block = - to_predict_program_->Block(block_id); - for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { - auto op = ops_of_block_[*to_predict_block.get()][j]; - op->Run(); - } -} - -template -std::vector::Ptype> Executor::predict( - const std::vector &input, const std::vector &dims) { - framework::Tensor tensor(input, framework::make_ddim(dims)); - - predict(tensor, 0); - - framework::Variable *g_feed_value = program_.scope->Var("col"); - auto feed_tensor = g_feed_value->GetMutable(); - - return {}; -} - -template class Executor; - -} // namespace paddle_mobile diff --git a/src/io/io.h b/src/io/io.h deleted file mode 100644 index 678441a9e05..00000000000 --- a/src/io/io.h +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
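Worth noting while this file is moved: the removed ReadBuffer casts the size_t itself to a pointer (*out = reinterpret_cast(size)), so no buffer is ever allocated; the re-created file below restores the malloc call. A self-contained version of the intended read-whole-file loop, where ReadWholeFile is an illustrative name and error handling is simplified:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static size_t ReadWholeFile(const char *file_name, uint8_t **out) {
  FILE *fp = std::fopen(file_name, "rb");
  if (fp == nullptr) return 0;  // caller must check for failure
  std::fseek(fp, 0, SEEK_END);
  size_t size = static_cast<size_t>(std::ftell(fp));
  std::rewind(fp);
  // Cast the pointer returned by malloc, never the byte count itself.
  *out = reinterpret_cast<uint8_t *>(std::malloc(size));
  size_t cur_len = 0, nread = 0;
  while ((nread = std::fread(*out + cur_len, 1, size - cur_len, fp)) != 0) {
    cur_len += nread;  // fread may return short counts; keep looping
  }
  std::fclose(fp);
  return cur_len;
}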
*/ - -#pragma once - -#include -#include -#include - -#include "common/types.h" -#include "framework/lod_tensor.h" -#include "framework/operator.h" -#include "framework/paddle_mobile_object.h" -#include "framework/program/program.h" -#include "framework/tensor.h" - -namespace paddle_mobile { - -template -class Loader : PaddleMobileObject { - public: - const framework::Program Load(const std::string &dirname); - - private: - void LoadVar(framework::Variable *variable, - const framework::VarDesc &var_desc, - const std::string &file_path); -}; - -template -class Executor { - public: - typedef typename PrecisionTrait

::ptype Ptype; - - Executor() = default; - - Executor(const framework::Program p); - - Executor(const framework::Program p, int batch_size); - - std::shared_ptr predict(framework::Tensor &t); - - std::vector predict(const std::vector &input, - const std::vector &dims); - - protected: - void InitMemory(); - void LoadMemory(const framework::VarDesc var_desc, - framework::LoDTensor *tensor, const std::string &file_path); - framework::Program program_; - int batch_size_ = 1; - std::shared_ptr to_predict_program_; - void predict(const framework::Tensor &t, int block_id); - std::map>>> - ops_of_block_; - bool use_optimize_ = false; -}; - -} // namespace paddle_mobile diff --git a/test/executor_for_test.h b/test/executor_for_test.h index 1eac6530209..c69eba222fb 100644 --- a/test/executor_for_test.h +++ b/test/executor_for_test.h @@ -17,9 +17,9 @@ limitations under the License. */ #include #include +#include "common/io.h" #include "common/log.h" #include "framework/op_registry.h" -#include "io/io.h" #include "operators/conv_op.h" #include "operators/elementwise_add_op.h" #include "operators/pool_op.h" diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp index cae699b792f..fe403b55a18 100644 --- a/test/framework/test_load.cpp +++ b/test/framework/test_load.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "io/io.h" +#include "common/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/framework/test_optimize.cpp b/test/framework/test_optimize.cpp index 6681ce83bb5..4c4dc6eb3ee 100644 --- a/test/framework/test_optimize.cpp +++ b/test/framework/test_optimize.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "common/io.h" #include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/program_optimize.h" -#include "io/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp index 363825fe726..d52f080277a 100644 --- a/test/net/test_googlenet.cpp +++ b/test/net/test_googlenet.cpp @@ -13,25 +13,25 @@ See the License for the specific language governing permissions and limitations under the License. */ #include - #include "../test_helper.h" #include "../test_include.h" -#include "io/io.h" int main() { paddle_mobile::Loader loader; // ../../../test/models/googlenet // ../../../test/models/mobilenet + auto time1 = time(); auto program = loader.Load(std::string("../models/googlenet")); - + auto time2 = time(); + DLOG << "load cost :" << time_diff(time1, time2) << "ms"; paddle_mobile::Executor executor(program, 1); std::vector input; std::vector dims{1, 3, 224, 224}; GetInput(g_test_image_1x3x224x224, &input, dims); - - // DLOG << " input: " << input; + auto time3 = time(); executor.predict(input, dims); - + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; return 0; } diff --git a/test/operators/test_pool_op.cpp b/test/operators/test_pool_op.cpp index d92fb66efd7..8a1c0a7ccec 100644 --- a/test/operators/test_pool_op.cpp +++ b/test/operators/test_pool_op.cpp @@ -14,7 +14,7 @@ limitations under the License.
*/ #include "../executor_for_test.h" #include "../test_helper.h" -#include "io/io.h" +#include "common/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/operators/test_reshape_op.cpp b/test/operators/test_reshape_op.cpp index d0cb9ac2df0..b0251e693a7 100644 --- a/test/operators/test_reshape_op.cpp +++ b/test/operators/test_reshape_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "io/io.h" +#include "common/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp index 4ed3efaf28a..dcd35cd8e46 100644 --- a/test/operators/test_sigmoid_op.cpp +++ b/test/operators/test_sigmoid_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../../src/operators/kernel/sigmoid_kernel.h" #include "../test_helper.h" -#include "io/io.h" +#include "common/io.h" int main() { paddle_mobile::framework::Tensor input; diff --git a/test/operators/test_softmax_op.cpp b/test/operators/test_softmax_op.cpp index e0a616c9a46..094c48adbb6 100644 --- a/test/operators/test_softmax_op.cpp +++ b/test/operators/test_softmax_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "io/io.h" +#include "common/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/operators/test_transpose_op.cpp b/test/operators/test_transpose_op.cpp index 4ca05d612b7..23e3bc3ec47 100644 --- a/test/operators/test_transpose_op.cpp +++ b/test/operators/test_transpose_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "io/io.h" +#include "common/io.h" int main() { paddle_mobile::Loader loader; diff --git a/test/test_helper.h b/test/test_helper.h index 029ed9742f6..dba4dec9bbc 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include #include @@ -31,6 +32,18 @@ static const std::string g_test_image_1x3x224x224 = "../images/test_image_1x3x224x224_float"; using paddle_mobile::framework::DDim; using paddle_mobile::framework::Tensor; + +using Time = decltype(std::chrono::high_resolution_clock::now()); + +Time time() { return std::chrono::high_resolution_clock::now(); } + +double time_diff(Time t1, Time t2) { + typedef std::chrono::microseconds ms; + auto diff = t2 - t1; + ms counter = std::chrono::duration_cast(diff); + return counter.count() / 1000.0; +} + template void SetupTensor(paddle_mobile::framework::Tensor *input, paddle_mobile::framework::DDim dims, T lower, T upper) { diff --git a/test/test_include.h b/test/test_include.h index dd4bf5d127d..19a9bff8846 100644 --- a/test/test_include.h +++ b/test/test_include.h @@ -20,6 +20,7 @@ limitations under the License. */ #include "./test_helper.h" #include "common/enforce.h" +#include "common/io.h" #include "common/log.h" #include "framework/lod_tensor.h" #include "framework/operator.h" @@ -29,4 +30,3 @@ limitations under the License. 
*/ #include "framework/scope.h" #include "framework/tensor.h" #include "framework/variable.h" -#include "io/io.h" From abc98f5c6c29ebbeac695f282358aff7cb86cab3 Mon Sep 17 00:00:00 2001 From: wangliu Date: Wed, 30 May 2018 13:02:53 +0800 Subject: [PATCH 09/26] commit io files --- src/common/io.cpp | 421 ++++++++++++++++++++++++++++++++++++++++++++++ src/common/io.h | 71 ++++++++ 2 files changed, 492 insertions(+) create mode 100644 src/common/io.cpp create mode 100644 src/common/io.h diff --git a/src/common/io.cpp b/src/common/io.cpp new file mode 100644 index 00000000000..4f9309aa108 --- /dev/null +++ b/src/common/io.cpp @@ -0,0 +1,421 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "io.h" +#include +#include +#include "common/enforce.h" +#include "common/log.h" +#include "framework/framework.pb-c.h" +#include "framework/lod_tensor.h" +#include "framework/operator.h" +#include "framework/program/program_desc.h" +#include "framework/program/var_desc.h" +#include "framework/scope.h" +#include "framework/tensor.h" + +namespace paddle_mobile { +using framework::Variable; + +void ReadBinaryFile(const std::string &filename, std::string *contents) { + std::ifstream fin(filename, std::ios::in | std::ios::binary); + PADDLE_MOBILE_ENFORCE(fin.is_open(), "open file: %s failed", + filename.c_str()); + fin.seekg(0, std::ios::end); + contents->clear(); + contents->resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(contents->at(0)), contents->size()); + fin.close(); +} + +static size_t ReadBuffer(const char *file_name, uint8_t **out) { + printf("%s \n", file_name); + FILE *fp; + fp = fopen(file_name, "rb"); + PADDLE_MOBILE_ENFORCE(fp != NULL, " %s open failed !", file_name); + + fseek(fp, 0, SEEK_END); + size_t size = ftell(fp); + rewind(fp); + + DLOG << "model size: " << size; + + *out = reinterpret_cast(malloc(size)); + + size_t cur_len = 0; + size_t nread; + while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) { + cur_len += nread; + } + fclose(fp); + return cur_len; +} + +template +void Loader::LoadVar(framework::Variable *variable, + const framework::VarDesc &var_desc, + const std::string &file_path) { + auto tensor = variable->GetMutable(); + std::ifstream is(file_path); + PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed", + file_path.c_str()); + + std::fpos pos; + pos = is.tellg(); // save current position + is.seekg(0, std::ios::end); + is.seekg(pos); // restore saved position + + // 1. 
version + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + + // 2 Lod information + uint64_t lod_level; + is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); + auto &lod = *tensor->mutable_lod(); + lod.resize(lod_level); + for (uint64_t i = 0; i < lod_level; ++i) { + uint64_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::vector tmp(size / sizeof(size_t)); + is.read(reinterpret_cast(tmp.data()), + static_cast(size)); + for (auto j : tmp) { + LOG(kLOG_DEBUG1) << " lod - " << j; + } + lod[i] = tmp; + } + + // 3. tensor version + uint32_t tensor_version; + is.read(reinterpret_cast(&tensor_version), sizeof(tensor_version)); + + // 4. tensor desc + int32_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::unique_ptr buf(new char[size]); + is.read(reinterpret_cast(buf.get()), size); + + const framework::TensorDesc &desc = var_desc.Tensor_desc(); + + PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor_desc = NULL; + // void *v; + // PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure()(tensor_desc, + // buf.get()); + + // DLOG << "PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure- " << + // tensor_desc; + + // framework::TensorDesc &tensor_desc = variable-> + // PaddleMobile__Framework__Proto__ProgramDesc *c_program; + // uint8_t *proto_buf = NULL; + // size_t read_size = ReadBuffer(file_path.c_str(), &proto_buf); + // c_program = paddle_mobile__framework__proto__program_desc__unpack(NULL, + // read_size, buf); + + // paddle_mobile__framework__proto__var_type__tensor_desc__init() + + int memory_size = 1; + for (auto l : desc.Dims()) { + memory_size *= l; + } + + tensor->Resize(framework::make_ddim(desc.Dims())); + + void *memory = tensor; + int type_size = 0; + switch (desc.DataType()) { + case framework::VARTYPE_TYPE_FP16: + type_size = 2; + break; + case framework::VARTYPE_TYPE_FP32: + type_size = 4; + memory = tensor->mutable_data(); + break; + case framework::VARTYPE_TYPE_FP64: + type_size = 8; + break; + case framework::VARTYPE_TYPE_INT32: + type_size = 4; + break; + case framework::VARTYPE_TYPE_INT64: + type_size = 8; + break; + case framework::VARTYPE_TYPE_BOOL: + type_size = 1; + break; + default: + break; + } + + is.read(static_cast(memory), memory_size * type_size); + is.close(); +} + +template +const framework::Program Loader::Load( + const std::string &dirname) { + std::string model_filename = dirname + "/__model__"; + PaddleMobile__Framework__Proto__ProgramDesc *c_program; + uint8_t *buf = NULL; + size_t read_size = ReadBuffer(model_filename.c_str(), &buf); + + PADDLE_MOBILE_ENFORCE(buf != NULL, "read from __model__ is null"); + + c_program = paddle_mobile__framework__proto__program_desc__unpack( + NULL, read_size, buf); +// + PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null"); +// + DLOG << "n_ops: " << (*c_program->blocks)->n_ops; +// + std::shared_ptr originProgramDesc = + std::make_shared(c_program); + + framework::Program program; + program.model_path = dirname; + program.originProgram = originProgramDesc; + + std::shared_ptr scope = + std::make_shared(); + program.scope = scope; + originProgramDesc->Block(0); + + for (const auto &block : originProgramDesc->Blocks()) { + for (int i = 0; i < block->Vars().size(); ++i) { + std::shared_ptr var_desc = block->Vars()[i]; + // DLOG << "var name-- " << var_desc->Name(); + auto var = scope->Var(var_desc->Name()); + + if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { + if (var_desc->Persistable() && + var_desc->Type() != 
framework::VARTYPE_TYPE_FEED_MINIBATCH && + var_desc->Type() != framework::VARTYPE_TYPE_FETCH_LIST) { + // DLOG << "to load var "; + auto dim = var_desc->Tensor_desc().Dims(); + auto tensor = var->GetMutable(); + tensor->Resize(framework::make_ddim(dim)); + } else { + auto dim = var_desc->Tensor_desc().Dims(); + PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0"); + dim[0] = 1; + auto tensor = var->GetMutable(); + tensor->Resize(framework::make_ddim(dim)); + } + } else { + // TODO(codeWorm): some. + } + } + } + + originProgramDesc->Description("program: "); + + paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL); + return program; +} + +template class Loader; + +#pragma mark - executor + +template +Executor::Executor(const framework::Program p) : program_(p) { + if (use_optimize_) { + to_predict_program_ = program_.optimizeProgram; + } else { + to_predict_program_ = program_.originProgram; + } + + const std::vector> blocks = + to_predict_program_->Blocks(); + for (int i = 0; i < blocks.size(); ++i) { + std::shared_ptr block_desc = blocks[i]; + std::vector> ops = block_desc->Ops(); + for (int j = 0; j < ops.size(); ++j) { + std::shared_ptr op = ops[j]; + auto op_base = framework::OpRegistry::CreateOp( + op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), + program_.scope); + op_base->InferShape(); + ops_of_block_[*block_desc.get()].push_back(op_base); + } + } + InitMemory(); +} + +template +Executor::Executor(const framework::Program p, int batch_size) + : program_(p), batch_size_(batch_size) { + if (use_optimize_) { + to_predict_program_ = program_.optimizeProgram; + } else { + to_predict_program_ = program_.originProgram; + } + Variable *variable_ptr = program_.scope->Var("batch_size"); + variable_ptr[0].SetValue(batch_size); + const std::vector> blocks = + to_predict_program_->Blocks(); + for (int i = 0; i < blocks.size(); ++i) { + std::shared_ptr block_desc = blocks[i]; + std::vector> ops = block_desc->Ops(); + for (int j = 0; j < ops.size(); ++j) { + std::shared_ptr op = ops[j]; + auto op_base = framework::OpRegistry::CreateOp( + op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), + program_.scope); + op_base->InferShape(); + + ops_of_block_[*block_desc.get()].push_back(op_base); + } + } + InitMemory(); +} + +template +void Executor::LoadMemory(const framework::VarDesc var_desc, + framework::LoDTensor *tensor, + const std::string &file_path) { + std::ifstream is(file_path); + PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed", + file_path.c_str()); + std::fpos pos; + pos = is.tellg(); // save current position + is.seekg(0, std::ios::end); + is.seekg(pos); // restore saved position + + // 1. version + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + + // 2 Lod information + uint64_t lod_level; + is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); + auto &lod = *tensor->mutable_lod(); + lod.resize(lod_level); + for (uint64_t i = 0; i < lod_level; ++i) { + uint64_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::vector tmp(size / sizeof(size_t)); + is.read(reinterpret_cast(tmp.data()), + static_cast(size)); + for (auto j : tmp) { + LOG(kLOG_DEBUG1) << " lod - " << j; + } + lod[i] = tmp; + } + + // 3. tensor version + uint32_t tensor_version; + is.read(reinterpret_cast(&tensor_version), sizeof(tensor_version)); + + // 4. 
tensor desc + int32_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::unique_ptr buf(new char[size]); + is.read(reinterpret_cast(buf.get()), size); + + const framework::TensorDesc &desc = var_desc.Tensor_desc(); + + int memory_size = 1; + for (auto l : desc.Dims()) { + memory_size *= l; + } + + tensor->Resize(framework::make_ddim(desc.Dims())); + + void *memory = tensor; + int type_size = 0; + switch (desc.DataType()) { + case framework::VARTYPE_TYPE_FP16: + type_size = 2; + break; + case framework::VARTYPE_TYPE_FP32: + type_size = 4; + memory = tensor->mutable_data(); + break; + case framework::VARTYPE_TYPE_FP64: + type_size = 8; + break; + case framework::VARTYPE_TYPE_INT32: + type_size = 4; + break; + case framework::VARTYPE_TYPE_INT64: + type_size = 8; + break; + case framework::VARTYPE_TYPE_BOOL: + type_size = 1; + break; + default: + break; + } + + is.read(static_cast(memory), memory_size * type_size); + is.close(); +} + +template +void Executor::InitMemory() { + for (const auto &block : to_predict_program_->Blocks()) { + for (const auto &var_desc : block->Vars()) { + auto var = program_.scope->Var(var_desc->Name()); + if (var_desc->Persistable()) { + auto tensor = var->template GetMutable(); + if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { + continue; + } + LoadMemory(*var_desc, tensor, + program_.model_path + "/" + var_desc->Name()); + } else { + if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { + auto tensor = var->template GetMutable(); + + tensor->template mutable_data(); + } + } + } + } +} + +template +void Executor::predict(const framework::Tensor &t, int block_id) { + framework::Variable *g_feed_value = program_.scope->Var("feed"); + framework::Tensor *feed_tensor = + g_feed_value->GetMutable(); + feed_tensor->Resize(t.dims()); + feed_tensor->ShareDataWith(t); + std::shared_ptr to_predict_block = + to_predict_program_->Block(block_id); + for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { + auto op = ops_of_block_[*to_predict_block.get()][j]; + op->Run(); + } +} + +template +std::vector::Ptype> Executor::predict( + const std::vector &input, const std::vector &dims) { + framework::Tensor tensor(input, framework::make_ddim(dims)); + + predict(tensor, 0); + + framework::Variable *g_feed_value = program_.scope->Var("col"); + auto feed_tensor = g_feed_value->GetMutable(); + + return {}; +} + +template class Executor; + +} // namespace paddle_mobile diff --git a/src/common/io.h b/src/common/io.h new file mode 100644 index 00000000000..678441a9e05 --- /dev/null +++ b/src/common/io.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include +#include + +#include "common/types.h" +#include "framework/lod_tensor.h" +#include "framework/operator.h" +#include "framework/paddle_mobile_object.h" +#include "framework/program/program.h" +#include "framework/tensor.h" + +namespace paddle_mobile { + +template +class Loader : PaddleMobileObject { + public: + const framework::Program Load(const std::string &dirname); + + private: + void LoadVar(framework::Variable *variable, + const framework::VarDesc &var_desc, + const std::string &file_path); +}; + +template +class Executor { + public: + typedef typename PrecisionTrait
::ptype Ptype;
+
+  Executor() = default;
+
+  Executor(const framework::Program p);
+
+  Executor(const framework::Program p, int batch_size);
+
+  std::shared_ptr predict(framework::Tensor &t);
+
+  std::vector predict(const std::vector &input,
+                      const std::vector &dims);
+
+ protected:
+  void InitMemory();
+  void LoadMemory(const framework::VarDesc var_desc,
+                  framework::LoDTensor *tensor, const std::string &file_path);
+  framework::Program program_;
+  int batch_size_ = 1;
+  std::shared_ptr to_predict_program_;
+  void predict(const framework::Tensor &t, int block_id);
+  std::map>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+};
+
+}  // namespace paddle_mobile

From df6475ee7ac1f8e858b2f0c9bc896635c22ab7af Mon Sep 17 00:00:00 2001
From: wangliu
Date: Wed, 30 May 2018 13:08:39 +0800
Subject: [PATCH 10/26] commit io files

---
 src/common/io.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/common/io.cpp b/src/common/io.cpp
index 4f9309aa108..fc1466237e9 100644
--- a/src/common/io.cpp
+++ b/src/common/io.cpp
@@ -176,11 +176,11 @@ const framework::Program Loader::Load(
   c_program = paddle_mobile__framework__proto__program_desc__unpack(
       NULL, read_size, buf);
-//
+  //
   PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null");
-//
+  //
   DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
-//
+  //
   std::shared_ptr originProgramDesc =
       std::make_shared(c_program);

From 5f3f414f84ba2be8d1fb1fbf70ce975532f0f858 Mon Sep 17 00:00:00 2001
From: wangliu
Date: Wed, 30 May 2018 14:29:07 +0800
Subject: [PATCH 11/26] Init tensor memory in executor_for_test

---
 src/framework/operator.cpp | 1 +
 test/executor_for_test.h   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp
index dfdf0af79ac..808002d4c8f 100644
--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -23,6 +23,7 @@ vector OperatorBase::GetOutKeys() const {
   auto it = op_input_output_key.find(type_);
   if (it == op_input_output_key.end()) {
     DLOG << type_ << " has no outputs";
+    return {};
   }
   return it->second.second;
 }
diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index c69eba222fb..48b6b5cf3c6 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -73,6 +73,7 @@ class Executor4Test : public Executor {
       }
     }
   }
+  this->InitMemory();
 }

From 29c19e4411befa52dd4c303a21c49aa52749a372 Mon Sep 17 00:00:00 2001
From: liuruilong
Date: Wed, 30 May 2018 15:30:45 +0800
Subject: [PATCH 12/26] add split, format codes

---
 src/common/types.h                          | 41 +++++++++++
 src/framework/operator.h                    | 69 +++++++++++--------
 .../program/program-optimize/node.cpp       | 66 ++++++++++++++++--
 src/framework/program/program-optimize/node.h | 4 +-
 .../program-optimize/program_optimize.cpp   |  4 +-
 .../program-optimize/program_optimize.h     |  1 -
 src/{common => }/io.cpp                     | 60 ++++++----------
 src/{common => }/io.h                       | 12 ++--
 src/operators/fusion_conv_add_relu_op.h     | 11 ++-
 src/operators/fusion_fc_op.h                |  8 +--
 src/operators/kernel/arm/relu_kernel.cpp    |  4 ++
 src/operators/op_param.h                    |  4 +-
 src/operators/relu_op.cpp                   |  4 ++
 src/operators/relu_op.h                     | 10 +++
 test/executor_for_test.h                    |  6 +-
 test/framework/test_load.cpp                |  7 +-
 test/framework/test_optimize.cpp            |  7 +-
 test/net/test_googlenet.cpp                 |  6 +-
 test/operators/test_batchnorm_op.cpp        |  2 +-
 test/operators/test_box_coder_op.cpp        |  2 +-
 test/operators/test_concat_op.cpp           |  2 +-
 test/operators/test_cov_op.cpp              |  2 +-
 test/operators/test_elementwise_add_op.cpp  |  2 +-
 test/operators/test_fushion_fc_op.cpp       |  2 +-
test/operators/test_lrn_op.cpp | 2 +- test/operators/test_mul_op.cpp | 2 +- test/operators/test_pool_op.cpp | 6 +- test/operators/test_prior_box_op.cpp | 2 +- test/operators/test_relu_op.cpp | 2 +- test/operators/test_reshape_op.cpp | 6 +- test/operators/test_sigmoid_op.cpp | 2 +- test/operators/test_softmax_op.cpp | 6 +- test/operators/test_transpose_op.cpp | 6 +- test/test_helper.h | 2 +- test/test_include.h | 2 +- 35 files changed, 242 insertions(+), 132 deletions(-) rename src/{common => }/io.cpp (91%) rename src/{common => }/io.h (86%) diff --git a/src/common/types.h b/src/common/types.h index ae76c953aa5..252c747d75f 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -14,6 +14,10 @@ limitations under the License. */ #pragma once; +#include +#include +#include + namespace paddle_mobile { enum class Precision : int { FP32 = 0 }; @@ -67,4 +71,41 @@ enum PMStatus { PMUnImplError = 0x07, /*!< Unimplement error. */ PMWrongDevice = 0x08 /*!< un-correct device. */ }; + +static const std::string G_OP_TYPE_CONV = "conv2d"; +static const std::string G_OP_TYPE_BATCHNORM = "batch_norm"; +static const std::string G_OP_TYPE_BOX_CODER = "box_coder"; +static const std::string G_OP_TYPE_CONCAT = "concat"; +static const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add"; +static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "FusionConvAddRelu"; +static const std::string G_OP_TYPE_FC = "fc"; +static const std::string G_OP_TYPE_LRN = "lrn"; +static const std::string G_OP_TYPE_MUL = "mul"; +static const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms"; +static const std::string G_OP_TYPE_POOL2D = "pool2d"; +static const std::string G_OP_TYPE_PRIOR_BOX = "prior_box"; +static const std::string G_OP_TYPE_RELU = "relu"; +static const std::string G_OP_TYPE_RESHAPE = "reshape"; +static const std::string G_OP_TYPE_SIGMOID = "sigmoid"; +static const std::string G_OP_TYPE_SOFTMAX = "softmax"; +static const std::string G_OP_TYPE_TRANSPOSE = "transpose"; +static const std::string G_OP_TYPE_SPLIT = "split"; +static const std::string G_OP_TYPE_FEED = "feed"; +static const std::string G_OP_TYPE_FETCH = "fetch"; + +static std::unordered_map< + std::string, std::pair, std::vector>> + op_input_output_key = {{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}}, + {G_OP_TYPE_RELU, {{"X"}, {"Out"}}}, + {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}}, + {G_OP_TYPE_MUL, {{"X"}, {"Out"}}}, + {G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}}, + {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}}, + {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}}, + {G_OP_TYPE_LRN, {{"X"}, {"Out"}}}, + {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}}, + {G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}}, + {G_OP_TYPE_FEED, {{"X"}, {"Out"}}}, + {G_OP_TYPE_FETCH, {{"X"}, {"Out"}}}}; + } // namespace paddle_mobile diff --git a/src/framework/operator.h b/src/framework/operator.h index a44d264a188..e9dc6f6fb75 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -19,61 +19,64 @@ limitations under the License. 
*/

 #include
 #include

-#include "common/enforce.h"
-#include "common/type_define.h"
 #include "common/types.h"
+#include "common/enforce.h"
 #include "common/variant.h"
-#include "framework/attribute.h"
+#include "framework/scope.h"
+#include "framework/tensor.h"
 #include "framework/op_info.h"
-#include "framework/op_kernel_type.h"
+#include "common/type_define.h"
+#include "framework/variable.h"
+#include "framework/attribute.h"
 #include "framework/op_registry.h"
-#include "framework/paddle_mobile_object.h"
+#include "framework/op_kernel_type.h"
 #include "framework/program/block_desc.h"
+#include "framework/paddle_mobile_object.h"
 #include "framework/program/program-optimize/node.h"
-#include "framework/scope.h"
-#include "framework/tensor.h"
-#include "framework/variable.h"

 namespace paddle_mobile {
 namespace framework {
 using std::string;
 using std::vector;

-static std::unordered_map<
-    std::string, std::pair, std::vector>>
-    op_input_output_key = {{"conv2d", {{"Input"}, {"Output"}}},
-                           {"relu", {{"X"}, {"Out"}}},
-                           {"softmax", {{"X"}, {"Out"}}},
-                           {"mul", {{"X"}, {"Out"}}},
-                           {"elementwise_add", {{"X", "Y"}, {"Out"}}},
-                           {"pool2d", {{"X"}, {"Out"}}},
-                           {"batch_norm", {{"X"}, {"Y"}}},
-                           {"lrn", {{"X"}, {"Out"}}},
-                           {"concat", {{"X"}, {"Out"}}},
-                           {"feed", {{"X"}, {"Out"}}},
-                           {"fetch", {{"X"}, {"Out"}}}};
-
 template
 class OperatorBase : PaddleMobileObject {
  public:
+  /*
+   * @b Constructor of the op base class; the op receives its inputs,
+   *    attributes, and the pre-allocated output tensors
+   * */
   OperatorBase(const std::string &type, const VariableNameMap &inputs,
                const VariableNameMap &outputs, const AttributeMap &attrs,
                std::shared_ptr scope);
   virtual ~OperatorBase() {}
   void Run() const;
-  vector GetOutKeys() const;
+  std::vector GetOutKeys() const;
   virtual void RunImpl() const = 0;
-  virtual void InferShape() const = 0;
+  /*
+   * @b Inputs the op computes on, e.g. the previous layer's output or a
+   *    convolution filter
+   * */
   const VariableNameMap &Inputs() const { return inputs_; }
+  /*
+   * @b Outputs of the op; their memory is allocated in advance and the
+   *    results are written into it
+   * */
   const VariableNameMap &Outputs() const { return outputs_; }
+  /*
+   * @b The op type
+   * */
   const std::string &Type() const { return type_; }
+  /*
+   * @b Attributes the op needs for its computation, e.g. the stride of a
+   *    conv op
+   * */
   const AttributeMap &Attrs() const { return attrs_; }
   void ClearVariables(const std::vector &var_names) const {
     if (this->scope_) {
       this->scope_->EraseVars(var_names);
     }
   }
-
+  /*
+   * @b Computes the output shape from the input shapes and attributes
+   * */
+  virtual void InferShape() const = 0;
 protected:
   std::shared_ptr scope_;
   std::string type_;
@@ -85,6 +88,9 @@ class OperatorBase : PaddleMobileObject {
   void CheckAllInputOutputSet() const;
 };

+/*
+ * @b Parent class of every op that carries a computation; it inherits from
+ *    OperatorBase
+ * */
 template
 class OperatorWithKernel : public OperatorBase {
  public:
@@ -97,11 +103,18 @@ class OperatorWithKernel : public OperatorBase {
   virtual void InferShape() const = 0;
 };

+/*
+ * @b Parent class of all kernels
+ * */
 template
 class OpKernelBase : PaddleMobileObject {
  public:
+  /*
+   * @b Every kernel must implement the Compute method
+   * @p para a struct holding the parameters the kernel needs for its
+   *    computation; all of these structs live in
+   *    paddle-mobile/src/operators/op_param.h
+   * */
   virtual void Compute(const P &para) const = 0;
-
   virtual ~OpKernelBase() = default;
 };

@@ -118,8 +131,8 @@ class FusionOpMatcher : PaddleMobileObject {
   virtual std::string Type() = 0;

-  virtual void FolderNodes(Node &node) {
-    node.Folder(node_.Depth(), Type(), {});
+  virtual void FolderNodes(Node *node) {
+    node->Folder(node_.Depth(), Type(), {});
   }

   virtual Node &BeginNode() { return node_; }
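The comments added above spell out the op/kernel contract. A rough sketch of a new op following that pattern is below; ScaleOp, ScaleKernel, ScaleParam, and the "scale" type string are hypothetical names used only for illustration, not part of this patch series:

// Hypothetical sketch of the OperatorWithKernel / OpKernelBase pattern.
// ScaleParam would be defined in src/operators/op_param.h, as ReluParam is
// later in this series.
template <typename DeviceType, typename T>
class ScaleOp : public framework::OperatorWithKernel<DeviceType> {
 public:
  ScaleOp(const std::string &type, const VariableNameMap &inputs,
          const VariableNameMap &outputs, const framework::AttributeMap attrs,
          std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
                                                  attrs, scope),
        param_(inputs, outputs, attrs, *scope) {}

  // An elementwise op's output keeps the input shape.
  void InferShape() const override {
    param_.Out()->Resize(param_.InputX()->dims());
  }

  // RunImpl hands the pre-built param struct to the kernel, as ReluOp does.
  void RunImpl() const {
    operators::ScaleKernel<DeviceType, T> kernel;
    kernel.Compute(param_);
  }

 protected:
  ScaleParam param_;
};

// Registration: the name given to USE_OP and REGISTER_OPERATOR must match
// the op type string stored in the model file.
namespace ops = paddle_mobile::operators;
USE_OP(scale);
REGISTER_OPERATOR(scale, ops::ScaleOp);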
diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp
index ac7137a47f3..f260fd0b61f 100644
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -14,6 +14,7 @@ limitations under the License. */

 #include
+#include "framework/operator.h"
 #include "framework/program/program-optimize/node.h"

 namespace paddle_mobile {
@@ -73,24 +74,79 @@ void Node::OpDescs(uint index,
 }

 void Node::OpDescs(std::vector> *op_desc,
-                   Node *node) {
-  auto iter = std::find(op_desc->begin(), op_desc->end(), this->op_desc_);
+                   Node *node, bool adding_thread, int thread_num) {
+  bool can_add_split = false;
+  if (outputs_.size() > 1) {
+    can_add_split = true;
+    if (op_input_output_key[op_desc_->type_].second.size() != 1) {
+      DLOG << "current op desc does not have exactly 1 output";
+      can_add_split = false;
+    }
+    for (const auto& output : outputs_) {
+      if (op_input_output_key.find(output->op_desc_->type_) != op_input_output_key.end()) {
+        auto inputs_and_outputs = op_input_output_key[output->op_desc_->type_];
+        auto outputs_of_output = output->op_desc_->Output(inputs_and_outputs.second[0]);
+        auto inputs_of_output = output->op_desc_->Input(inputs_and_outputs.first[0]);
+        for (int i = 0; i < inputs_of_output.size(); ++i) {
+          std::string input_of_output = inputs_of_output[i];
+          for (int j = 0; j < outputs_of_output.size(); ++j) {
+            std::string output_of_output = outputs_of_output[j];
+            if (input_of_output == output_of_output) {
+              DLOG << "the downstream op's outputs contain its input "
+                   << input_of_output;
+              can_add_split = false;
+              break;
+            }
+          }
+        }
+      } else {
+        DLOG << "cannot find this op type: " << output->op_desc_->type_;
+        can_add_split = false;
+      }
+    }
+  }
+
   if (inputs_.size() > 1 && node != inputs_.back()) {
     return;
   } else if (inputs_.size() > 1 && node == inputs_.back()) {
+    adding_thread = false;
     op_desc->push_back(this->op_desc_);
   } else {
     op_desc->push_back(this->op_desc_);
   }
+  if (adding_thread) {
+    Attribute attr;
+    attr.Set(thread_num);
+    this->op_desc_->attrs_["thread"] = attr;
+  }

-  for (auto &output : outputs_) {
-    output->OpDescs(op_desc, this);
+  if (can_add_split) {
+    adding_thread = true;
+    std::shared_ptr split_op_desc = std::make_shared();
+    split_op_desc->type_ = G_OP_TYPE_SPLIT;
+    auto outputs = this->op_desc_->Output(op_input_output_key[this->op_desc_->Type()].second[0]);
+
+    split_op_desc->inputs_ = {{op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
+    auto &split_outputs = split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
+    for (const auto& output : outputs_) {
+      split_outputs.push_back(outputs[0]);
+    }
+    DLOG << "add split";
+    op_desc->push_back(split_op_desc);
+  }
+
+  for (int i = 0; i < outputs_.size(); ++i) {
+    auto &output = outputs_[i];
+    if (can_add_split) {
+      output->OpDescs(op_desc, this, adding_thread, i);
+    } else {
+      output->OpDescs(op_desc, this, adding_thread, thread_num);
+    }
   }
 }

 std::vector> Node::OpDescs() {
   std::vector> op_descs;
-  OpDescs(&op_descs, this);
+  OpDescs(&op_descs, this, false, 0);
   return op_descs;
 }
diff --git a/src/framework/program/program-optimize/node.h b/src/framework/program/program-optimize/node.h
index da9a7ef5694..5dd1a3acbf5 100644
--- a/src/framework/program/program-optimize/node.h
+++ b/src/framework/program/program-optimize/node.h
@@ -42,13 +42,13 @@ class Node : PaddleMobileObject {
       std::map> change_map);
   std::vector> OpDescs(uint size);
   std::vector> OpDescs();
-  void OpDescs(std::vector> *op_desc,
-               Node *node);
   std::shared_ptr OpDesc() { return op_desc_; }
   std::string BeginType() { return type_; }
   void Description();

  private:
+  void OpDescs(std::vector> *op_desc,
+               Node *node, bool adding_thread, int thread_num);
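A note on the two parameters added above (an informal gloss, with a hypothetical two-consumer graph for illustration): adding_thread marks that the traversal is inside a forked branch, and thread_num is the branch index that gets recorded on each visited op as attrs_["thread"]. When a node feeds several consumers, its producing op has exactly one output key, and no consumer reads and writes the same variable (no in-place op), a split op of type G_OP_TYPE_SPLIT is synthesized between producer and consumers:

// before the pass:  conv2d_out is read directly by pool2d and relu
// after the pass:   conv2d -> split -> pool2d  (attrs_["thread"] = 0)
//                                  -> relu    (attrs_["thread"] = 1)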
void OpDescs(uint size, std::vector> *op_desc); void To(int index, std::shared_ptr); diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp index fd7edeed1b6..cd6899efe36 100644 --- a/src/framework/program/program-optimize/program_optimize.cpp +++ b/src/framework/program/program-optimize/program_optimize.cpp @@ -19,7 +19,7 @@ namespace paddle_mobile { namespace framework { -std::shared_ptr ProgramOptimize::Optimize() {} +//std::shared_ptr ProgramOptimize::Optimize() {} std::shared_ptr ProgramOptimize::FushionOptimize( std::shared_ptr ori_des) { @@ -86,7 +86,7 @@ std::shared_ptr ProgramOptimize::FushionOptimize( // DLOG << " match success " << " fusion node: \n" << // matcher->BeginNode() << "\nsub node: \n" << *sub_node; // DLOG << "match node\n"<< *match_node; - matcher->FolderNodes(*match_node); + matcher->FolderNodes(match_node.get()); // DLOG << " after match node\n"<< *match_node; // match_node->Description(); diff --git a/src/framework/program/program-optimize/program_optimize.h b/src/framework/program/program-optimize/program_optimize.h index 9dc4b19eba3..3839fa1e36b 100644 --- a/src/framework/program/program-optimize/program_optimize.h +++ b/src/framework/program/program-optimize/program_optimize.h @@ -27,7 +27,6 @@ namespace framework { class ProgramOptimize { public: ProgramOptimize() {} - std::shared_ptr Optimize(); std::shared_ptr FushionOptimize( std::shared_ptr ori_des); diff --git a/src/common/io.cpp b/src/io.cpp similarity index 91% rename from src/common/io.cpp rename to src/io.cpp index fc1466237e9..23b3e21ee81 100644 --- a/src/common/io.cpp +++ b/src/io.cpp @@ -15,15 +15,18 @@ limitations under the License. */ #include "io.h" #include #include -#include "common/enforce.h" #include "common/log.h" -#include "framework/framework.pb-c.h" -#include "framework/lod_tensor.h" -#include "framework/operator.h" -#include "framework/program/program_desc.h" -#include "framework/program/var_desc.h" + +#include "common/enforce.h" +#include "common/enforce.h" #include "framework/scope.h" #include "framework/tensor.h" +#include "framework/operator.h" +#include "framework/lod_tensor.h" +#include "framework/framework.pb-c.h" +#include "framework/program/var_desc.h" +#include "framework/program/program_desc.h" +#include "framework/program/program-optimize/program_optimize.h" namespace paddle_mobile { using framework::Variable; @@ -166,7 +169,7 @@ void Loader::LoadVar(framework::Variable *variable, template const framework::Program Loader::Load( - const std::string &dirname) { + const std::string &dirname, bool optimize) { std::string model_filename = dirname + "/__model__"; PaddleMobile__Framework__Proto__ProgramDesc *c_program; uint8_t *buf = NULL; @@ -199,11 +202,11 @@ const framework::Program Loader::Load( // DLOG << "var name-- " << var_desc->Name(); auto var = scope->Var(var_desc->Name()); + if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Persistable() && var_desc->Type() != framework::VARTYPE_TYPE_FEED_MINIBATCH && var_desc->Type() != framework::VARTYPE_TYPE_FETCH_LIST) { - // DLOG << "to load var "; auto dim = var_desc->Tensor_desc().Dims(); auto tensor = var->GetMutable(); tensor->Resize(framework::make_ddim(dim)); @@ -219,8 +222,12 @@ const framework::Program Loader::Load( } } } + // originProgramDesc->Description("program: "); - originProgramDesc->Description("program: "); + if (optimize) { + framework::ProgramOptimize program_optimize; + program.optimizeProgram = 
program_optimize.FushionOptimize(originProgramDesc); + } paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL); return program; @@ -231,33 +238,8 @@ template class Loader; #pragma mark - executor template -Executor::Executor(const framework::Program p) : program_(p) { - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - - const std::vector> blocks = - to_predict_program_->Blocks(); - for (int i = 0; i < blocks.size(); ++i) { - std::shared_ptr block_desc = blocks[i]; - std::vector> ops = block_desc->Ops(); - for (int j = 0; j < ops.size(); ++j) { - std::shared_ptr op = ops[j]; - auto op_base = framework::OpRegistry::CreateOp( - op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), - program_.scope); - op_base->InferShape(); - ops_of_block_[*block_desc.get()].push_back(op_base); - } - } - InitMemory(); -} - -template -Executor::Executor(const framework::Program p, int batch_size) - : program_(p), batch_size_(batch_size) { +Executor::Executor(const framework::Program p, int batch_size, bool use_optimize) + : program_(p), batch_size_(batch_size), use_optimize_(use_optimize) { if (use_optimize_) { to_predict_program_ = program_.optimizeProgram; } else { @@ -389,7 +371,7 @@ void Executor::InitMemory() { } template -void Executor::predict(const framework::Tensor &t, int block_id) { +void Executor::Predict(const framework::Tensor &t, int block_id) { framework::Variable *g_feed_value = program_.scope->Var("feed"); framework::Tensor *feed_tensor = g_feed_value->GetMutable(); @@ -404,11 +386,11 @@ void Executor::predict(const framework::Tensor &t, int block_id) { } template -std::vector::Ptype> Executor::predict( +std::vector::Ptype> Executor::Predict( const std::vector &input, const std::vector &dims) { framework::Tensor tensor(input, framework::make_ddim(dims)); - predict(tensor, 0); + Predict(tensor, 0); framework::Variable *g_feed_value = program_.scope->Var("col"); auto feed_tensor = g_feed_value->GetMutable(); diff --git a/src/common/io.h b/src/io.h similarity index 86% rename from src/common/io.h rename to src/io.h index 678441a9e05..8a73beba6d8 100644 --- a/src/common/io.h +++ b/src/io.h @@ -30,7 +30,7 @@ namespace paddle_mobile { template class Loader : PaddleMobileObject { public: - const framework::Program Load(const std::string &dirname); + const framework::Program Load(const std::string &dirname, bool optimize = true); private: void LoadVar(framework::Variable *variable, @@ -45,13 +45,11 @@ class Executor { Executor() = default; - Executor(const framework::Program p); + Executor(const framework::Program p, int batch_size = 1, bool use_optimize = true); - Executor(const framework::Program p, int batch_size); + // std::shared_ptr Predict(framework::Tensor &t); - std::shared_ptr predict(framework::Tensor &t); - - std::vector predict(const std::vector &input, + std::vector Predict(const std::vector &input, const std::vector &dims); protected: @@ -61,7 +59,7 @@ class Executor { framework::Program program_; int batch_size_ = 1; std::shared_ptr to_predict_program_; - void predict(const framework::Tensor &t, int block_id); + void Predict(const framework::Tensor &t, int block_id); std::map>>> ops_of_block_; diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index 39f11dd708c..cab55dc3617 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ b/src/operators/fusion_conv_add_relu_op.h @@ -23,18 +23,17 @@ namespace operators { class 
FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
  public:
   FushionConvAddReluOpMatcher() {
-    node_ = framework::Node("conv2d");
-    node_ > std::make_shared("elementwise_add") >
-        std::make_shared("relu");
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared(G_OP_TYPE_RELU);
   }

   void FolderNodes(framework::Node &node) {
     std::vector> origin_descs =
         node.OpDescs(node_.Depth());
-    node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}});
+    node.Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }
-
-  std::string Type() { return "FusionConvAddRelu"; }
+  std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; }
 };

 class FusionFcOp {
diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h
index 0ed5a2b4d5e..fd6f2658fd1 100644
--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -28,17 +28,17 @@ using std::vector;
 class FusionFcMatcher : public framework::FusionOpMatcher {
  public:
   FusionFcMatcher() {
-    node_ = framework::Node("mul");
-    node_ > std::make_shared("elementwise_add");
+    node_ = framework::Node(G_OP_TYPE_MUL);
+    node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD);
   }

   void FolderNodes(framework::Node &node) {
     vector> origin_descs =
         node.OpDescs(node_.Depth());
-    node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}});
+    node.Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }

-  std::string Type() { return "fc"; }
+  std::string Type() { return G_OP_TYPE_FC; }
 };

 template
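Both matchers describe a fusion pattern as a short chain of nodes. As a usage sketch, this mirrors what test/framework/test_optimize.cpp does later in this series; only the surrounding driver code is illustrative:

// Fold matched chains in a loaded program; for FusionFcMatcher a
// mul -> elementwise_add pair becomes a single "fc" node, and the
// change_map {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}} renames
// elementwise_add's input Y to Z on the fused op.
paddle_mobile::framework::ProgramOptimize optimize;
auto optimized = optimize.FushionOptimize(program.originProgram);
if (optimized != nullptr) {
  optimized->Description("optimize");
}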
diff --git a/src/operators/kernel/arm/relu_kernel.cpp b/src/operators/kernel/arm/relu_kernel.cpp
index e0badea51e7..96fcb7c3088 100644
--- a/src/operators/kernel/arm/relu_kernel.cpp
+++ b/src/operators/kernel/arm/relu_kernel.cpp
@@ -20,11 +20,15 @@ limitations under the License. */

 namespace paddle_mobile {
 namespace operators {
+
 template
 struct ReluFunctor {
   inline T operator()(T in) const { return in > 0 ? in : 0; }
 };

+/*
+ * @b Platform-specific implementation; param is passed in from the op layer
+ * */
 template <>
 void ReluKernel::Compute(const ReluParam &param) const {
   const auto *input_x = param.InputX();
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 02bda7147aa..0ce187c0849 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -696,6 +696,9 @@ class ReshapeParam : public OpParam {
   bool inplace_;
 };

+/*
+ * @b The op layer builds this param and hands it to the kernel layer
+ * */
 class ReluParam : public OpParam {
  public:
   ReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -725,7 +728,6 @@ class FushionFcParam : public OpParam {
     y_num_col_dims_ = GetAttr("y_num_col_dims", attrs);
     axis_ = GetAttr("axis", attrs);
   }
-
   const Tensor *InputX() const { return input_x_; }

   const Tensor *InputY() const { return input_y_; }
diff --git a/src/operators/relu_op.cpp b/src/operators/relu_op.cpp
index 5f861579ab4..35791b28845 100644
--- a/src/operators/relu_op.cpp
+++ b/src/operators/relu_op.cpp
@@ -25,6 +25,10 @@ template class ReluOp;
 }  // namespace operators
 }  // namespace paddle_mobile

+/*
+ * @b Every op has to be registered;
+ * the argument of USE_OP and the first argument of REGISTER_OPERATOR must both match the op type stored in the model
+ * */
 namespace ops = paddle_mobile::operators;
 USE_OP(relu);
 REGISTER_OPERATOR(relu, ops::ReluOp);
diff --git a/src/operators/relu_op.h b/src/operators/relu_op.h
index 6c3a614a1a0..aed907e0f87 100644
--- a/src/operators/relu_op.h
+++ b/src/operators/relu_op.h
@@ -28,6 +28,9 @@ using paddle_mobile::framework::Tensor;
 template
 class ReluOp : public framework::OperatorWithKernel {
  public:
+  /*
+   * @b Constructor of the op; it calls the parent constructor and builds
+   *    the op's own param struct
+   * */
   ReluOp(const std::string &type, const VariableNameMap &inputs,
          const VariableNameMap &outputs, const framework::AttributeMap attrs,
          std::shared_ptr scope)
@@ -35,6 +38,9 @@ class ReluOp : public framework::OperatorWithKernel {
             scope),
         param_(inputs, outputs, attrs, *scope) {}

+  /*
+   * @b Runs the op by invoking the corresponding kernel
+   * */
   void RunImpl() const {
     operators::ReluKernel kernel;
     kernel.Compute(param_);
@@ -44,6 +50,10 @@
   void InferShape() const override;

  protected:
+  /*
+   * @b The param struct the relu kernel computes with; it is defined in
+   *    paddle-mobile/src/operators/op_param.h
+   * */
   ReluParam param_;
 };
diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index 48b6b5cf3c6..2893eccd80e 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -17,7 +17,7 @@ limitations under the License. */

 #include
 #include

-#include "common/io.h"
+#include "io.h"
 #include "common/log.h"
 #include "framework/op_registry.h"
 #include "operators/conv_op.h"
@@ -73,7 +73,7 @@ class Executor4Test : public Executor {
   }

   template
-  vector> predict(const vector &ts,
+  vector> Predict(const vector &ts,
                   const vector &input_names,
                   const vector &output_names,
                   const vector &ddims) {
@@ -116,7 +116,7 @@
     return output_tensor_sptrs;
   }

-  std::shared_ptr predict(const Tensor &t, string input, string output,
+  std::shared_ptr Predict(const Tensor &t, string input, string output,
                           const DDim &dDim) {
     auto scope = this->program_.scope;
     Variable *g_feed_value = scope->Var(input);
diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp
index fe403b55a18..19871f95557 100644
--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
@@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/ -#include "common/io.h" +#include "io.h" +#include "../test_helper.h" int main() { paddle_mobile::Loader loader; - // ../../../test/models/googlenet // ../../../test/models/mobilenet - auto program = loader.Load(std::string("../models/googlenet")); + auto program = loader.Load(g_googlenet); + program.optimizeProgram->Description("program desc: "); return 0; } diff --git a/test/framework/test_optimize.cpp b/test/framework/test_optimize.cpp index 4c4dc6eb3ee..b371fb63b93 100644 --- a/test/framework/test_optimize.cpp +++ b/test/framework/test_optimize.cpp @@ -12,16 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "common/io.h" +#include "io.h" +#include "../test_helper.h" #include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/program_optimize.h" int main() { paddle_mobile::Loader loader; // "../../../test/models/googlenet" - auto program = loader.Load("../models/googlenet"); + auto program = loader.Load(g_googlenet); paddle_mobile::framework::ProgramOptimize optimize; - // program.originProgram->Description("origin"); +// program.originProgram->Description("origin"); auto optimize_program = optimize.FushionOptimize(program.originProgram); if (optimize_program != nullptr) { optimize_program->Description("optimize"); diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp index d52f080277a..139579e9116 100644 --- a/test/net/test_googlenet.cpp +++ b/test/net/test_googlenet.cpp @@ -21,16 +21,16 @@ int main() { // ../../../test/models/googlenet // ../../../test/models/mobilenet auto time1 = time(); - auto program = loader.Load(std::string("../models/googlenet")); + auto program = loader.Load(g_googlenet, false); auto time2 = time(); DLOG << "load cost :" << time_diff(time1, time1) << "ms"; - paddle_mobile::Executor executor(program, 1); + paddle_mobile::Executor executor(program, 1, false); std::vector input; std::vector dims{1, 3, 224, 224}; GetInput(g_test_image_1x3x224x224, &input, dims); auto time3 = time(); - executor.predict(input, dims); + executor.Predict(input, dims); auto time4 = time(); DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; return 0; diff --git a/test/operators/test_batchnorm_op.cpp b/test/operators/test_batchnorm_op.cpp index ba2e06b80b4..0acd6ea5726 100644 --- a/test/operators/test_batchnorm_op.cpp +++ b/test/operators/test_batchnorm_op.cpp @@ -129,7 +129,7 @@ int main() { DLOG << "begin to run BatchNormOp Test"; paddle_mobile::Loader loader; auto program = loader.Load(std::string( - "../../test/models/image_classification_resnet.inference.model")); + g_resnet)); /// input x (4,10,2,2) paddle_mobile::framework::Tensor inputx1; diff --git a/test/operators/test_box_coder_op.cpp b/test/operators/test_box_coder_op.cpp index b7695c91dfb..dac0d0b8051 100644 --- a/test/operators/test_box_coder_op.cpp +++ b/test/operators/test_box_coder_op.cpp @@ -116,7 +116,7 @@ int main() { DLOG << "----------**********----------"; DLOG << "begin to run BoxCoderOp Test"; paddle_mobile::Loader loader; - auto program = loader.Load(std::string("../../test/models/mobilenet+ssd")); + auto program = loader.Load(std::string(g_mobilenet_ssd)); paddle_mobile::framework::Tensor priorbox; SetupTensor(&priorbox, {1917, 4}, static_cast(0), diff --git a/test/operators/test_concat_op.cpp b/test/operators/test_concat_op.cpp index a9bb072f1e9..7a106b03c44 100644 --- 
a/test/operators/test_concat_op.cpp +++ b/test/operators/test_concat_op.cpp @@ -57,7 +57,7 @@ int main() { auto out_ddim = paddle_mobile::framework::make_ddim({3, 100, 2, 2}); out_ddims.push_back(out_ddim); - auto output = executor.predict(input_tensors, input_names, + auto output = executor.Predict(input_tensors, input_names, output_names, out_ddims); auto output0_data = output[0]->data(); diff --git a/test/operators/test_cov_op.cpp b/test/operators/test_cov_op.cpp index 2fe7f3577be..ba6a9b4800f 100644 --- a/test/operators/test_cov_op.cpp +++ b/test/operators/test_cov_op.cpp @@ -34,7 +34,7 @@ int main() { // static_cast(1)); auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112}); - auto output = executor.predict(input, "data", "conv2d_0.tmp_0", out_ddim); + auto output = executor.Predict(input, "data", "conv2d_0.tmp_0", out_ddim); auto output_ptr = output->data(); for (int j = 0; j < output->numel(); ++j) { diff --git a/test/operators/test_elementwise_add_op.cpp b/test/operators/test_elementwise_add_op.cpp index 1b4bf457a2c..c4997f2eb37 100644 --- a/test/operators/test_elementwise_add_op.cpp +++ b/test/operators/test_elementwise_add_op.cpp @@ -50,7 +50,7 @@ int main() { auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 224, 224}); out_ddims.push_back(out_ddim); - auto output = executor.predict(input_tensors, input_names, + auto output = executor.Predict(input_tensors, input_names, output_names, out_ddims); auto output0_data = output[0]->data(); diff --git a/test/operators/test_fushion_fc_op.cpp b/test/operators/test_fushion_fc_op.cpp index 6063772d85a..8dc1b02bec4 100644 --- a/test/operators/test_fushion_fc_op.cpp +++ b/test/operators/test_fushion_fc_op.cpp @@ -116,7 +116,7 @@ int main() { DLOG << "begin to run Fc Test"; paddle_mobile::Loader loader; // "../../../test/models/googlenet" - auto program = loader.Load("../models/googlenet"); + auto program = loader.Load(g_googlenet); paddle_mobile::framework::ProgramOptimize optimize; // program.originProgram->Description("origin"); auto optimize_program = optimize.FushionOptimize(program.originProgram); diff --git a/test/operators/test_lrn_op.cpp b/test/operators/test_lrn_op.cpp index ba35639fb71..cf5fd4bdf2d 100644 --- a/test/operators/test_lrn_op.cpp +++ b/test/operators/test_lrn_op.cpp @@ -46,7 +46,7 @@ int main() { auto out_ddim = paddle_mobile::framework::make_ddim({3, 4, 2, 2}); out_ddims.push_back(out_ddim); - auto output = executor.predict(input_tensors, input_names, + auto output = executor.Predict(input_tensors, input_names, output_names, out_ddims); auto output0_data = output[0]->data(); diff --git a/test/operators/test_mul_op.cpp b/test/operators/test_mul_op.cpp index 8acd4a99470..5412e6905b7 100644 --- a/test/operators/test_mul_op.cpp +++ b/test/operators/test_mul_op.cpp @@ -50,7 +50,7 @@ int main() { auto out_ddim = paddle_mobile::framework::make_ddim({3, 3}); out_ddims.push_back(out_ddim); - auto output = executor.predict(input_tensors, input_names, + auto output = executor.Predict(input_tensors, input_names, output_names, out_ddims); auto output0_data = output[0]->data(); diff --git a/test/operators/test_pool_op.cpp b/test/operators/test_pool_op.cpp index 8a1c0a7ccec..62dfc20dc12 100644 --- a/test/operators/test_pool_op.cpp +++ b/test/operators/test_pool_op.cpp @@ -14,11 +14,11 @@ limitations under the License. 
*/ #include "../executor_for_test.h" #include "../test_helper.h" -#include "common/io.h" +#include "io.h" int main() { paddle_mobile::Loader loader; - auto program = loader.Load(std::string("../models/googlenet")); + auto program = loader.Load(std::string(g_googlenet)); if (program.originProgram == nullptr) { DLOG << "program read file"; } @@ -32,7 +32,7 @@ int main() { static_cast(1)); auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 56, 56}); auto output = - executor.predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim); + executor.Predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim); float *output_ptr = output->data(); for (int j = 0; j < output->numel(); ++j) { diff --git a/test/operators/test_prior_box_op.cpp b/test/operators/test_prior_box_op.cpp index 80ede944936..8c697a9a798 100644 --- a/test/operators/test_prior_box_op.cpp +++ b/test/operators/test_prior_box_op.cpp @@ -127,7 +127,7 @@ int main() { DLOG << "----------**********----------"; DLOG << "begin to run PriorBoxOp Test"; paddle_mobile::Loader loader; - auto program = loader.Load(std::string("../../test/models/mobilenet+ssd")); + auto program = loader.Load(std::string(g_mobilenet_ssd)); /// input x (1,3,300,300) paddle_mobile::framework::Tensor input_image; diff --git a/test/operators/test_relu_op.cpp b/test/operators/test_relu_op.cpp index fb68b921113..50f3b6a20b6 100644 --- a/test/operators/test_relu_op.cpp +++ b/test/operators/test_relu_op.cpp @@ -46,7 +46,7 @@ int main() { auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4}); out_ddims.push_back(out_ddim); - auto output = executor.predict(input_tensors, input_names, + auto output = executor.Predict(input_tensors, input_names, output_names, out_ddims); auto output0_data = output[0]->data(); diff --git a/test/operators/test_reshape_op.cpp b/test/operators/test_reshape_op.cpp index b0251e693a7..5448aac87c2 100644 --- a/test/operators/test_reshape_op.cpp +++ b/test/operators/test_reshape_op.cpp @@ -14,11 +14,11 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "common/io.h" +#include "io.h" int main() { paddle_mobile::Loader loader; - auto program = loader.Load(std::string("../../test/models/mobilenet+ssd")); + auto program = loader.Load(std::string(g_mobilenet_ssd)); if (program.originProgram == nullptr) { DLOG << "program read file"; } @@ -31,7 +31,7 @@ int main() { auto input_ptr = input.data(); auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2}); auto output = - executor.predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim); + executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim); auto *output_ptr = output->data(); DLOG << "input : "; diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp index dcd35cd8e46..289eac149fa 100644 --- a/test/operators/test_sigmoid_op.cpp +++ b/test/operators/test_sigmoid_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "../../src/operators/kernel/sigmoid_kernel.h" #include "../test_helper.h" -#include "common/io.h" +#include "io.h" int main() { paddle_mobile::framework::Tensor input; diff --git a/test/operators/test_softmax_op.cpp b/test/operators/test_softmax_op.cpp index 094c48adbb6..58de5300cca 100644 --- a/test/operators/test_softmax_op.cpp +++ b/test/operators/test_softmax_op.cpp @@ -14,11 +14,11 @@ limitations under the License. 
*/ #include "../executor_for_test.h" #include "../test_helper.h" -#include "common/io.h" +#include "io.h" int main() { paddle_mobile::Loader loader; - auto program = loader.Load(std::string("../models/mobilenet")); + auto program = loader.Load(std::string(g_mobilenet)); if (program.originProgram == nullptr) { DLOG << "program read file"; } @@ -30,7 +30,7 @@ int main() { static_cast(1)); auto out_ddim = paddle_mobile::framework::make_ddim({1, 1000}); auto output = - executor.predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim); + executor.Predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim); auto *output_ptr = output->data(); for (int j = 0; j < output->numel(); ++j) { DLOG << " value of output: " << output_ptr[j]; diff --git a/test/operators/test_transpose_op.cpp b/test/operators/test_transpose_op.cpp index 23e3bc3ec47..4c88df2d83d 100644 --- a/test/operators/test_transpose_op.cpp +++ b/test/operators/test_transpose_op.cpp @@ -14,11 +14,11 @@ limitations under the License. */ #include "../executor_for_test.h" #include "../test_helper.h" -#include "common/io.h" +#include "io.h" int main() { paddle_mobile::Loader loader; - auto program = loader.Load(std::string("../../test/models/mobilenet+ssd")); + auto program = loader.Load(std::string(g_mobilenet_ssd)); if (program.originProgram == nullptr) { DLOG << "program read file"; } @@ -31,7 +31,7 @@ int main() { auto input_ptr = input.data(); auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 4, 2}); auto output = - executor.predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim); + executor.Predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim); auto *output_ptr = output->data(); DLOG << "input : "; diff --git a/test/test_helper.h b/test/test_helper.h index dba4dec9bbc..0fec49e4e92 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include -#include #include +#include #include "common/log.h" #include "framework/ddim.h" diff --git a/test/test_include.h b/test/test_include.h index 19a9bff8846..0046bdb4e41 100644 --- a/test/test_include.h +++ b/test/test_include.h @@ -20,7 +20,7 @@ limitations under the License. */ #include "./test_helper.h" #include "common/enforce.h" -#include "common/io.h" +#include "io.h" #include "common/log.h" #include "framework/lod_tensor.h" #include "framework/operator.h" From 50da2d3114a6a7af9468aad3c6bf6995c04e079c Mon Sep 17 00:00:00 2001 From: liuruilong Date: Wed, 30 May 2018 15:31:27 +0800 Subject: [PATCH 13/26] format files --- src/common/types.h | 28 +++++++++---------- src/framework/operator.h | 17 +++++------ .../program/program-optimize/node.cpp | 27 +++++++++++------- .../program-optimize/program_optimize.cpp | 2 +- src/io.cpp | 20 ++++++------- src/io.h | 6 ++-- src/operators/fusion_conv_add_relu_op.h | 3 +- src/operators/fusion_fc_op.h | 3 +- src/operators/kernel/arm/relu_kernel.cpp | 1 - src/operators/relu_op.cpp | 3 +- src/operators/relu_op.h | 2 +- test/executor_for_test.h | 2 +- test/framework/test_load.cpp | 2 +- test/framework/test_optimize.cpp | 4 +-- test/operators/test_batchnorm_op.cpp | 3 +- test/test_helper.h | 2 +- test/test_include.h | 2 +- 17 files changed, 69 insertions(+), 58 deletions(-) diff --git a/src/common/types.h b/src/common/types.h index 252c747d75f..b25ae329931 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -15,8 +15,8 @@ limitations under the License. 
*/

 #pragma once;

 #include
-#include
 #include
+#include

 namespace paddle_mobile {
 enum class Precision : int { FP32 = 0 };
@@ -94,18 +94,18 @@ static const std::string G_OP_TYPE_FEED = "feed";
 static const std::string G_OP_TYPE_FETCH = "fetch";

 static std::unordered_map<
-    std::string, std::pair, std::vector>>
-    op_input_output_key = {{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
-                           {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}},
-                           {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
-                           {G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_FEED, {{"X"}, {"Out"}}},
-                           {G_OP_TYPE_FETCH, {{"X"}, {"Out"}}}};
+    std::string, std::pair, std::vector>>
+    op_input_output_key = {{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
+                          {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}},
+                          {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
+                          {G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_FEED, {{"X"}, {"Out"}}},
+                          {G_OP_TYPE_FETCH, {{"X"}, {"Out"}}}};

 }  // namespace paddle_mobile
diff --git a/src/framework/operator.h b/src/framework/operator.h
index e9dc6f6fb75..8e5e55fb469 100644
--- a/src/framework/operator.h
+++ b/src/framework/operator.h
@@ -19,20 +19,20 @@ limitations under the License. */

 #include
 #include

-#include "common/types.h"
 #include "common/enforce.h"
-#include "common/variant.h"
-#include "framework/scope.h"
-#include "framework/tensor.h"
-#include "framework/op_info.h"
 #include "common/type_define.h"
-#include "framework/variable.h"
-#include "framework/attribute.h"
+#include "common/types.h"
+#include "common/variant.h"
+#include "framework/attribute.h"
+#include "framework/op_info.h"
+#include "framework/op_kernel_type.h"
 #include "framework/op_registry.h"
-#include "framework/op_kernel_type.h"
+#include "framework/paddle_mobile_object.h"
 #include "framework/program/block_desc.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/program-optimize/node.h"
+#include "framework/scope.h"
+#include "framework/tensor.h"
+#include "framework/variable.h"

 namespace paddle_mobile {
 namespace framework {
@@ -77,6 +77,7 @@ class OperatorBase : PaddleMobileObject {
    * @b Computes the output shape from the input shapes and attributes
    * */
   virtual void InferShape() const = 0;
+
  protected:
   std::shared_ptr scope_;
   std::string type_;
diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp
index f260fd0b61f..820fa6a443c 100644
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -82,11 +82,14 @@ void Node::OpDescs(std::vector> *op_desc,
         DLOG << "current op desc does not have exactly 1 output";
         can_add_split = false;
       }
-      for (const auto& output : outputs_) {
-        if (op_input_output_key.find(output->op_desc_->type_) != op_input_output_key.end()) {
+      for (const auto &output : outputs_) {
+        if (op_input_output_key.find(output->op_desc_->type_) !=
+            op_input_output_key.end()) {
           auto inputs_and_outputs = op_input_output_key[output->op_desc_->type_];
-          auto outputs_of_output = output->op_desc_->Output(inputs_and_outputs.second[0]);
-          auto inputs_of_output = output->op_desc_->Input(inputs_and_outputs.first[0]);
+          auto outputs_of_output =
output->op_desc_->Output(inputs_and_outputs.second[0]); + auto inputs_of_output = + output->op_desc_->Input(inputs_and_outputs.first[0]); for (int i = 0; i < inputs_of_output.size(); ++i) { std::string input_of_output = inputs_of_output[i]; for (int j = 0; j < outputs_of_output.size(); ++j) { @@ -121,13 +124,17 @@ void Node::OpDescs(std::vector> *op_desc, if (can_add_split) { adding_thread = true; - std::shared_ptr split_op_desc = std::make_shared(); + std::shared_ptr split_op_desc = + std::make_shared(); split_op_desc->type_ = G_OP_TYPE_SPLIT; - auto outputs = this->op_desc_->Output(op_input_output_key[this->op_desc_->Type()].second[0]); - - split_op_desc->inputs_ = {{op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}}; - auto &split_outputs = split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]]; - for (const auto& output : outputs_) { + auto outputs = this->op_desc_->Output( + op_input_output_key[this->op_desc_->Type()].second[0]); + + split_op_desc->inputs_ = { + {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}}; + auto &split_outputs = + split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]]; + for (const auto &output : outputs_) { split_outputs.push_back(outputs[0]); } DLOG << "add split"; diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp index cd6899efe36..737fed9bd56 100644 --- a/src/framework/program/program-optimize/program_optimize.cpp +++ b/src/framework/program/program-optimize/program_optimize.cpp @@ -19,7 +19,7 @@ namespace paddle_mobile { namespace framework { -//std::shared_ptr ProgramOptimize::Optimize() {} +// std::shared_ptr ProgramOptimize::Optimize() {} std::shared_ptr ProgramOptimize::FushionOptimize( std::shared_ptr ori_des) { diff --git a/src/io.cpp b/src/io.cpp index 23b3e21ee81..002e73b7964 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -18,15 +18,14 @@ limitations under the License. 
*/ #include "common/log.h" #include "common/enforce.h" -#include "common/enforce.h" -#include "framework/scope.h" -#include "framework/tensor.h" -#include "framework/operator.h" -#include "framework/lod_tensor.h" #include "framework/framework.pb-c.h" -#include "framework/program/var_desc.h" -#include "framework/program/program_desc.h" +#include "framework/lod_tensor.h" +#include "framework/operator.h" #include "framework/program/program-optimize/program_optimize.h" +#include "framework/program/program_desc.h" +#include "framework/program/var_desc.h" +#include "framework/scope.h" +#include "framework/tensor.h" namespace paddle_mobile { using framework::Variable; @@ -202,7 +201,6 @@ const framework::Program Loader::Load( // DLOG << "var name-- " << var_desc->Name(); auto var = scope->Var(var_desc->Name()); - if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Persistable() && var_desc->Type() != framework::VARTYPE_TYPE_FEED_MINIBATCH && @@ -226,7 +224,8 @@ const framework::Program Loader::Load( if (optimize) { framework::ProgramOptimize program_optimize; - program.optimizeProgram = program_optimize.FushionOptimize(originProgramDesc); + program.optimizeProgram = + program_optimize.FushionOptimize(originProgramDesc); } paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL); @@ -238,7 +237,8 @@ template class Loader; #pragma mark - executor template -Executor::Executor(const framework::Program p, int batch_size, bool use_optimize) +Executor::Executor(const framework::Program p, int batch_size, + bool use_optimize) : program_(p), batch_size_(batch_size), use_optimize_(use_optimize) { if (use_optimize_) { to_predict_program_ = program_.optimizeProgram; diff --git a/src/io.h b/src/io.h index 8a73beba6d8..de2d359bf58 100644 --- a/src/io.h +++ b/src/io.h @@ -30,7 +30,8 @@ namespace paddle_mobile { template class Loader : PaddleMobileObject { public: - const framework::Program Load(const std::string &dirname, bool optimize = true); + const framework::Program Load(const std::string &dirname, + bool optimize = true); private: void LoadVar(framework::Variable *variable, @@ -45,7 +46,8 @@ class Executor { Executor() = default; - Executor(const framework::Program p, int batch_size = 1, bool use_optimize = true); + Executor(const framework::Program p, int batch_size = 1, + bool use_optimize = true); // std::shared_ptr Predict(framework::Tensor &t); diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index cab55dc3617..1fa3399cf22 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ b/src/operators/fusion_conv_add_relu_op.h @@ -31,7 +31,8 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher { void FolderNodes(framework::Node &node) { std::vector> origin_descs = node.OpDescs(node_.Depth()); - node.Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}); + node.Folder(node_.Depth(), Type(), + {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}); } std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; } }; diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h index fd6f2658fd1..fb49fa61b20 100644 --- a/src/operators/fusion_fc_op.h +++ b/src/operators/fusion_fc_op.h @@ -35,7 +35,8 @@ class FusionFcMatcher : public framework::FusionOpMatcher { void FolderNodes(framework::Node &node) { vector> origin_descs = node.OpDescs(node_.Depth()); - node.Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}); + node.Folder(node_.Depth(), Type(), + 
{{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }

   std::string Type() { return G_OP_TYPE_FC; }
diff --git a/src/operators/kernel/arm/relu_kernel.cpp b/src/operators/kernel/arm/relu_kernel.cpp
index 96fcb7c3088..586d9811751 100644
--- a/src/operators/kernel/arm/relu_kernel.cpp
+++ b/src/operators/kernel/arm/relu_kernel.cpp
@@ -20,7 +20,6 @@ limitations under the License. */

 namespace paddle_mobile {
 namespace operators {

-
 template
 struct ReluFunctor {
   inline T operator()(T in) const { return in > 0 ? in : 0; }
diff --git a/src/operators/relu_op.cpp b/src/operators/relu_op.cpp
index 35791b28845..21bcc605282 100644
--- a/src/operators/relu_op.cpp
+++ b/src/operators/relu_op.cpp
@@ -27,7 +27,8 @@ template class ReluOp;

 /*
  * @b Every op has to be registered;
- * the argument of USE_OP and the first argument of REGISTER_OPERATOR must both match the op type stored in the model
+ * the argument of USE_OP and the first argument of REGISTER_OPERATOR
+ * must both match the op type stored in the model
  * */
 namespace ops = paddle_mobile::operators;
 USE_OP(relu);
diff --git a/src/operators/relu_op.h b/src/operators/relu_op.h
index aed907e0f87..7be8cd249cb 100644
--- a/src/operators/relu_op.h
+++ b/src/operators/relu_op.h
@@ -38,7 +38,7 @@ class ReluOp : public framework::OperatorWithKernel {
             scope),
         param_(inputs, outputs, attrs, *scope) {}

-   /*
+  /*
    * @b Runs the op by invoking the corresponding kernel
    * */
   void RunImpl() const {
diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index 2893eccd80e..ce3c84e986e 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -17,9 +17,9 @@ limitations under the License. */

 #include
 #include

-#include "io.h"
 #include "common/log.h"
 #include "framework/op_registry.h"
+#include "io.h"
 #include "operators/conv_op.h"
 #include "operators/elementwise_add_op.h"
 #include "operators/pool_op.h"
diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp
index 19871f95557..95357547e1b 100644
--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "io.h"
 #include "../test_helper.h"
+#include "io.h"

 int main() {
   paddle_mobile::Loader loader;
diff --git a/test/framework/test_optimize.cpp b/test/framework/test_optimize.cpp
index b371fb63b93..f0392cfec02 100644
--- a/test/framework/test_optimize.cpp
+++ b/test/framework/test_optimize.cpp
@@ -12,17 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/ -#include "io.h" #include "../test_helper.h" #include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/program_optimize.h" +#include "io.h" int main() { paddle_mobile::Loader loader; // "../../../test/models/googlenet" auto program = loader.Load(g_googlenet); paddle_mobile::framework::ProgramOptimize optimize; -// program.originProgram->Description("origin"); + // program.originProgram->Description("origin"); auto optimize_program = optimize.FushionOptimize(program.originProgram); if (optimize_program != nullptr) { optimize_program->Description("optimize"); diff --git a/test/operators/test_batchnorm_op.cpp b/test/operators/test_batchnorm_op.cpp index 0acd6ea5726..38d9f624909 100644 --- a/test/operators/test_batchnorm_op.cpp +++ b/test/operators/test_batchnorm_op.cpp @@ -128,8 +128,7 @@ int main() { DLOG << "----------**********----------"; DLOG << "begin to run BatchNormOp Test"; paddle_mobile::Loader loader; - auto program = loader.Load(std::string( - g_resnet)); + auto program = loader.Load(std::string(g_resnet)); /// input x (4,10,2,2) paddle_mobile::framework::Tensor inputx1; diff --git a/test/test_helper.h b/test/test_helper.h index 0fec49e4e92..dba4dec9bbc 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include -#include #include +#include #include "common/log.h" #include "framework/ddim.h" diff --git a/test/test_include.h b/test/test_include.h index 0046bdb4e41..25efbb9f4c0 100644 --- a/test/test_include.h +++ b/test/test_include.h @@ -20,7 +20,6 @@ limitations under the License. */ #include "./test_helper.h" #include "common/enforce.h" -#include "io.h" #include "common/log.h" #include "framework/lod_tensor.h" #include "framework/operator.h" @@ -30,3 +29,4 @@ limitations under the License. */ #include "framework/scope.h" #include "framework/tensor.h" #include "framework/variable.h" +#include "io.h" From 8d3c8d674c446d66f5539814a17d5aabc1ea72b0 Mon Sep 17 00:00:00 2001 From: zhaojiaying01 Date: Wed, 30 May 2018 16:26:32 +0800 Subject: [PATCH 14/26] submit depthwise_conv_op and test --- src/operators/conv_op.cpp | 7 - src/operators/conv_op.h | 7 + src/operators/depthwise_conv_op.cpp | 57 ++++++++ src/operators/depthwise_conv_op.h | 49 +++++++ src/operators/kernel/arm/conv_kernel.cpp | 13 -- .../kernel/arm/depthwise_conv_kernel.cpp | 126 ++++++++++++++++++ src/operators/kernel/conv_kernel.h | 21 ++- src/operators/kernel/depthwise_conv_kernel.h | 34 +++++ test/CMakeLists.txt | 4 + test/operators/test_depthwise_conv_op.cpp | 46 +++++++ 10 files changed, 342 insertions(+), 22 deletions(-) create mode 100644 src/operators/depthwise_conv_op.cpp create mode 100644 src/operators/depthwise_conv_op.h create mode 100644 src/operators/kernel/arm/depthwise_conv_kernel.cpp create mode 100644 src/operators/kernel/depthwise_conv_kernel.h create mode 100644 test/operators/test_depthwise_conv_op.cpp diff --git a/src/operators/conv_op.cpp b/src/operators/conv_op.cpp index 148b0f69f96..bfddcf14acb 100644 --- a/src/operators/conv_op.cpp +++ b/src/operators/conv_op.cpp @@ -21,13 +21,6 @@ limitations under the License. 
*/ namespace paddle_mobile { namespace operators { -int ConvOutputSize(int input_size, int filter_size, int dilation, int padding, - int stride) { - const int dkernel = dilation * (filter_size - 1) + 1; - int output_size = (input_size + 2 * padding - dkernel) / stride + 1; - return output_size; -} - template void ConvOp::InferShape() const { // std::cout << " begin get dims: " << std::endl; diff --git a/src/operators/conv_op.h b/src/operators/conv_op.h index 1557f2f06ee..f15f286b606 100644 --- a/src/operators/conv_op.h +++ b/src/operators/conv_op.h @@ -44,5 +44,12 @@ class ConvOp : public framework::OperatorWithKernel { ConvParam param_; }; +inline int ConvOutputSize(int input_size, int filter_size, int dilation, + int padding, int stride) { + const int dkernel = dilation * (filter_size - 1) + 1; + int output_size = (input_size + 2 * padding - dkernel) / stride + 1; + return output_size; +} + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/depthwise_conv_op.cpp b/src/operators/depthwise_conv_op.cpp new file mode 100644 index 00000000000..2538298175c --- /dev/null +++ b/src/operators/depthwise_conv_op.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "operators/depthwise_conv_op.h" +#include +#include "framework/data_type.h" +#include "framework/op_proto_maker.h" +#include "framework/op_registry.h" +#include "operators/conv_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void DepthwiseConvOp::InferShape() const { + auto in_dims = param_.Input()->dims(); + auto filter_dims = param_.Filter()->dims(); + const std::vector &strides = param_.Strides(); + std::vector paddings = param_.Paddings(); + int groups = param_.Groups(); + std::vector dilations = param_.Dilations(); + + PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && + dilations.size() == paddings.size() && + paddings.size() == strides.size()), + "ConvParam is not suitable"); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < strides.size(); ++i) { + output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], + dilations[i], paddings[i], + strides[i])); + } + + framework::DDim ddim = framework::make_ddim(output_shape); + param_.Output()->Resize(ddim); +} + +template class DepthwiseConvOp; + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +USE_OP(depthwise_conv2d); +REGISTER_OPERATOR(depthwise_conv2d, ops::DepthwiseConvOp); diff --git a/src/operators/depthwise_conv_op.h b/src/operators/depthwise_conv_op.h new file mode 100644 index 00000000000..c47fa0ffcac --- /dev/null +++ b/src/operators/depthwise_conv_op.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "framework/operator.h" +#include "operators/kernel/depthwise_conv_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template +class DepthwiseConvOp : public framework::OperatorWithKernel { + public: + DepthwiseConvOp(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel(type, inputs, outputs, attrs, + scope), + param_(inputs, outputs, attrs, *scope) {} + + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape() const override; + + void RunImpl() const { + operators::DepthwiseConvKernel kernel; + kernel.Compute(param_); + this->ClearVariables({"Filter", "Input"}); + } + + private: + ConvParam param_; +}; + +} // namespace operators +} // namespace paddle_mobile diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp index 1e2572b9847..f04b8156c9d 100644 --- a/src/operators/kernel/arm/conv_kernel.cpp +++ b/src/operators/kernel/arm/conv_kernel.cpp @@ -17,19 +17,6 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { -bool IsExpand(const std::vector &filter_dim, - const std::vector &strides, const std::vector &paddings, - const std::vector &dilations) { - bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; - for (size_t j = 0; j < strides.size(); ++j) { - filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); - strides_1 = strides_1 && (strides[j] == 1); - padding_0 = padding_0 && (paddings[j] == 0); - dilation_1 = dilation_1 && (dilations[j] == 1); - } - return !(filter_1 && strides_1 && padding_0 && dilation_1); -} - template <> void ConvKernel::Compute(const ConvParam ¶m) const { LOG(kLOG_DEBUG) << param; diff --git a/src/operators/kernel/arm/depthwise_conv_kernel.cpp b/src/operators/kernel/arm/depthwise_conv_kernel.cpp new file mode 100644 index 00000000000..73aa9953cfc --- /dev/null +++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp @@ -0,0 +1,126 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "operators/kernel/depthwise_conv_kernel.h" +#include "operators/kernel/conv_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { + LOG(kLOG_DEBUG) << param; + + const Tensor *input = param.Input(); + Tensor filter = *param.Filter(); + Tensor *output = param.Output(); + output->mutable_data(); + + int groups = param.Groups(); + std::vector strides = param.Strides(); + std::vector paddings = param.Paddings(); + std::vector dilations = param.Dilations(); + + DLOG << " compute end get Attrs " << strides[0]; + + const int batch_size = static_cast(input->dims()[0]); + + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + std::vector output_shape_vec(framework::vectorize(output->dims())); + + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, data_dim + 1); + + bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); + Tensor col; + Tensor col_matrix; + if (is_expand) { + col.mutable_data(col_shape); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } + DLOG << " col_shape = " << col_shape; + DLOG << " col_matrix_shape = " << col_matrix_shape; + + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + DLOG << " input_shape = " << input_shape; + + framework::DDim filter_matrix_shape = {filter.dims()[0], + filter.numel() / filter.dims()[0]}; + filter.Resize(filter_matrix_shape); + DLOG << " filter.dims() = " << filter.dims(); + + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + + for (int i = 0; i < batch_size; i++) { + Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); + Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + DLOG << " in_batch.dims() = " << in_batch.dims(); + DLOG << " out_batch.dims() = " << out_batch.dims(); + + for (int g = 0; g < groups; g++) { + Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); + + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); + } else if (data_dim == 3U) { + // vol2col + vol2col(in_slice, dilations, strides, paddings, &col); + } + + // gemm + Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); + Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); + DLOG << " out_slice " << out_slice.dims(); + DLOG << " filter_slice " << filter_slice.dims(); + DLOG << " col_matrix " << col_matrix.dims(); + math::matmul(filter_slice, false, col_matrix, false, + static_cast(1), &out_slice, + static_cast(0)); + auto filter_ptr = filter_slice.data(); + } + } +} + 
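+// A worked shape sketch for the loop above (illustrative only: it assumes
+// the 1x32x150x150 input used by the unit test, a 32x1x3x3 depthwise
+// filter, groups == 32, stride 1 and pad 1):
+//   output H/W: (150 + 2 * 1 - 3) / 1 + 1 = 150;
+//   in_step == out_step == 32 / 32 == 1, so each group sees one channel;
+//   per group, im2col yields a (1*3*3) x (150*150) col_matrix, and the
+//   gemm of the 1 x 9 filter_slice against it fills a 1 x 22500 out_slice.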
+template class DepthwiseConvKernel; + +} // namespace operators +} // namespace paddle_mobile diff --git a/src/operators/kernel/conv_kernel.h b/src/operators/kernel/conv_kernel.h index a756e2d2417..d43a174ffdb 100644 --- a/src/operators/kernel/conv_kernel.h +++ b/src/operators/kernel/conv_kernel.h @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include "framework/operator.h" #include "operators/math/im2col.h" #include "operators/math/math_function.h" @@ -23,12 +24,28 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { -using namespace framework; +using framework::OpKernelBase; template -class ConvKernel : public framework::OpKernelBase { +class ConvKernel : public OpKernelBase { public: void Compute(const ConvParam ¶m) const; }; + +inline bool IsExpand(const std::vector &filter_dim, + const std::vector &strides, + const std::vector &paddings, + const std::vector &dilations) { + bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; + for (size_t j = 0; j < strides.size(); ++j) { + filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); + strides_1 = strides_1 && (strides[j] == 1); + padding_0 = padding_0 && (paddings[j] == 0); + dilation_1 = dilation_1 && (dilations[j] == 1); + } + + return !(filter_1 && strides_1 && padding_0 && dilation_1); +} + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/depthwise_conv_kernel.h b/src/operators/kernel/depthwise_conv_kernel.h new file mode 100644 index 00000000000..43ddfb25cd8 --- /dev/null +++ b/src/operators/kernel/depthwise_conv_kernel.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "framework/operator.h" +#include "operators/math/im2col.h" +#include "operators/math/math_function.h" +#include "operators/math/vol2col.h" +#include "operators/op_param.h" + +#pragma once; + +namespace paddle_mobile { +namespace operators { + +using framework::OpKernelBase; + +template +class DepthwiseConvKernel : public OpKernelBase { + public: + void Compute(const ConvParam ¶m) const; +}; +} // namespace operators +} // namespace paddle_mobile diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f464c3bd94f..2bb313342e2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -91,3 +91,7 @@ target_link_libraries(test-googlenet paddle-mobile) # gen test ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) target_link_libraries(test-sigmoid paddle-mobile) + +# gen test +ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) +target_link_libraries(test-depthwise-conv-op paddle-mobile) diff --git a/test/operators/test_depthwise_conv_op.cpp b/test/operators/test_depthwise_conv_op.cpp new file mode 100644 index 00000000000..648b4c5db99 --- /dev/null +++ b/test/operators/test_depthwise_conv_op.cpp @@ -0,0 +1,46 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "../executor_for_test.h" +#include "../test_include.h" +#include "operators/depthwise_conv_op.h" + +int main() { + paddle_mobile::Loader loader; + // ../models/image_classification_resnet.inference.model + auto program = loader.Load(g_mobilenet_ssd); + + PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, + "program file read fail"); + + Executor4Test> + executor(program, "depthwise_conv2d"); + + paddle_mobile::framework::LoDTensor input; + // GetInput(g_test_image_1x3x224x224, &input, {1, 3, 224, 224}); + // use SetupTensor if not has local input image . 
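+  // SetupTensor (declared in test_helper.h) is assumed to fill the tensor
+  // with uniform random values in [0, 1), so this test exercises shapes and
+  // the kernel code path rather than checking numerical results.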
+ SetupTensor(&input, {1, 32, 150, 150}, static_cast(0), + static_cast(1)); + auto input_ptr = input.data(); + auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 150, 150}); + auto output = executor.Predict(input, "batch_norm_0.tmp_3", + "depthwise_conv2d_0.tmp_0", out_ddim); + + auto output_ptr = output->data(); + for (int j = 0; j < output->numel(); ++j) { + DLOG << " value of output: " << output_ptr[j]; + } + return 0; +} From 3e0e0705f517e2d4fc6a436d2fea65754d1d586b Mon Sep 17 00:00:00 2001 From: eclipsess Date: Wed, 30 May 2018 16:53:48 +0800 Subject: [PATCH 15/26] add test yolo and mobilenet --- src/framework/operator.h | 3 ++- test/CMakeLists.txt | 8 ++++++++ test/net/test_mobilenet.cpp | 39 +++++++++++++++++++++++++++++++++++ test/net/test_yolo.cpp | 41 +++++++++++++++++++++++++++++++++++++ test/test_helper.h | 1 + 5 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 test/net/test_mobilenet.cpp create mode 100644 test/net/test_yolo.cpp diff --git a/src/framework/operator.h b/src/framework/operator.h index a44d264a188..0d617617753 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -50,7 +50,8 @@ static std::unordered_map< {"lrn", {{"X"}, {"Out"}}}, {"concat", {{"X"}, {"Out"}}}, {"feed", {{"X"}, {"Out"}}}, - {"fetch", {{"X"}, {"Out"}}}}; + {"fetch", {{"X"}, {"Out"}}}, + {"reshape", {{"X"}, {"Out"}}}}; template class OperatorBase : PaddleMobileObject { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f464c3bd94f..c80d34c22e5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -84,10 +84,18 @@ target_link_libraries(test-gemm paddle-mobile) ADD_EXECUTABLE(test-enforce common/test_enforce.cpp) target_link_libraries(test-enforce paddle-mobile) +# gen test +ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) +target_link_libraries(test-yolo paddle-mobile) + # gen test ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-googlenet paddle-mobile) +# gen test +ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) +target_link_libraries(test-mobilenet paddle-mobile) + # gen test ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) target_link_libraries(test-sigmoid paddle-mobile) diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp new file mode 100644 index 00000000000..e686ad85be7 --- /dev/null +++ b/test/net/test_mobilenet.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::Loader loader; + auto time1 = time(); + auto program = loader.Load(g_mobilenet); + auto time2 = time(); + DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + paddle_mobile::Executor executor(program, 1); + + std::vector dims{1, 3, 224, 224}; + Tensor input_tensor; + SetupTensor(&input_tensor, {1, 3, 224, 224}, static_cast(0), + static_cast(1)); + + std::vector input(input_tensor.data(), + input_tensor.data() + input_tensor.numel()); + auto time3 = time(); + executor.predict(input, dims); + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + return 0; +} diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp new file mode 100644 index 00000000000..ab61fb250e3 --- /dev/null +++ b/test/net/test_yolo.cpp @@ -0,0 +1,41 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::Loader loader; + // ../../../test/models/googlenet + // ../../../test/models/mobilenet + auto time1 = time(); + auto program = loader.Load(g_yolo); + auto time2 = time(); + DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + paddle_mobile::Executor executor(program, 1); + + std::vector dims{1, 3, 227, 227}; + Tensor input_tensor; + SetupTensor(&input_tensor, {1, 3, 227, 227}, static_cast(0), + static_cast(1)); + + std::vector input(input_tensor.data(), + input_tensor.data() + input_tensor.numel()); + auto time3 = time(); + executor.predict(input, dims); + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + return 0; +} diff --git a/test/test_helper.h b/test/test_helper.h index dba4dec9bbc..fc4ed6c91dc 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -28,6 +28,7 @@ static const std::string g_mobilenet_ssd = "../models/mobilenet+ssd"; static const std::string g_squeezenet = "../models/squeezenet"; static const std::string g_resnet = "../models/image_classification_resnet.inference.model"; +static const std::string g_yolo = "../models/yolo"; static const std::string g_test_image_1x3x224x224 = "../images/test_image_1x3x224x224_float"; using paddle_mobile::framework::DDim; From 680868a9dbc1501bf95e0f430ae89841e2759de9 Mon Sep 17 00:00:00 2001 From: eclipsess Date: Wed, 30 May 2018 17:54:46 +0800 Subject: [PATCH 16/26] add test resnet squeezenet ssd(mobilenet) --- src/common/types.h | 32 ++++++++++++++----------- src/io.cpp | 2 +- test/CMakeLists.txt | 11 +++++++++ test/net/test_mobilenet+ssd.cpp | 39 +++++++++++++++++++++++++++++++ test/net/test_resnet.cpp | 39 +++++++++++++++++++++++++++++++ test/net/test_squeezenet.cpp | 41 +++++++++++++++++++++++++++++++++ 6 files changed, 150 insertions(+), 14 deletions(-) create mode 100644 test/net/test_mobilenet+ssd.cpp create mode 100644 test/net/test_resnet.cpp create mode 100644 test/net/test_squeezenet.cpp diff --git a/src/common/types.h 
b/src/common/types.h index 227151adbbd..ca9e64cc60f 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -95,17 +95,23 @@ static const std::string G_OP_TYPE_FETCH = "fetch"; static std::unordered_map< std::string, std::pair, std::vector>> - op_input_output_key = {{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}}, - {G_OP_TYPE_RELU, {{"X"}, {"Out"}}}, - {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}}, - {G_OP_TYPE_MUL, {{"X"}, {"Out"}}}, - {G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}}, - {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}}, - {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}}, - {G_OP_TYPE_LRN, {{"X"}, {"Out"}}}, - {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}}, - {G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}}, - {G_OP_TYPE_FEED, {{"X"}, {"Out"}}}, - {G_OP_TYPE_FETCH, {{"X"}, {"Out"}}}, - {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}}}; + op_input_output_key = { + {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}}, + {G_OP_TYPE_RELU, {{"X"}, {"Out"}}}, + {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}}, + {G_OP_TYPE_MUL, {{"X"}, {"Out"}}}, + {G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}}, + {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}}, + {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}}, + {G_OP_TYPE_LRN, {{"X"}, {"Out"}}}, + {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}}, + {G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}}, + {G_OP_TYPE_FEED, {{"X"}, {"Out"}}}, + {G_OP_TYPE_FETCH, {{"X"}, {"Out"}}}, + {G_OP_TYPE_TRANSPOSE, {{"X"}, {"Out"}}}, + {G_OP_TYPE_BOX_CODER, + {{"PriorBox", "PriorBoxVar", "TargetBox"}, {"OutputBox"}}}, + {G_OP_TYPE_PRIOR_BOX, {{"Image", "Input"}, {"Boxes", "Variances"}}}, + {G_OP_TYPE_MULTICLASS_NMS, {{"BBoxes", "Scores"}, {"Out"}}}, + {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}}}; } // namespace paddle_mobile diff --git a/src/io.cpp b/src/io.cpp index 002e73b7964..b8350a81118 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -220,7 +220,7 @@ const framework::Program Loader::Load( } } } - // originProgramDesc->Description("program: "); + originProgramDesc->Description("program: "); if (optimize) { framework::ProgramOptimize program_optimize; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 37c0de1496b..c71306281e3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -96,6 +96,17 @@ target_link_libraries(test-googlenet paddle-mobile) ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-mobilenet paddle-mobile) +# gen test +ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) +target_link_libraries(test-resnet paddle-mobile) +# gen test +ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h) +target_link_libraries(test-mobilenetssd paddle-mobile) + +# gen test +ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) +target_link_libraries(test-squeezenet paddle-mobile) + # gen test ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) target_link_libraries(test-sigmoid paddle-mobile) diff --git a/test/net/test_mobilenet+ssd.cpp b/test/net/test_mobilenet+ssd.cpp new file mode 100644 index 00000000000..e9d92e7a51b --- /dev/null +++ b/test/net/test_mobilenet+ssd.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::Loader loader; + auto time1 = time(); + auto program = loader.Load(g_mobilenet_ssd, false); + auto time2 = time(); + DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + paddle_mobile::Executor executor(program, 1, false); + + std::vector dims{1, 3, 300, 300}; + Tensor input_tensor; + SetupTensor(&input_tensor, {1, 3, 300, 300}, static_cast(0), + static_cast(1)); + + std::vector input(input_tensor.data(), + input_tensor.data() + input_tensor.numel()); + auto time3 = time(); + executor.Predict(input, dims); + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + return 0; +} diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp new file mode 100644 index 00000000000..55f4c5efef2 --- /dev/null +++ b/test/net/test_resnet.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::Loader loader; + auto time1 = time(); + auto program = loader.Load(g_resnet, false); + auto time2 = time(); + DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + paddle_mobile::Executor executor(program, 1, false); + + std::vector dims{1, 3, 32, 32}; + Tensor input_tensor; + SetupTensor(&input_tensor, {1, 3, 32, 32}, static_cast(0), + static_cast(1)); + + std::vector input(input_tensor.data(), + input_tensor.data() + input_tensor.numel()); + auto time3 = time(); + executor.Predict(input, dims); + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + return 0; +} diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp new file mode 100644 index 00000000000..30460018fe8 --- /dev/null +++ b/test/net/test_squeezenet.cpp @@ -0,0 +1,41 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::Loader loader; + // ../../../test/models/googlenet + // ../../../test/models/mobilenet + auto time1 = time(); + auto program = loader.Load(g_squeezenet, false); + auto time2 = time(); + DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + paddle_mobile::Executor executor(program, 1, false); + + std::vector dims{1, 3, 227, 227}; + Tensor input_tensor; + SetupTensor(&input_tensor, {1, 3, 227, 227}, static_cast(0), + static_cast(1)); + + std::vector input(input_tensor.data(), + input_tensor.data() + input_tensor.numel()); + auto time3 = time(); + executor.Predict(input, dims); + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + return 0; +} From e8cc4c9279124cef7d2ba6985356bc76d523543a Mon Sep 17 00:00:00 2001 From: wangliu Date: Wed, 30 May 2018 19:09:05 +0800 Subject: [PATCH 17/26] add impl for executor'predict --- CMakeLists.txt | 3 ++- scripts/push2android.sh | 14 ++++++++++++++ src/common/enforce.h | 6 +++--- src/framework/operator.cpp | 14 +------------- src/framework/operator.h | 12 ++++++++++++ src/io.cpp | 34 +++++++++++++++++++++++++--------- src/io.h | 10 ++++++---- test/net/test_googlenet.cpp | 7 ++----- 8 files changed, 65 insertions(+), 35 deletions(-) create mode 100644 scripts/push2android.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index a9382f9697e..f60846e98aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 3.0) project(paddle-mobile) -add_definitions(-DPADDLE_MOBILE_DEBUG="true") +add_definitions(-DPADDLE_MOBILE_DEBUG) +add_definitions(-DENABLE_EXCEPTION) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_BUILD_TYPE RelWithDebInfo) diff --git a/scripts/push2android.sh b/scripts/push2android.sh new file mode 100644 index 00000000000..44b0ee32e99 --- /dev/null +++ b/scripts/push2android.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env sh + +push_fn () { +MODELS_PATH="../test/models/*" +EXE_FILE="../test/build/*" +EXE_DIR="data/local/tmp/bin" +MODELS_DIR="data/local/tmp/models" +LIB_PATH="../build/release/arm-v7a/build/*" +adb push ${EXE_FILE} ${EXE_DIR} +adb push ${LIB_PATH} ${EXE_DIR} +adb push ${MODELS_PATH} ${MODELS_DIR} +echo "test files sync completed" +} +push_fn diff --git a/src/common/enforce.h b/src/common/enforce.h index abd6217fbeb..52bda2258a0 100644 --- a/src/common/enforce.h +++ b/src/common/enforce.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#ifdef PADDLE_MOBILE_DEBUG +#ifdef ENABLE_EXCEPTION #include #include #include @@ -25,7 +25,7 @@ limitations under the License. */ namespace paddle_mobile { -#ifdef PADDLE_MOBILE_DEBUG +#ifdef ENABLE_EXCEPTION struct PaddleMobileException : public std::exception { const std::string exception_prefix = "paddle mobile C++ Exception: \n"; std::string message; @@ -64,7 +64,7 @@ struct PaddleMobileException : public std::exception { } #else #define PADDLE_MOBILE_THROW_EXCEPTION(...) -#define PADDLE_MOBILE_ASSERT(stat, ...) +#define PADDLE_MOBILE_ENFORCE(stat, ...) 
#endif } // namespace paddle_mobile diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp index 808002d4c8f..46feb97cb87 100644 --- a/src/framework/operator.cpp +++ b/src/framework/operator.cpp @@ -28,18 +28,6 @@ vector OperatorBase::GetOutKeys() const { return it->second.second; } -template -static T *GetVarValue(const string &key, const VariableNameMap &var_map, - const Scope &scope) { - auto var_vec = var_map.at(key); - if (!var_vec.empty()) { - auto var = scope.FindVar(var_vec[0]); - return var->GetMutable(); - } else { - return nullptr; - } -} - template OperatorBase::OperatorBase(const std::string &type, const VariableNameMap &inputs, @@ -60,7 +48,7 @@ void OperatorBase::CheckAllInputOutputSet() const {} template void OperatorBase::Run() const { RunImpl(); -#ifdef PADDLE_MOBILE_DEBUG +#if (PADDLE_MOBILE_DEBUG) vector output_keys = GetOutKeys(); for (const auto key : output_keys) { Tensor *out_ = GetVarValue(key, outputs_, *scope_); diff --git a/src/framework/operator.h b/src/framework/operator.h index 6194e5dcfff..2de3a953670 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -39,6 +39,18 @@ namespace framework { using std::string; using std::vector; +template +static T *GetVarValue(const string &key, const VariableNameMap &var_map, + const Scope &scope) { + auto var_vec = var_map.at(key); + if (!var_vec.empty()) { + auto var = scope.FindVar(var_vec[0]); + return var->GetMutable(); + } else { + return nullptr; + } +} + template class OperatorBase : PaddleMobileObject { public: diff --git a/src/io.cpp b/src/io.cpp index b8350a81118..ac89106e498 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -371,31 +371,47 @@ void Executor::InitMemory() { } template -void Executor::Predict(const framework::Tensor &t, int block_id) { +std::shared_ptr Executor::Predict( + const framework::Tensor &t) { framework::Variable *g_feed_value = program_.scope->Var("feed"); framework::Tensor *feed_tensor = g_feed_value->GetMutable(); feed_tensor->Resize(t.dims()); feed_tensor->ShareDataWith(t); std::shared_ptr to_predict_block = - to_predict_program_->Block(block_id); + to_predict_program_->Block(0); for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { auto op = ops_of_block_[*to_predict_block.get()][j]; op->Run(); } + auto ops = ops_of_block_[*to_predict_program_->Block(0)]; + auto last_op = ops.rbegin(); + auto output_map = (*last_op)->Outputs(); + std::vector out_keys = (*last_op)->GetOutKeys(); + PADDLE_MOBILE_ENFORCE(out_keys.size() > 0, "the last op contains no output"); + framework::LoDTensor *output_tensor = + framework::GetVarValue(out_keys[0], output_map, + *(program_.scope)); + return std::shared_ptr(output_tensor); +} +template +std::shared_ptr Executor::Predict( + const framework::Tensor &t, int block_id) { + return Predict(t); } template std::vector::Ptype> Executor::Predict( const std::vector &input, const std::vector &dims) { framework::Tensor tensor(input, framework::make_ddim(dims)); - - Predict(tensor, 0); - - framework::Variable *g_feed_value = program_.scope->Var("col"); - auto feed_tensor = g_feed_value->GetMutable(); - - return {}; + std::shared_ptr output_tensor = Predict(tensor, 0); + Executor::Ptype *output_ptr = + output_tensor->data::Ptype>(); + std::vector::Ptype> result_vector; + for (int j = 0; j < output_tensor->numel(); ++j) { + result_vector.push_back(output_ptr[j]); + } + return result_vector; } template class Executor; diff --git a/src/io.h b/src/io.h index de2d359bf58..ae99197baa9 100644 --- a/src/io.h +++ b/src/io.h @@ 
-15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include #include @@ -44,24 +45,25 @@ class Executor { public: typedef typename PrecisionTrait
<P>
::ptype Ptype; - Executor() = default; - Executor(const framework::Program p, int batch_size = 1, bool use_optimize = true); - // std::shared_ptr Predict(framework::Tensor &t); + std::shared_ptr Predict(const framework::Tensor &t); std::vector Predict(const std::vector &input, const std::vector &dims); protected: + Executor() = default; + void InitMemory(); void LoadMemory(const framework::VarDesc var_desc, framework::LoDTensor *tensor, const std::string &file_path); framework::Program program_; int batch_size_ = 1; std::shared_ptr to_predict_program_; - void Predict(const framework::Tensor &t, int block_id); + std::shared_ptr Predict(const framework::Tensor &t, + int block_id); std::map>>> ops_of_block_; diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp index 139579e9116..0640af890cf 100644 --- a/test/net/test_googlenet.cpp +++ b/test/net/test_googlenet.cpp @@ -18,20 +18,17 @@ limitations under the License. */ int main() { paddle_mobile::Loader loader; - // ../../../test/models/googlenet - // ../../../test/models/mobilenet auto time1 = time(); auto program = loader.Load(g_googlenet, false); auto time2 = time(); - DLOG << "load cost :" << time_diff(time1, time1) << "ms"; + DLOG << "load cost :" << time_diff(time1, time2) << "ms\n"; paddle_mobile::Executor executor(program, 1, false); - std::vector input; std::vector dims{1, 3, 224, 224}; GetInput(g_test_image_1x3x224x224, &input, dims); auto time3 = time(); executor.Predict(input, dims); auto time4 = time(); - DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + DLOG << "predict cost :" << time_diff(time3, time4) << "ms\n"; return 0; } From e64e51afbee01204c3ef97fec6a723651479cde8 Mon Sep 17 00:00:00 2001 From: wangliu Date: Thu, 31 May 2018 10:13:24 +0800 Subject: [PATCH 18/26] modify softmax to support input with multi batch --- src/operators/math/softmax.cpp | 8 +++++++- test/net/test_mobilenet.cpp | 13 +++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/operators/math/softmax.cpp b/src/operators/math/softmax.cpp index 6eaeb6e2561..224382eb2b7 100644 --- a/src/operators/math/softmax.cpp +++ b/src/operators/math/softmax.cpp @@ -136,9 +136,15 @@ class SoftmaxFuntor { public: void operator()(const framework::Tensor *X, framework::Tensor *Y) { + const DDim dDim = X->dims(); + for (int i = 0; i < dDim[0]; ++i) { + framework::Tensor sub_X = X->Slice(i, i + 1); + framework::Tensor sub_Y = Y->Slice(i, i + 1); + #if __ARM_NEON - SoftmaxCacl(X, Y); + SoftmaxCacl(&sub_X, &sub_Y); #endif + } } }; diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp index b5d925227e4..aee0d456813 100644 --- a/test/net/test_mobilenet.cpp +++ b/test/net/test_mobilenet.cpp @@ -22,17 +22,22 @@ int main() { auto program = loader.Load(g_mobilenet, false); auto time2 = time(); DLOG << "load cost :" << time_diff(time1, time1) << "ms"; - paddle_mobile::Executor executor(program, 1, false); + paddle_mobile::Executor executor(program, 2, false); - std::vector dims{1, 3, 224, 224}; + std::vector dims{2, 3, 224, 224}; Tensor input_tensor; - SetupTensor(&input_tensor, {1, 3, 224, 224}, static_cast(0), + SetupTensor(&input_tensor, {2, 3, 224, 224}, static_cast(0), static_cast(1)); std::vector input(input_tensor.data(), input_tensor.data() + input_tensor.numel()); auto time3 = time(); - executor.Predict(input, dims); + auto vec_result = executor.Predict(input, dims); + float sum = 0; + for (const auto item : vec_result) { + sum += item; + } + DLOG << "mobilenet output sum =" << sum; auto time4 = time(); 
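   // Predict(input, dims) now returns the flattened output of the program's
   // last op (see the Executor::Predict changes in src/io.cpp above), so
   // summing it is a cheap smoke check that non-trivial data came back.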
DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; return 0; From c1a55d8c6d4f3811271f50ff5b875406aa754f48 Mon Sep 17 00:00:00 2001 From: wangliu Date: Thu, 31 May 2018 10:25:04 +0800 Subject: [PATCH 19/26] modify code style --- test/net/test_mobilenet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp index aee0d456813..7ed9a3566e3 100644 --- a/test/net/test_mobilenet.cpp +++ b/test/net/test_mobilenet.cpp @@ -33,7 +33,7 @@ int main() { input_tensor.data() + input_tensor.numel()); auto time3 = time(); auto vec_result = executor.Predict(input, dims); - float sum = 0; + float sum = 0; for (const auto item : vec_result) { sum += item; } From 6ea5e26ebdf816961973979bdd0b480c4dc52be2 Mon Sep 17 00:00:00 2001 From: eclipsess Date: Thu, 31 May 2018 10:46:43 +0800 Subject: [PATCH 20/26] remove some annotations --- .../kernel/arm/depthwise_conv_kernel.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/operators/kernel/arm/depthwise_conv_kernel.cpp b/src/operators/kernel/arm/depthwise_conv_kernel.cpp index 73aa9953cfc..ff622d13340 100644 --- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp +++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp @@ -32,7 +32,7 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { std::vector paddings = param.Paddings(); std::vector dilations = param.Dilations(); - DLOG << " compute end get Attrs " << strides[0]; +// DLOG << " compute end get Attrs " << strides[0]; const int batch_size = static_cast(input->dims()[0]); @@ -59,17 +59,17 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); } - DLOG << " col_shape = " << col_shape; - DLOG << " col_matrix_shape = " << col_matrix_shape; +// DLOG << " col_shape = " << col_shape; +// DLOG << " col_matrix_shape = " << col_matrix_shape; framework::DDim input_shape = framework::slice_ddim( input->dims(), 1, static_cast(input->dims().size())); - DLOG << " input_shape = " << input_shape; +// DLOG << " input_shape = " << input_shape; framework::DDim filter_matrix_shape = {filter.dims()[0], filter.numel() / filter.dims()[0]}; filter.Resize(filter_matrix_shape); - DLOG << " filter.dims() = " << filter.dims(); +// DLOG << " filter.dims() = " << filter.dims(); framework::DDim output_matrix_shape = { output->dims()[1], @@ -85,8 +85,8 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { for (int i = 0; i < batch_size; i++) { Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); - DLOG << " in_batch.dims() = " << in_batch.dims(); - DLOG << " out_batch.dims() = " << out_batch.dims(); +// DLOG << " in_batch.dims() = " << in_batch.dims(); +// DLOG << " out_batch.dims() = " << out_batch.dims(); for (int g = 0; g < groups; g++) { Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); @@ -109,9 +109,9 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { // gemm Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - DLOG << " out_slice " << out_slice.dims(); - DLOG << " filter_slice " << filter_slice.dims(); - DLOG << " col_matrix " << col_matrix.dims(); +// DLOG << " out_slice " << out_slice.dims(); +// DLOG << " filter_slice " << filter_slice.dims(); +// DLOG << " col_matrix " << col_matrix.dims(); 
math::matmul(filter_slice, false, col_matrix, false, static_cast(1), &out_slice, static_cast(0)); From 251dea7f2768a0bcfeac9fdd163f78fdbcd0f6be Mon Sep 17 00:00:00 2001 From: eclipsess Date: Thu, 31 May 2018 10:50:55 +0800 Subject: [PATCH 21/26] code style --- .../kernel/arm/depthwise_conv_kernel.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/operators/kernel/arm/depthwise_conv_kernel.cpp b/src/operators/kernel/arm/depthwise_conv_kernel.cpp index ff622d13340..1da52fa8d46 100644 --- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp +++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp @@ -32,7 +32,7 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { std::vector paddings = param.Paddings(); std::vector dilations = param.Dilations(); -// DLOG << " compute end get Attrs " << strides[0]; + // DLOG << " compute end get Attrs " << strides[0]; const int batch_size = static_cast(input->dims()[0]); @@ -59,17 +59,17 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); } -// DLOG << " col_shape = " << col_shape; -// DLOG << " col_matrix_shape = " << col_matrix_shape; + // DLOG << " col_shape = " << col_shape; + // DLOG << " col_matrix_shape = " << col_matrix_shape; framework::DDim input_shape = framework::slice_ddim( input->dims(), 1, static_cast(input->dims().size())); -// DLOG << " input_shape = " << input_shape; + // DLOG << " input_shape = " << input_shape; framework::DDim filter_matrix_shape = {filter.dims()[0], filter.numel() / filter.dims()[0]}; filter.Resize(filter_matrix_shape); -// DLOG << " filter.dims() = " << filter.dims(); + // DLOG << " filter.dims() = " << filter.dims(); framework::DDim output_matrix_shape = { output->dims()[1], @@ -85,8 +85,8 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { for (int i = 0; i < batch_size; i++) { Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); -// DLOG << " in_batch.dims() = " << in_batch.dims(); -// DLOG << " out_batch.dims() = " << out_batch.dims(); + // DLOG << " in_batch.dims() = " << in_batch.dims(); + // DLOG << " out_batch.dims() = " << out_batch.dims(); for (int g = 0; g < groups; g++) { Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); @@ -109,9 +109,9 @@ void DepthwiseConvKernel::Compute(const ConvParam ¶m) const { // gemm Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); -// DLOG << " out_slice " << out_slice.dims(); -// DLOG << " filter_slice " << filter_slice.dims(); -// DLOG << " col_matrix " << col_matrix.dims(); + // DLOG << " out_slice " << out_slice.dims(); + // DLOG << " filter_slice " << filter_slice.dims(); + // DLOG << " col_matrix " << col_matrix.dims(); math::matmul(filter_slice, false, col_matrix, false, static_cast(1), &out_slice, static_cast(0)); From b07f7b2d66bcdf6fbcba7fe5331372f9e7ec3891 Mon Sep 17 00:00:00 2001 From: liuruilong Date: Thu, 31 May 2018 12:01:47 +0800 Subject: [PATCH 22/26] add split config --- src/framework/operator.h | 2 +- src/framework/program/block_desc.cpp | 6 +- src/framework/program/block_desc.h | 4 + .../program/program-optimize/node.cpp | 108 ++++++------- src/framework/program/program-optimize/node.h | 5 +- .../program-optimize/program_optimize.cpp | 153 +++++++++++++++++- .../program-optimize/program_optimize.h | 16 +- 
src/framework/program/program_desc.cpp | 10 +- 8 files changed, 227 insertions(+), 77 deletions(-) diff --git a/src/framework/operator.h b/src/framework/operator.h index 8e5e55fb469..6e5a2b089db 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -138,7 +138,7 @@ class FusionOpMatcher : PaddleMobileObject { virtual Node &BeginNode() { return node_; } - std::string BeginType() { return node_.BeginType(); } + std::string BeginType() { return node_.Type(); } protected: Node node_; diff --git a/src/framework/program/block_desc.cpp b/src/framework/program/block_desc.cpp index 7342abe2844..0ddb9126192 100644 --- a/src/framework/program/block_desc.cpp +++ b/src/framework/program/block_desc.cpp @@ -26,11 +26,7 @@ std::vector> BlockDesc::Vars() const { } std::vector> BlockDesc::Ops() const { - std::vector> res; - for (const auto &op : ops_) { - res.push_back(op); - } - return res; + return ops_; } BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc) diff --git a/src/framework/program/block_desc.h b/src/framework/program/block_desc.h index 1a22714b52a..84d7a90fc11 100644 --- a/src/framework/program/block_desc.h +++ b/src/framework/program/block_desc.h @@ -26,6 +26,7 @@ class BlockDesc : PaddleMobileObject { public: friend class Node; friend class ProgramOptimize; + BlockDesc() {} BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc); BlockDesc(const BlockDesc &block_desc) : index_(block_desc.index_), parent_index_(block_desc.parent_index_) { @@ -43,6 +44,8 @@ class BlockDesc : PaddleMobileObject { const int &ID() const { return index_; } + const bool &MultiThread() const { return multi_thread_; } + const int &Parent() const { return parent_index_; } bool operator==(const paddle_mobile::framework::BlockDesc &in_block) const { @@ -58,6 +61,7 @@ class BlockDesc : PaddleMobileObject { private: int index_; + bool multi_thread_; int parent_index_; std::vector> ops_; std::unordered_map> vars_; diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp index 820fa6a443c..5edde24c598 100644 --- a/src/framework/program/program-optimize/node.cpp +++ b/src/framework/program/program-optimize/node.cpp @@ -45,17 +45,6 @@ bool Node::operator==(const Node &in) { return true; } -// std::shared_ptr Node::MatchTheFirstNode(std::string type){ -// -// for (const auto &node : outputs_){ -// if (node->type_ == type){ -// return node; -// }else{ -// -// } -// } -//} - std::vector> Node::OpDescs(uint size) { std::vector> op_descs; OpDescs(size - 1, &op_descs); @@ -75,21 +64,40 @@ void Node::OpDescs(uint index, void Node::OpDescs(std::vector> *op_desc, Node *node, bool adding_thread, int thread_num) { - bool can_add_split = false; if (outputs_.size() > 1) { + adding_thread = false; + } + + bool can_add_split = false; + // 如果当前节点有多个输出 并且 只有当前节点对应的 op_desc_ 输出数为 1 时支持 + if (outputs_.size() > 1 && + op_input_output_key[op_desc_->type_].second.size() == 1) { can_add_split = true; - if (op_input_output_key[op_desc_->type_].second.size() != 1) { - DLOG << "当前 op desc 输出数不为 1 "; - can_add_split = false; - } + + // 遍历当前节点的 output 节点 for (const auto &output : outputs_) { - if (op_input_output_key.find(output->op_desc_->type_) != - op_input_output_key.end()) { - auto inputs_and_outputs = op_input_output_key[output->op_desc_->type_]; - auto outputs_of_output = - output->op_desc_->Output(inputs_and_outputs.second[0]); - auto inputs_of_output = - output->op_desc_->Input(inputs_and_outputs.first[0]); + // 不支持 output 有多个 output 的情况 + if 
(output->outputs_.size() > 0) { + can_add_split = false; + break; + } + + //与节点关联的 OpDesc + std::shared_ptr &op_desc = output->op_desc_; + + //获取这个 op 的 inputs key 和 outputs key + auto inputs_and_outputs = op_input_output_key[op_desc->type_]; + + //判断现在 是否存在这个 op + //判断这个 output 和 input key 的 size 等于 1 + if (op_input_output_key.find(op_desc->type_) != + op_input_output_key.end() && + inputs_and_outputs.first.size() == 1 && + inputs_and_outputs.second.size() == 1) { + auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]); + auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]); + + // 判断一下, 如果输入和输出没有同名, 是支持的 for (int i = 0; i < inputs_of_output.size(); ++i) { std::string input_of_output = inputs_of_output[i]; for (int j = 0; j < outputs_of_output.size(); ++j) { @@ -101,7 +109,7 @@ void Node::OpDescs(std::vector> *op_desc, } } } - } else { + } else { // 如果模型中包含没有的 op, 则不支持添加 split DLOG << "找不到 这个 op 类型: " << output->op_desc_->type_; can_add_split = false; } @@ -124,12 +132,11 @@ void Node::OpDescs(std::vector> *op_desc, if (can_add_split) { adding_thread = true; - std::shared_ptr split_op_desc = - std::make_shared(); + std::shared_ptr split_op_desc = + std::make_shared(); split_op_desc->type_ = G_OP_TYPE_SPLIT; auto outputs = this->op_desc_->Output( op_input_output_key[this->op_desc_->Type()].second[0]); - split_op_desc->inputs_ = { {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}}; auto &split_outputs = @@ -157,41 +164,12 @@ std::vector> Node::OpDescs() { return op_descs; } -std::string Node::ToString(std::string blank, const Node *node) const { - std::stringstream ss; - ss << type_ << "-> \n"; - - if (inputs_.size() > 1 && node != inputs_.back()) { - return ss.str(); - } else if (inputs_.size() > 1 && node == inputs_.back()) { - ss << "\n" << blank << type_ << "\n"; - } - - for (int i = 0; i < outputs_.size(); ++i) { - ss << blank << outputs_[i]->ToString(blank + " ", this) << ""; - } - return ss.str(); -} - -std::string Node::ToString() const { return this->ToString(" ", this); } - std::shared_ptr Node::To(int size) { std::shared_ptr node = std::make_shared(); this->To(size - 1, node); return node; } -// Node &Node::To(int size) { -// if (size == 1) { -// this->outputs_.clear(); -// } -// -// for (int j = 0; j < this->outputs_.size(); ++j) { -// outputs_[j]->To(size - 1); -// } -// return *this; -//} - void Node::To(int index, std::shared_ptr node) { node->type_ = this->type_; if (index != 0) { @@ -268,6 +246,24 @@ void Node::Folder( } } +std::string Node::ToString(std::string blank, const Node *node) const { + std::stringstream ss; + ss << type_ << "-> \n"; + + if (inputs_.size() > 1 && node != inputs_.back()) { + return ss.str(); + } else if (inputs_.size() > 1 && node == inputs_.back()) { + ss << "\n" << blank << type_ << "\n"; + } + + for (int i = 0; i < outputs_.size(); ++i) { + ss << blank << outputs_[i]->ToString(blank + " ", this) << ""; + } + return ss.str(); +} + +std::string Node::ToString() const { return this->ToString(" ", this); } + void Node::Description() { if (op_desc_.get()) { DLOG << *op_desc_; diff --git a/src/framework/program/program-optimize/node.h b/src/framework/program/program-optimize/node.h index 5dd1a3acbf5..b7fe9b1f07a 100644 --- a/src/framework/program/program-optimize/node.h +++ b/src/framework/program/program-optimize/node.h @@ -27,6 +27,7 @@ namespace paddle_mobile { namespace framework { class Node : PaddleMobileObject { + friend class ProgramOptimize; public: Node() {} explicit Node(const std::string &type) : 
type_(type) {} @@ -42,8 +43,8 @@ class Node : PaddleMobileObject { std::map> change_map); std::vector> OpDescs(uint size); std::vector> OpDescs(); - std::shared_ptr OpDesc() { return op_desc_; } - std::string BeginType() { return type_; } + std::shared_ptr OpDescOfNode() { return op_desc_; } + std::string Type() { return type_; } void Description(); private: diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp index 737fed9bd56..4c757bac755 100644 --- a/src/framework/program/program-optimize/program_optimize.cpp +++ b/src/framework/program/program-optimize/program_optimize.cpp @@ -19,11 +19,12 @@ namespace paddle_mobile { namespace framework { -// std::shared_ptr ProgramOptimize::Optimize() {} - std::shared_ptr ProgramOptimize::FushionOptimize( - std::shared_ptr ori_des) { - ProgramDesc *optimize_program = new ProgramDesc(*ori_des); + std::shared_ptr ori_des, bool add_split) { + +// ProgramDesc *optimize_program = new ProgramDesc(*ori_des); + std::shared_ptr optimize_program = std::make_shared(*ori_des); + current_block_ = optimize_program->Blocks().size(); for (int i = 0; i < optimize_program->Blocks().size(); ++i) { std::unordered_map> output_nodes; @@ -96,10 +97,148 @@ std::shared_ptr ProgramOptimize::FushionOptimize( } // DLOG << "node: \n" << *begin_node; - block->ops_ = begin_node->OpDescs(); + + + std::vector> op_descs; + GenerateOps(&op_descs, begin_node.get()); + block->ops_ = op_descs; + } + + for (int m = 0; m < new_blocks_.size(); ++m) { + std::shared_ptr new_block = new_blocks_[m]; + new_block->index_ = m + ori_des->blocks_.size(); + optimize_program->blocks_.push_back(new_block); } - std::shared_ptr shared_optimzie(optimize_program); - return shared_optimzie; + return optimize_program; } + + +void ProgramOptimize::GenerateOps(std::vector> *op_desc, + Node *input_node, + Node *current_node, + bool adding_thread, + int thread_num, + std::shared_ptr new_block) { + if (current_node->outputs_.size() > 1) { + adding_thread = false; + } + + bool can_add_split = false; + // 如果当前节点有多个输出 并且 只有当前节点对应的 op_desc_ 输出数为 1 时支持 + if (current_node->outputs_.size() > 1 && + op_input_output_key[current_node->op_desc_->type_].second.size() == 1) { + can_add_split = true; + + // 遍历当前节点的 output 节点 + for (const auto &output : current_node->outputs_) { + // 不支持 output 有多个 output 的情况 + if (output->outputs_.size() > 1) { + DLOG << "don't support multi output of output"; + can_add_split = false; + break; + } + + //与节点关联的 OpDesc + std::shared_ptr &op_desc = output->op_desc_; + + //获取这个 op 的 inputs key 和 outputs key + auto inputs_and_outputs = op_input_output_key[op_desc->type_]; + + //判断现在 是否存在这个 op + //判断这个 output 和 input key 的 size 等于 1 + if (op_input_output_key.find(op_desc->type_) != + op_input_output_key.end() && + inputs_and_outputs.first.size() == 1 && + inputs_and_outputs.second.size() == 1) { + auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]); + auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]); + + // 判断一下, 如果输入和输出没有同名, 是支持的 + for (int i = 0; i < inputs_of_output.size(); ++i) { + std::string input_of_output = inputs_of_output[i]; + for (int j = 0; j < outputs_of_output.size(); ++j) { + std::string output_of_output = outputs_of_output[j]; + if (input_of_output == output_of_output) { + DLOG << "output的 output 包含 input" << input_of_output; + can_add_split = false; + break; + } + } + } + } else { // 如果模型中包含没有的 op, 则不支持添加 split + DLOG << "找不到 这个 op 类型: " << 
output->op_desc_->type_; + can_add_split = false; + } + } + } + + if (current_node->inputs_.size() > 1 && input_node != current_node->inputs_.back()) { + return; + } else if (current_node->inputs_.size() > 1 && input_node == current_node->inputs_.back()) { + new_block.reset(); + adding_thread = false; + op_desc->push_back(current_node->op_desc_); + } else { + if (new_block.get() && adding_thread) { + new_block->ops_.push_back(current_node->op_desc_); + } else { + op_desc->push_back(current_node->op_desc_); + } + } + if (adding_thread) { + Attribute attr; + attr.Set(thread_num); + current_node->op_desc_->attrs_["thread"] = attr; + } + + + + if (can_add_split) { + new_block = std::make_shared(); + new_block->multi_thread_ = true; + new_block->index_ = current_block_; + new_blocks_.push_back(new_block); + + adding_thread = true; + std::shared_ptr split_op_desc = + std::make_shared(); + split_op_desc->type_ = G_OP_TYPE_SPLIT; + auto outputs = current_node->op_desc_->Output( + op_input_output_key[current_node->op_desc_->Type()].second[0]); + split_op_desc->inputs_ = { + {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}}; + auto &split_outputs = + split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]]; + for (const auto &output : current_node->outputs_) { + split_outputs.push_back(outputs[0]); + } + + Attribute attr; + attr.Set(current_block_); + split_op_desc->attrs_["block_id"] = attr; + + op_desc->push_back(split_op_desc); + current_block_++; + } + + for (int i = 0; i < current_node->outputs_.size(); ++i) { + auto &output = current_node->outputs_[i]; + if (can_add_split) { + GenerateOps(op_desc, current_node, output.get(), adding_thread, i, new_block); + } else { + GenerateOps(op_desc, current_node, output.get(), adding_thread, thread_num, new_block); + } + } +} + +void ProgramOptimize::GenerateOps(std::vector> *op_descs, + Node *begin_node) { + + + //std::vector> *op_desc, + // Node *input_node, Node *current_node, bool adding_thread, int thread_num + this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr); +} + } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/program/program-optimize/program_optimize.h b/src/framework/program/program-optimize/program_optimize.h index 3839fa1e36b..8ba8d2973fe 100644 --- a/src/framework/program/program-optimize/program_optimize.h +++ b/src/framework/program/program-optimize/program_optimize.h @@ -28,12 +28,20 @@ class ProgramOptimize { public: ProgramOptimize() {} std::shared_ptr FushionOptimize( - std::shared_ptr ori_des); + std::shared_ptr ori_des, bool add_split = false); private: - // std::shared_ptr ori_desc_; - std::vector>> - outputs_nodes_; + int current_block_; + std::vector> new_blocks_; + + void GenerateOps(std::vector> *op_descs, + Node *begin_node); + void GenerateOps(std::vector> *op_desc, + Node *input_node, + Node *current_node, + bool adding_thread, + int thread_num, + std::shared_ptr new_block); }; } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/program/program_desc.cpp b/src/framework/program/program_desc.cpp index 071f5cf5719..31f4bcb6f11 100644 --- a/src/framework/program/program_desc.cpp +++ b/src/framework/program/program_desc.cpp @@ -32,11 +32,13 @@ void ProgramDesc::Description(std::string header) { if (header.size()) { LOG(kLOG_INFO) << header; } - for (const auto &block : this->blocks_) { + + for (int i = 0; i < this->blocks_.size(); ++i) { + auto block = this->blocks_[i]; LOG(kLOG_DEBUG) << "block: " << block->ID(); 
diff --git a/src/framework/program/program_desc.cpp b/src/framework/program/program_desc.cpp
index 071f5cf5719..31f4bcb6f11 100644
--- a/src/framework/program/program_desc.cpp
+++ b/src/framework/program/program_desc.cpp
@@ -32,11 +32,13 @@ void ProgramDesc::Description(std::string header) {
   if (header.size()) {
     LOG(kLOG_INFO) << header;
   }
-  for (const auto &block : this->blocks_) {
+
+  for (int i = 0; i < this->blocks_.size(); ++i) {
+    auto block = this->blocks_[i];
     LOG(kLOG_DEBUG) << "block: " << block->ID();
     LOG(kLOG_INFO) << "block ops size: " << block->Ops().size();
     for (int j = 0; j < block->Ops().size(); ++j) {
-      const auto &op = block->Ops()[j];
+      auto op = block->Ops()[j];
       LOG(kLOG_DEBUG1) << "op: " << op->Type();
       for (auto &input : op->GetInputs()) {
         LOG(kLOG_DEBUG2) << "input parameter: " << input.first;
@@ -71,6 +73,10 @@ void ProgramDesc::Description(std::string header) {
       }
     }
   }
+
+  for (const auto &block : this->blocks_) {
+
+  }
 #endif
 }
 

From deef88cba43c397d20aa280b51420e3fbf9d2590 Mon Sep 17 00:00:00 2001
From: liuruilong
Date: Thu, 31 May 2018 12:02:26 +0800
Subject: [PATCH 23/26] format files

---
 src/framework/program/block_desc.cpp           |  4 +-
 .../program/program-optimize/node.cpp          |  3 +-
 src/framework/program/program-optimize/node.h  |  1 +
 .../program-optimize/program_optimize.cpp      | 55 +++++++++----------
 .../program-optimize/program_optimize.h        |  7 +--
 src/framework/program/program_desc.cpp         |  1 -
 6 files changed, 31 insertions(+), 40 deletions(-)

diff --git a/src/framework/program/block_desc.cpp b/src/framework/program/block_desc.cpp
index 0ddb9126192..21322f08256 100644
--- a/src/framework/program/block_desc.cpp
+++ b/src/framework/program/block_desc.cpp
@@ -25,9 +25,7 @@ std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
   return res;
 }
 
-std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const {
-  return ops_;
-}
+std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const { return ops_; }
 
 BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc)
     : index_(desc->idx), parent_index_(desc->idx) {
diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp
index 5edde24c598..31377222db8 100644
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -132,8 +132,7 @@ void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
 
   if (can_add_split) {
     adding_thread = true;
-    std::shared_ptr<OpDesc> split_op_desc =
-        std::make_shared<OpDesc>();
+    std::shared_ptr<OpDesc> split_op_desc = std::make_shared<OpDesc>();
     split_op_desc->type_ = G_OP_TYPE_SPLIT;
     auto outputs = this->op_desc_->Output(
         op_input_output_key[this->op_desc_->Type()].second[0]);
diff --git a/src/framework/program/program-optimize/node.h b/src/framework/program/program-optimize/node.h
index b7fe9b1f07a..da7e26a9ac0 100644
--- a/src/framework/program/program-optimize/node.h
+++ b/src/framework/program/program-optimize/node.h
@@ -28,6 +28,7 @@ namespace framework {
 
 class Node : PaddleMobileObject {
   friend class ProgramOptimize;
+
  public:
   Node() {}
   explicit Node(const std::string &type) : type_(type) {}
diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp
index 4c757bac755..8b0bf295262 100644
--- a/src/framework/program/program-optimize/program_optimize.cpp
+++ b/src/framework/program/program-optimize/program_optimize.cpp
@@ -21,9 +21,9 @@ namespace framework {
 
 std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
     std::shared_ptr<ProgramDesc> ori_des, bool add_split) {
-
-// ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
-  std::shared_ptr<ProgramDesc> optimize_program = std::make_shared<ProgramDesc>(*ori_des);
+  // ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
+  std::shared_ptr<ProgramDesc> optimize_program =
+      std::make_shared<ProgramDesc>(*ori_des);
   current_block_ = optimize_program->Blocks().size();
 
   for (int i = 0; i < optimize_program->Blocks().size(); ++i) {
     std::unordered_map<std::string, std::shared_ptr<Node>> output_nodes;
@@ -98,7 +98,6 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
 
     // DLOG << "node: \n" << *begin_node;
 
-
     std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
     GenerateOps(&op_descs, begin_node.get());
     block->ops_ = op_descs;
   }
@@ -112,13 +111,10 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
   return optimize_program;
 }
 
-
-void ProgramOptimize::GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-                                  Node *input_node,
-                                  Node *current_node,
-                                  bool adding_thread,
-                                  int thread_num,
-                                  std::shared_ptr<BlockDesc> new_block) {
+void ProgramOptimize::GenerateOps(
+    std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
+    Node *current_node, bool adding_thread, int thread_num,
+    std::shared_ptr<BlockDesc> new_block) {
   if (current_node->outputs_.size() > 1) {
     adding_thread = false;
   }
@@ -147,7 +143,7 @@ void ProgramOptimize::GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>
       // check whether this op type is known,
      // and that its output and input key lists both have size 1
       if (op_input_output_key.find(op_desc->type_) !=
-          op_input_output_key.end() &&
+              op_input_output_key.end() &&
           inputs_and_outputs.first.size() == 1 &&
           inputs_and_outputs.second.size() == 1) {
         auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
@@ -172,9 +168,11 @@ void ProgramOptimize::GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>
     }
   }
 
-  if (current_node->inputs_.size() > 1 && input_node != current_node->inputs_.back()) {
+  if (current_node->inputs_.size() > 1 &&
+      input_node != current_node->inputs_.back()) {
     return;
-  } else if (current_node->inputs_.size() > 1 && input_node == current_node->inputs_.back()) {
+  } else if (current_node->inputs_.size() > 1 &&
+             input_node == current_node->inputs_.back()) {
     new_block.reset();
     adding_thread = false;
     op_desc->push_back(current_node->op_desc_);
@@ -191,8 +189,6 @@ void ProgramOptimize::GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>
     current_node->op_desc_->attrs_["thread"] = attr;
   }
 
-
-
   if (can_add_split) {
     new_block = std::make_shared<BlockDesc>();
     new_block->multi_thread_ = true;
@@ -200,15 +196,14 @@ void ProgramOptimize::GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>
     new_blocks_.push_back(new_block);
 
     adding_thread = true;
-    std::shared_ptr<OpDesc> split_op_desc =
-        std::make_shared<OpDesc>();
+    std::shared_ptr<OpDesc> split_op_desc = std::make_shared<OpDesc>();
     split_op_desc->type_ = G_OP_TYPE_SPLIT;
     auto outputs = current_node->op_desc_->Output(
-          op_input_output_key[current_node->op_desc_->Type()].second[0]);
+        op_input_output_key[current_node->op_desc_->Type()].second[0]);
     split_op_desc->inputs_ = {
-          {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
+        {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
     auto &split_outputs =
-          split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
+        split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
     for (const auto &output : current_node->outputs_) {
       split_outputs.push_back(outputs[0]);
     }
@@ -224,19 +219,21 @@ void ProgramOptimize::GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>
   for (int i = 0; i < current_node->outputs_.size(); ++i) {
     auto &output = current_node->outputs_[i];
     if (can_add_split) {
-      GenerateOps(op_desc, current_node, output.get(), adding_thread, i, new_block);
+      GenerateOps(op_desc, current_node, output.get(), adding_thread, i,
+                  new_block);
     } else {
-      GenerateOps(op_desc, current_node, output.get(), adding_thread, thread_num, new_block);
+      GenerateOps(op_desc, current_node, output.get(), adding_thread,
+                  thread_num, new_block);
     }
   }
 }
 
-void ProgramOptimize::GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
-                                  Node *begin_node) {
-
-
-  //std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-  // Node *input_node, Node *current_node, bool adding_thread, int thread_num
+void ProgramOptimize::GenerateOps(
+    std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
+    Node *begin_node) {
+  // std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
+  // Node *input_node, Node *current_node, bool adding_thread, int
+  // thread_num
   this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
 }
diff --git a/src/framework/program/program-optimize/program_optimize.h b/src/framework/program/program-optimize/program_optimize.h
index 8ba8d2973fe..32d8d1fa914 100644
--- a/src/framework/program/program-optimize/program_optimize.h
+++ b/src/framework/program/program-optimize/program_optimize.h
@@ -37,11 +37,8 @@ class ProgramOptimize {
   void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
                    Node *begin_node);
   void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-                   Node *input_node,
-                   Node *current_node,
-                   bool adding_thread,
-                   int thread_num,
-                   std::shared_ptr<BlockDesc> new_block);
+                   Node *input_node, Node *current_node, bool adding_thread,
+                   int thread_num, std::shared_ptr<BlockDesc> new_block);
 };
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/program/program_desc.cpp b/src/framework/program/program_desc.cpp
index 31f4bcb6f11..8483e1e5d68 100644
--- a/src/framework/program/program_desc.cpp
+++ b/src/framework/program/program_desc.cpp
@@ -75,7 +75,6 @@ void ProgramDesc::Description(std::string header) {
   }
 
   for (const auto &block : this->blocks_) {
-
   }
 #endif
 }

From 0d53906fd7db31a533156786e593c7d0e8c51320 Mon Sep 17 00:00:00 2001
From: wangliu
Date: Thu, 31 May 2018 15:39:04 +0800
Subject: [PATCH 24/26] fix compile error on ubuntu

---
 src/framework/operator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp
index 46feb97cb87..f798d7ade20 100644
--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -48,7 +48,7 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
 template <typename Dtype>
 void OperatorBase<Dtype>::Run() const {
   RunImpl();
-#if (PADDLE_MOBILE_DEBUG)
+#ifdef PADDLE_MOBILE_DEBUG
   vector<string> output_keys = GetOutKeys();
   for (const auto key : output_keys) {
     Tensor *out_ = GetVarValue(key, outputs_, *scope_);
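The one-line fix in PATCH 24 is worth a standalone note. "#if (PADDLE_MOBILE_DEBUG)" evaluates the macro's expansion as a constant expression, so if the build defines the macro with no value the directive becomes "#if ()", which the preprocessor rejects — one plausible cause of the Ubuntu compile error named in the subject line. "#ifdef" only asks whether the name is defined. A minimal illustration; the empty in-source define below stands in for whatever the real build system passes:

#include <iostream>

#define PADDLE_MOBILE_DEBUG  // defined, but expands to nothing

int main() {
#ifdef PADDLE_MOBILE_DEBUG  // passes: the name is defined
  std::cout << "debug path taken" << std::endl;
#endif
  // "#if (PADDLE_MOBILE_DEBUG)" would not preprocess here: the empty
  // expansion leaves "#if ()", a syntax error in the condition.
  return 0;
}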
From 2b92e037ff6fdf4cb2f3102c71e03995935fb90d Mon Sep 17 00:00:00 2001
From: liuruilong
Date: Thu, 31 May 2018 16:23:51 +0800
Subject: [PATCH 25/26] fix fc crash

---
 src/common/types.h                             |  3 +-
 .../program/program-optimize/node.cpp          | 41 +++++++++++++++++++
 src/framework/program/program-optimize/node.h  |  5 +++
 .../program-optimize/program_optimize.cpp      | 29 ++++++++++++-
 .../program-optimize/program_optimize.h        |  4 +-
 src/io.cpp                                     |  8 +++-
 src/operators/fusion_conv_add_relu_op.h        |  6 +--
 src/operators/fusion_fc_op.h                   |  6 +--
 test/net/test_googlenet.cpp                    |  5 ++-
 9 files changed, 95 insertions(+), 12 deletions(-)

diff --git a/src/common/types.h b/src/common/types.h
index ca9e64cc60f..5e651a89517 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -77,7 +77,7 @@ static const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
 static const std::string G_OP_TYPE_BOX_CODER = "box_coder";
 static const std::string G_OP_TYPE_CONCAT = "concat";
 static const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
-static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "FusionConvAddRelu";
+static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
 static const std::string G_OP_TYPE_FC = "fc";
 static const std::string G_OP_TYPE_LRN = "lrn";
 static const std::string G_OP_TYPE_MUL = "mul";
@@ -92,6 +92,7 @@ static const std::string G_OP_TYPE_TRANSPOSE = "transpose";
 static const std::string G_OP_TYPE_SPLIT = "split";
 static const std::string G_OP_TYPE_FEED = "feed";
 static const std::string G_OP_TYPE_FETCH = "fetch";
+static const std::string G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
 
 static std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp
index 31377222db8..3910dc7a0b1 100644
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -45,6 +45,47 @@ bool Node::operator==(const Node &in) {
   return true;
 }
 
+bool Node::CanSplit(std::unordered_set<std::string> complex_compute_set) {
+  bool split = false;
+  CanSplit(&split, false, 0, &complex_compute_set, this);
+  return split;
+}
+
+void Node::CanSplit(bool *split, bool spliting,
+                    int complex_count,
+                    std::unordered_set<std::string> *complex_compute_set, Node *pre_node) {
+  if (spliting) {
+    if (complex_compute_set->find(this->type_) != complex_compute_set->end()) {
+      complex_count++;
+    }
+  }
+
+  if (inputs_.size() > 1 && pre_node != inputs_.back()) {
+    return;
+  }
+  if (inputs_.size() > 1 && pre_node == inputs_.back()) {
+    if (complex_count > 1) {
+      *split = true;
+      return;
+    }
+  }
+
+  // multi output, to check
+  if (outputs_.size() > 1) {
+    spliting = true;
+    complex_count = 0;
+  } else {
+    if (spliting == true && inputs_.size() > 0) {
+      spliting = false;
+    } else {
+    }
+  }
+
+  for (auto &output : outputs_) {
+    output->CanSplit(split, spliting, complex_count, complex_compute_set, this);
+  }
+}
+
 std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(uint size) {
   std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
   OpDescs(size - 1, &op_descs);
diff --git a/src/framework/program/program-optimize/node.h b/src/framework/program/program-optimize/node.h
index da7e26a9ac0..914cb19589d 100644
--- a/src/framework/program/program-optimize/node.h
+++ b/src/framework/program/program-optimize/node.h
@@ -18,6 +18,7 @@ limitations under the License. */
 #include
 #include
 #include
+#include <unordered_set>
 
 #include "common/log.h"
 #include "framework/paddle_mobile_object.h"
@@ -36,6 +37,7 @@ class Node : PaddleMobileObject {
       : op_desc_(op_desc), type_(op_desc->Type()) {}
   Node &operator>(std::shared_ptr<Node> node);
   bool operator==(const Node &in);
+  bool CanSplit(std::unordered_set<std::string> complex_compute_set);
   std::string ToString() const;
   std::shared_ptr<Node> To(int size);
   uint Depth(uint begin = 0);
@@ -49,6 +51,9 @@ class Node : PaddleMobileObject {
   void Description();
 
  private:
+  void CanSplit(bool *split, bool spliting,
+                int complex_count,
+                std::unordered_set<std::string> *complex_compute_set, Node *pre_node);
   void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
                Node *node, bool adding_thread, int thread_num);
   void OpDescs(uint size,
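The CanSplit walk above decides whether a fan-out region contains enough heavy operators to justify splitting; complex_count rides through the recursion by value, so each branch counts independently. Below is a reduced, self-contained sketch of that test — the "more than one busy branch" criterion is a reading of the patch, not something it states — using flat branch lists in place of the Node graph:

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Counts ops on one branch whose types are in the "complex" set.
int CountComplex(const std::vector<std::string> &branch,
                 const std::unordered_set<std::string> &complex_ops) {
  int count = 0;
  for (const auto &type : branch) {
    if (complex_ops.count(type) > 0) {
      ++count;
    }
  }
  return count;
}

int main() {
  const std::unordered_set<std::string> complex_ops = {"conv2d",
                                                       "depthwise_conv2d"};
  const std::vector<std::vector<std::string>> branches = {
      {"conv2d", "relu"}, {"conv2d", "batch_norm"}};

  int busy_branches = 0;
  for (const auto &branch : branches) {
    if (CountComplex(branch, complex_ops) > 0) {
      ++busy_branches;
    }
  }
  // Splitting pays off only when more than one parallel branch is busy.
  std::cout << std::boolalpha << "worth splitting: " << (busy_branches > 1)
            << std::endl;
  return 0;
}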
diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp
index 8b0bf295262..11f9b17ad55 100644
--- a/src/framework/program/program-optimize/program_optimize.cpp
+++ b/src/framework/program/program-optimize/program_optimize.cpp
@@ -99,6 +99,7 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
     // DLOG << "node: \n" << *begin_node;
 
     std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
+    // bool can_splite = begin_node->CanSplit({G_OP_TYPE_CONV, G_OP_TYPE_BATCHNORM, G_OP_TYPE_DEPTHWISE_CONV});
     GenerateOps(&op_descs, begin_node.get());
     block->ops_ = op_descs;
   }
@@ -112,6 +112,28 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
   return optimize_program;
 }
 
+
+void ProgramOptimize::GenerateOps(
+    std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
+    Node *current_node) {
+
+  if (current_node->inputs_.size() > 1 &&
+      input_node != current_node->inputs_.back()) {
+    return;
+  } else if (current_node->inputs_.size() > 1 &&
+             input_node == current_node->inputs_.back()) {
+    op_desc->push_back(current_node->op_desc_);
+  } else {
+    op_desc->push_back(current_node->op_desc_);
+  }
+
+  for (int i = 0; i < current_node->outputs_.size(); ++i) {
+    auto &output = current_node->outputs_[i];
+    GenerateOps(op_desc, current_node, output.get());
+  }
+
+}
+
 void ProgramOptimize::GenerateOps(
     std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
     Node *current_node, bool adding_thread, int thread_num,
@@ -234,7 +257,11 @@ void ProgramOptimize::GenerateOps(
   // std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
   // Node *input_node, Node *current_node, bool adding_thread, int
   // thread_num
-  this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
+  if (false) {
+    this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
+  } else {
+    this->GenerateOps(op_descs, begin_node, begin_node);
+  }
 }
 
 }  // namespace framework
diff --git a/src/framework/program/program-optimize/program_optimize.h b/src/framework/program/program-optimize/program_optimize.h
index 32d8d1fa914..701358f5905 100644
--- a/src/framework/program/program-optimize/program_optimize.h
+++ b/src/framework/program/program-optimize/program_optimize.h
@@ -33,9 +33,11 @@ class ProgramOptimize {
  private:
   int current_block_;
   std::vector<std::shared_ptr<BlockDesc>> new_blocks_;
-
   void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
                    Node *begin_node);
+  void GenerateOps(
+      std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
+      Node *current_node);
   void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
                    Node *input_node, Node *current_node, bool adding_thread,
                    int thread_num, std::shared_ptr<BlockDesc> new_block);
diff --git a/src/io.cpp b/src/io.cpp
index ac89106e498..c99556f0865 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -220,13 +220,18 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
       }
     }
   }
-  originProgramDesc->Description("program: ");
 
   if (optimize) {
     framework::ProgramOptimize program_optimize;
     program.optimizeProgram =
         program_optimize.FushionOptimize(originProgramDesc);
   }
+  if (optimize) {
+    program.optimizeProgram->Description("optimize: ");
+  } else {
+    originProgramDesc->Description("program: ");
+  }
+
 
   paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
   return program;
@@ -254,6 +259,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
     std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
     for (int j = 0; j < ops.size(); ++j) {
       std::shared_ptr<framework::OpDesc> op = ops[j];
+      DLOG << "create op: " << op->Type();
       auto op_base = framework::OpRegistry<Dtype>::CreateOp(
           op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
           program_.scope);
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index 1fa3399cf22..0f52562f0bc 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -28,10 +28,10 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
         std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }
 
-  void FolderNodes(framework::Node &node) {
+  void FolderNodes(framework::Node *node) {
     std::vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node.OpDescs(node_.Depth());
-    node.Folder(node_.Depth(), Type(),
+        node->OpDescs(node_.Depth());
+    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }
   std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; }
 };
diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h
index fb49fa61b20..fe628631447 100644
--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -32,10 +32,10 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
     node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
   }
 
-  void FolderNodes(framework::Node &node) {
+  void FolderNodes(framework::Node *node) {
     vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node.OpDescs(node_.Depth());
-    node.Folder(node_.Depth(), Type(),
+        node->OpDescs(node_.Depth());
+    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }
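One pattern in PATCH 25 deserves a note: FolderNodes now takes framework::Node *node instead of Node &node. Under the Google style this codebase lints with, parameters a function mutates are passed by pointer so the mutation is visible at the call site. A toy illustration of the convention, unrelated to the framework's own types:

#include <cassert>

// The pointer parameter signals at the call site that 'value' is written.
void Increment(int *value) { *value += 1; }

int main() {
  int v = 0;
  Increment(&v);  // the explicit &v marks the write-back
  assert(v == 1);
  return 0;
}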
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index 0640af890cf..302cd3e726e 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -18,11 +18,12 @@ limitations under the License. */
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  bool optimize = true;
   auto time1 = time();
-  auto program = loader.Load(g_googlenet, false);
+  auto program = loader.Load(g_googlenet, optimize);
   auto time2 = time();
   DLOG << "load cost :" << time_diff(time1, time2) << "ms\n";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
+  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, optimize);
   std::vector<float> input;
   std::vector<int64_t> dims{1, 3, 224, 224};
   GetInput<float>(g_test_image_1x3x224x224, &input, dims);

From ddd8e462cbbe6a11ba98de01144c97c137d7f16a Mon Sep 17 00:00:00 2001
From: liuruilong
Date: Thu, 31 May 2018 16:25:58 +0800
Subject: [PATCH 26/26] format files

---
 src/common/types.h                                 |  3 ++-
 src/framework/program/program-optimize/node.cpp    |  6 +++---
 src/framework/program/program-optimize/node.h      |  8 ++++----
 .../program/program-optimize/program_optimize.cpp  | 10 ++++------
 .../program/program-optimize/program_optimize.h    |  5 ++---
 src/io.cpp                                         |  1 -
 src/operators/fusion_conv_add_relu_op.h            |  2 +-
 src/operators/fusion_fc_op.h                       |  4 ++--
 8 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/common/types.h b/src/common/types.h
index 5e651a89517..04b78947a6a 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -77,7 +77,8 @@ static const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
 static const std::string G_OP_TYPE_BOX_CODER = "box_coder";
 static const std::string G_OP_TYPE_CONCAT = "concat";
 static const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
-static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
+static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU =
+    "fusion_conv_add_relu";
 static const std::string G_OP_TYPE_FC = "fc";
 static const std::string G_OP_TYPE_LRN = "lrn";
 static const std::string G_OP_TYPE_MUL = "mul";
diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp
index 3910dc7a0b1..c165b6568aa 100644
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -51,9 +51,9 @@ bool Node::CanSplit(std::unordered_set<std::string> complex_compute_set) {
   return split;
 }
 
-void Node::CanSplit(bool *split, bool spliting,
-                    int complex_count,
-                    std::unordered_set<std::string> *complex_compute_set, Node *pre_node) {
+void Node::CanSplit(bool *split, bool spliting, int complex_count,
+                    std::unordered_set<std::string> *complex_compute_set,
+                    Node *pre_node) {
   if (spliting) {
     if (complex_compute_set->find(this->type_) != complex_compute_set->end()) {
       complex_count++;
diff --git a/src/framework/program/program-optimize/node.h b/src/framework/program/program-optimize/node.h
index 914cb19589d..8ef26f897d2 100644
--- a/src/framework/program/program-optimize/node.h
+++ b/src/framework/program/program-optimize/node.h
@@ -16,9 +16,9 @@ limitations under the License. */
 
 #include
 #include
+#include <unordered_set>
 #include
 #include
-#include <unordered_set>
 
 #include "common/log.h"
 #include "framework/paddle_mobile_object.h"
@@ -51,9 +51,9 @@ class Node : PaddleMobileObject {
   void Description();
 
  private:
-  void CanSplit(bool *split, bool spliting,
-                int complex_count,
-                std::unordered_set<std::string> *complex_compute_set, Node *pre_node);
+  void CanSplit(bool *split, bool spliting, int complex_count,
+                std::unordered_set<std::string> *complex_compute_set,
+                Node *pre_node);
   void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
                Node *node, bool adding_thread, int thread_num);
   void OpDescs(uint size,
diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp
index 11f9b17ad55..d9c3c51c3c8 100644
--- a/src/framework/program/program-optimize/program_optimize.cpp
+++ b/src/framework/program/program-optimize/program_optimize.cpp
@@ -99,7 +99,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
     // DLOG << "node: \n" << *begin_node;
 
     std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-    // bool can_splite = begin_node->CanSplit({G_OP_TYPE_CONV, G_OP_TYPE_BATCHNORM, G_OP_TYPE_DEPTHWISE_CONV});
+    // bool can_splite = begin_node->CanSplit({G_OP_TYPE_CONV,
+    // G_OP_TYPE_BATCHNORM, G_OP_TYPE_DEPTHWISE_CONV});
     GenerateOps(&op_descs, begin_node.get());
     block->ops_ = op_descs;
   }
@@ -112,11 +113,9 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
   return optimize_program;
 }
 
-
 void ProgramOptimize::GenerateOps(
-    std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
-    Node *current_node) {
-
+    std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
+    Node *current_node) {
   if (current_node->inputs_.size() > 1 &&
       input_node != current_node->inputs_.back()) {
     return;
@@ -131,7 +130,6 @@ void ProgramOptimize::GenerateOps(
     auto &output = current_node->outputs_[i];
     GenerateOps(op_desc, current_node, output.get());
   }
-
 }
 
 void ProgramOptimize::GenerateOps(
diff --git a/src/framework/program/program-optimize/program_optimize.h b/src/framework/program/program-optimize/program_optimize.h
index 701358f5905..93943cf8395 100644
--- a/src/framework/program/program-optimize/program_optimize.h
+++ b/src/framework/program/program-optimize/program_optimize.h
@@ -35,9 +35,8 @@ class ProgramOptimize {
   std::vector<std::shared_ptr<BlockDesc>> new_blocks_;
   void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
                    Node *begin_node);
-  void GenerateOps(
-      std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
-      Node *current_node);
+  void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
+                   Node *input_node, Node *current_node);
   void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
                    Node *input_node, Node *current_node, bool adding_thread,
                    int thread_num, std::shared_ptr<BlockDesc> new_block);
diff --git a/src/io.cpp b/src/io.cpp
index c99556f0865..8f6a07f2dd1 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -232,7 +232,6 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
     originProgramDesc->Description("program: ");
   }
 
-
   paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
   return program;
 }
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index 0f52562f0bc..e93c910d2b3 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -32,7 +32,7 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
     std::vector<std::shared_ptr<framework::OpDesc>> origin_descs =
         node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
-                {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }
   std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; }
 };
diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h
index fe628631447..9019ef4d496 100644
--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -32,11 +32,11 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
     node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
   }
 
-  void FolderNodes(framework::Node *node) {
+  void FolderNodes(framework::Node *node) {
     vector<std::shared_ptr<framework::OpDesc>> origin_descs =
         node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
-                {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }
 
   std::string Type() { return G_OP_TYPE_FC; }
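Taken together, the matcher changes above implement chain fusion: a fixed sequence such as conv2d -> elementwise_add -> relu is recognized in the graph and folded into a single fused op (here fusion_conv_add_relu). The repository's matcher walks a Node DAG; the sketch below is only the one-dimensional analogue over a flat op list, with made-up names:

#include <iostream>
#include <string>
#include <vector>

// True when 'pattern' occurs in 'ops' starting at index 'at'.
bool MatchChain(const std::vector<std::string> &ops, size_t at,
                const std::vector<std::string> &pattern) {
  if (at + pattern.size() > ops.size()) {
    return false;
  }
  for (size_t i = 0; i < pattern.size(); ++i) {
    if (ops[at + i] != pattern[i]) {
      return false;
    }
  }
  return true;
}

int main() {
  const std::vector<std::string> program = {
      "feed", "conv2d", "elementwise_add", "relu", "fetch"};
  const std::vector<std::string> pattern = {"conv2d", "elementwise_add",
                                            "relu"};
  for (size_t i = 0; i < program.size(); ++i) {
    if (MatchChain(program, i, pattern)) {
      std::cout << "fold ops [" << i << ", " << i + pattern.size() - 1
                << "] into fusion_conv_add_relu" << std::endl;
    }
  }
  return 0;
}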