From 3b30799a641388ec221a2066ec8e29cee294140d Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Sat, 11 Jan 2020 02:52:55 +0100 Subject: [PATCH 01/12] Add Round op (#4124) * Added round op * Add CPU support, unit tests * Disable UT for PlaidML * Update year Co-authored-by: Scott Cyphers --- src/ngraph/CMakeLists.txt | 2 + src/ngraph/op/op_version_tbl.hpp | 1 + src/ngraph/op/round.cpp | 34 ++++++++ src/ngraph/op/round.hpp | 49 +++++++++++ src/ngraph/ops.hpp | 1 + src/ngraph/opsets/opset0_tbl.hpp | 1 + src/ngraph/pass/constant_folding_unary.cpp | 9 +- src/ngraph/runtime/cpu/cpu_builder.cpp | 16 ++++ src/ngraph/runtime/cpu/cpu_emitter.cpp | 15 ++++ .../runtime/cpu/cpu_external_function.cpp | 2 +- src/ngraph/runtime/cpu/kernel/round.hpp | 39 +++++++++ .../runtime/interpreter/int_executable.hpp | 8 ++ src/ngraph/runtime/plaidml/unit_test.manifest | 4 + src/ngraph/runtime/reference/round.hpp | 52 ++++++++++++ src/ngraph/serializer.cpp | 8 +- test/CMakeLists.txt | 1 + test/backend/round.in.cpp | 83 +++++++++++++++++++ test/op_is.cpp | 9 ++ 18 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 src/ngraph/op/round.cpp create mode 100644 src/ngraph/op/round.hpp create mode 100644 src/ngraph/runtime/cpu/kernel/round.hpp create mode 100644 src/ngraph/runtime/reference/round.hpp create mode 100644 test/backend/round.in.cpp diff --git a/src/ngraph/CMakeLists.txt b/src/ngraph/CMakeLists.txt index f81aaa78830..0bbe6a42d3c 100644 --- a/src/ngraph/CMakeLists.txt +++ b/src/ngraph/CMakeLists.txt @@ -291,6 +291,8 @@ set (SRC op/reduce_mean.hpp op/reduce_sum.cpp op/reduce_sum.hpp + op/round.cpp + op/round.hpp op/quantize.cpp op/quantize.hpp op/quantized_convolution.cpp diff --git a/src/ngraph/op/op_version_tbl.hpp b/src/ngraph/op/op_version_tbl.hpp index 3d62d6c9c4b..0194bcd50f4 100644 --- a/src/ngraph/op/op_version_tbl.hpp +++ b/src/ngraph/op/op_version_tbl.hpp @@ -205,6 +205,7 @@ NGRAPH_OP(Result, ngraph::op::v0, 0) NGRAPH_OP(Reverse, ngraph::op::v0, 0) NGRAPH_OP(Reverse, ngraph::op::v1, 1) NGRAPH_OP(ReverseSequence, ngraph::op::v0, 0) +NGRAPH_OP(Round, ngraph::op::v0, 0) NGRAPH_OP(ScalarConstantLike, ngraph::op::v0, 0) NGRAPH_OP(ScaleShift, ngraph::op::v0, 0) NGRAPH_OP(ScatterAdd, ngraph::op::v0, 0) diff --git a/src/ngraph/op/round.cpp b/src/ngraph/op/round.cpp new file mode 100644 index 00000000000..6a39278b491 --- /dev/null +++ b/src/ngraph/op/round.cpp @@ -0,0 +1,34 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
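// Editor's note (illustration, not part of the patch): round.cpp below is the
// standard nGraph unary-op boilerplate: define the op's type_info, run shape
// and type inference in the constructor, and clone through copy_with_new_args.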
+//*****************************************************************************
+
+#include "ngraph/op/round.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+constexpr NodeTypeInfo op::Round::type_info;
+
+op::Round::Round(const Output<Node>& arg)
+    : UnaryElementwiseArithmetic(arg)
+{
+    constructor_validate_and_infer_types();
+}
+
+shared_ptr<Node> op::Round::copy_with_new_args(const NodeVector& new_args) const
+{
+    check_new_args_count(this, new_args);
+    return make_shared<Round>(new_args.at(0));
+}
diff --git a/src/ngraph/op/round.hpp b/src/ngraph/op/round.hpp
new file mode 100644
index 00000000000..b1c1fc917e5
--- /dev/null
+++ b/src/ngraph/op/round.hpp
@@ -0,0 +1,49 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/op/util/unary_elementwise_arithmetic.hpp"
+
+namespace ngraph
+{
+    namespace op
+    {
+        namespace v0
+        {
+            /// \brief Elementwise round operation.
+            class NGRAPH_API Round : public util::UnaryElementwiseArithmetic
+            {
+            public:
+                static constexpr NodeTypeInfo type_info{"Round", 0};
+                const NodeTypeInfo& get_type_info() const override { return type_info; }
+                /// \brief Constructs a round operation.
+                Round() = default;
+
+                /// \brief Constructs a round operation. The output is rounded to the
+                /// nearest integer for each value. In the case of halves, the rule is to
+                /// round them to the nearest even integer.
+                ///
+                /// \param arg Node that produces the input tensor.
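+                ///
+                /// A minimal usage sketch (editor's illustration, not part of the
+                /// original patch):
+                ///   auto arg = std::make_shared<op::Parameter>(element::f32, Shape{4});
+                ///   auto r = std::make_shared<op::v0::Round>(arg);
+                ///   // 0.9 -> 1, 1.5 -> 2, 2.5 -> 2 (ties round to even)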
+                Round(const Output<Node>& arg);
+
+                virtual std::shared_ptr<Node>
+                    copy_with_new_args(const NodeVector& new_args) const override;
+            };
+        }
+        using v0::Round;
+    }
+}
diff --git a/src/ngraph/ops.hpp b/src/ngraph/ops.hpp
index f5f9dca8189..7b5b76c5849 100644
--- a/src/ngraph/ops.hpp
+++ b/src/ngraph/ops.hpp
@@ -161,6 +161,7 @@
 #include "ngraph/op/result.hpp"
 #include "ngraph/op/reverse.hpp"
 #include "ngraph/op/reverse_sequence.hpp"
+#include "ngraph/op/round.hpp"
 #include "ngraph/op/scatter_add.hpp"
 #include "ngraph/op/scatter_nd_add.hpp"
 #include "ngraph/op/select.hpp"
diff --git a/src/ngraph/opsets/opset0_tbl.hpp b/src/ngraph/opsets/opset0_tbl.hpp
index aa2de1ed469..5085235f7ad 100644
--- a/src/ngraph/opsets/opset0_tbl.hpp
+++ b/src/ngraph/opsets/opset0_tbl.hpp
@@ -173,6 +173,7 @@ NGRAPH_OP(Result, ngraph::op)
 NGRAPH_OP(Reverse, ngraph::op)
 NGRAPH_OP(ReverseSequence, ngraph::op)
 NGRAPH_OP(RNNCell, ngraph::op)
+NGRAPH_OP(Round, ngraph::op)
 NGRAPH_OP(ScalarConstantLike, ngraph::op)
 NGRAPH_OP(ScaleShift, ngraph::op)
 NGRAPH_OP(ScatterAdd, ngraph::op)
diff --git a/src/ngraph/pass/constant_folding_unary.cpp b/src/ngraph/pass/constant_folding_unary.cpp
index f6e9d7d26cb..925e981c40e 100644
--- a/src/ngraph/pass/constant_folding_unary.cpp
+++ b/src/ngraph/pass/constant_folding_unary.cpp
@@ -23,6 +23,7 @@
 #include "ngraph/op/negative.hpp"
 #include "ngraph/op/not.hpp"
 #include "ngraph/op/relu.hpp"
+#include "ngraph/op/round.hpp"
 #include "ngraph/op/sign.hpp"
 #include "ngraph/op/sqrt.hpp"
 #include "ngraph/runtime/reference/abs.hpp"
@@ -32,6 +33,7 @@
 #include "ngraph/runtime/reference/negate.hpp"
 #include "ngraph/runtime/reference/not.hpp"
 #include "ngraph/runtime/reference/relu.hpp"
+#include "ngraph/runtime/reference/round.hpp"
 #include "ngraph/runtime/reference/sign.hpp"
 #include "ngraph/runtime/reference/sqrt.hpp"
@@ -42,7 +44,7 @@ bool is_supported_unary_op(std::shared_ptr<Node> n)
 {
     return is_type<op::Abs>(n) || is_type<op::Ceiling>(n) || is_type<op::Floor>(n) ||
            is_type<op::Negative>(n) || is_type<op::Not>(n) || is_type<op::Relu>(n) ||
-           is_type<op::Sign>(n) || is_type<op::Sqrt>(n);
+           is_type<op::Round>(n) || is_type<op::Sign>(n) || is_type<op::Sqrt>(n);
 }

 template <class T>
@@ -109,6 +111,11 @@ shared_ptr<op::Constant> fold_constant_unary(shared_ptr<op::Constant> constant,
         runtime::reference::relu<T>(
             constant->get_data_ptr<T>(), buffer.get_ptr<T>(), shape_size(out_shape));
     }
+    else if (is_type<op::Round>(unary))
+    {
+        runtime::reference::round<T>(
+            constant->get_data_ptr<T>(), buffer.get_ptr<T>(), shape_size(out_shape));
+    }
     else if (is_type<op::Sign>(unary))
     {
         runtime::reference::sign<T>(
diff --git a/src/ngraph/runtime/cpu/cpu_builder.cpp b/src/ngraph/runtime/cpu/cpu_builder.cpp
index c1ad65eabeb..2650cbb0f8d 100644
--- a/src/ngraph/runtime/cpu/cpu_builder.cpp
+++ b/src/ngraph/runtime/cpu/cpu_builder.cpp
@@ -59,6 +59,7 @@
 #include "ngraph/op/power.hpp"
 #include "ngraph/op/relu.hpp"
 #include "ngraph/op/result.hpp"
+#include "ngraph/op/round.hpp"
 #include "ngraph/op/sign.hpp"
 #include "ngraph/op/sin.hpp"
 #include "ngraph/op/sinh.hpp"
@@ -100,6 +101,7 @@
 #include "ngraph/runtime/cpu/kernel/or.hpp"
 #include "ngraph/runtime/cpu/kernel/relu.hpp"
 #include "ngraph/runtime/cpu/kernel/result.hpp"
+#include "ngraph/runtime/cpu/kernel/round.hpp"
 #include "ngraph/runtime/cpu/kernel/sign.hpp"
 #include "ngraph/runtime/cpu/kernel/sin.hpp"
 #include "ngraph/runtime/cpu/kernel/sinh.hpp"
@@ -366,6 +368,12 @@ namespace ngraph
                 BUILD_UNARY_ELEMWISE_FUNCTOR(runtime::cpu::kernel::floor);
             }

+            template <>
+            void Builder::BUILDER_DECL(ngraph::op::Round)
+            {
+                BUILD_UNARY_ELEMWISE_FUNCTOR(runtime::cpu::kernel::round);
+            }
+
             template <>
             void Builder::BUILDER_DECL(ngraph::op::Negative)
             {
@@ -566,6 +574,12 @@ namespace ngraph
BUILD_UNARY_ELEMWISE_CF_FUNCTOR(runtime::cpu::kernel::floor); } + template <> + NodeExecutorTy Builder::BUILDER_CF_DECL(ngraph::op::Round) + { + BUILD_UNARY_ELEMWISE_CF_FUNCTOR(runtime::cpu::kernel::round); + } + template <> NodeExecutorTy Builder::BUILDER_CF_DECL(ngraph::op::Ceiling) { @@ -699,6 +713,7 @@ namespace ngraph REGISTER_OP_BUILDER(Negative); REGISTER_OP_BUILDER(Exp); REGISTER_OP_BUILDER(Log); + REGISTER_OP_BUILDER(Round); REGISTER_OP_BUILDER(Sqrt); REGISTER_OP_BUILDER(Sign); REGISTER_OP_BUILDER(Sin); @@ -740,6 +755,7 @@ namespace ngraph REGISTER_CF_BUILDER(And); REGISTER_CF_BUILDER(Or); REGISTER_CF_BUILDER(Xor); + REGISTER_CF_BUILDER(Round); REGISTER_CF_BUILDER(Sign); REGISTER_CF_BUILDER(Not); REGISTER_CF_BUILDER(Power); diff --git a/src/ngraph/runtime/cpu/cpu_emitter.cpp b/src/ngraph/runtime/cpu/cpu_emitter.cpp index 1c1ce682902..20a47ba49fb 100644 --- a/src/ngraph/runtime/cpu/cpu_emitter.cpp +++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp @@ -2330,6 +2330,21 @@ namespace ngraph writer.block_end(); } + template <> + void CPU_Emitter::EMITTER_DECL(ngraph::op::Round) + { + (void)external_function; + (void)node; + writer.block_begin(); + size_t element_count = out[0].get_size(); + writer << "#pragma omp parallel for\n"; + writer << "for (size_t i = 0; i < " << element_count << "; i++)\n"; + writer.block_begin(); + writer << out[0].get_name() << "[i] = round(" << args[0].get_name() << "[i]);\n"; + writer.block_end(); + writer.block_end(); + } + template <> void CPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt) { diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp index 74428f26fa2..0d957725fc7 100644 --- a/src/ngraph/runtime/cpu/cpu_external_function.cpp +++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp @@ -454,7 +454,7 @@ static const runtime::cpu::OpMap dispatcher{ {TI(ngraph::op::Tile), &runtime::cpu::CPU_Emitter::emit}, {TI(ngraph::op::Gelu), &runtime::cpu::CPU_Emitter::emit}, {TI(ngraph::op::GeluBackprop), &runtime::cpu::CPU_Emitter::emit}, -}; + {TI(ngraph::op::Round), &runtime::cpu::CPU_Emitter::emit}}; static void generate_isnan_isinf_check(CodeWriter& writer, diff --git a/src/ngraph/runtime/cpu/kernel/round.hpp b/src/ngraph/runtime/cpu/kernel/round.hpp new file mode 100644 index 00000000000..aa7b93bd59d --- /dev/null +++ b/src/ngraph/runtime/cpu/kernel/round.hpp @@ -0,0 +1,39 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
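// Editor's note (illustration, not part of the patch): the codegen emitter
// above calls the C library round(), which rounds halves away from zero
// (round(2.5) == 3.0, round(-2.5) == -3.0), whereas the reference kernel used
// by the DEX builder and by constant folding rounds halves to the nearest
// even integer. The backend test expectations below follow the half-to-even
// rule.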
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/runtime/reference/round.hpp"
+
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace cpu
+        {
+            namespace kernel
+            {
+                template <typename ElementType>
+                void round(void* arg, void* output, size_t count, int arena)
+                {
+                    reference::round(static_cast<ElementType*>(arg),
+                                     static_cast<ElementType*>(output),
+                                     count);
+                }
+            }
+        }
+    }
+}
diff --git a/src/ngraph/runtime/interpreter/int_executable.hpp b/src/ngraph/runtime/interpreter/int_executable.hpp
index cedca3b4b1c..d1bc86fae8f 100644
--- a/src/ngraph/runtime/interpreter/int_executable.hpp
+++ b/src/ngraph/runtime/interpreter/int_executable.hpp
@@ -96,6 +96,7 @@
 #include "ngraph/runtime/reference/result.hpp"
 #include "ngraph/runtime/reference/reverse.hpp"
 #include "ngraph/runtime/reference/reverse_sequence.hpp"
+#include "ngraph/runtime/reference/round.hpp"
 #include "ngraph/runtime/reference/scatter_add.hpp"
 #include "ngraph/runtime/reference/scatter_nd_add.hpp"
 #include "ngraph/runtime/reference/select.hpp"
@@ -1590,6 +1591,13 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable
            }
            break;
        }
+        case OP_TYPEID::Round:
+        {
+            size_t element_count = shape_size(node.get_output_shape(0));
+            reference::round<T>(
+                args[0]->get_data_ptr<const T>(), out[0]->get_data_ptr<T>(), element_count);
+            break;
+        }
        case OP_TYPEID::ScatterAdd:
        {
            if (node.get_input_element_type(1) == element::i64)
diff --git a/src/ngraph/runtime/plaidml/unit_test.manifest b/src/ngraph/runtime/plaidml/unit_test.manifest
index f58d9e7e1ef..dfc0f1bcb85 100644
--- a/src/ngraph/runtime/plaidml/unit_test.manifest
+++ b/src/ngraph/runtime/plaidml/unit_test.manifest
@@ -253,6 +253,10 @@ model_cum_sum_2d_axis_input
 model_cum_sum_2d_dynamic_axis_input
 model_cum_sum_3d_exclusive_reverse

+# unsupported op: `Round`
+round
+round_2D
+
 # onnx tests
 model_quant_conv_linear_2d
 model_quant_conv_linear_3d
diff --git a/src/ngraph/runtime/reference/round.hpp b/src/ngraph/runtime/reference/round.hpp
new file mode 100644
index 00000000000..50949ab5e3f
--- /dev/null
+++ b/src/ngraph/runtime/reference/round.hpp
@@ -0,0 +1,52 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
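// Editor's note (illustration, not part of the patch): the interpreter case
// and the CPU DEX kernel above both delegate to reference::round, so every
// non-codegen execution path shares the single rounding implementation
// defined just below.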
+//*****************************************************************************
+
+#pragma once
+
+#include <cmath>
+
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace reference
+        {
+            template <typename T>
+            T round_to_nearest_even(const T arg)
+            {
+                const auto floor_arg = std::floor(arg);
+                const auto diff = arg - floor_arg;
+                if (diff < 0.5f || (diff == 0.5f && static_cast<int>(floor_arg) % 2 == 0))
+                {
+                    return floor_arg;
+                }
+                else
+                {
+                    return floor_arg + 1.0f;
+                }
+            }
+
+            template <typename T>
+            void round(const T* arg, T* out, size_t count)
+            {
+                for (size_t i = 0; i < count; ++i)
+                {
+                    out[i] = round_to_nearest_even(arg[i]);
+                }
+            }
+        }
+    }
+}
diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp
index 30c3e49b820..11ea86f4eda 100644
--- a/src/ngraph/serializer.cpp
+++ b/src/ngraph/serializer.cpp
@@ -2654,7 +2654,11 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
     }
     case OP_TYPEID::ReorgYolo: { break;
     }
-
+    case OP_TYPEID::Round:
+    {
+        node = make_shared<op::Round>(args[0]);
+        break;
+    }
     case OP_TYPEID::ScalarConstantLike:
     {
         double value = node_js.at("value").get<double>();
@@ -3553,6 +3557,8 @@ json JSONSerializer::serialize_node(const Node& n)
     }
     case OP_TYPEID::ReorgYolo: { break;
     }
+    case OP_TYPEID::Round: { break;
+    }
     case OP_TYPEID::DeformableConvolution_v1:
     {
         const auto tmp = static_cast<const op::v1::DeformableConvolution*>(&n);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 9f4fd127a07..2c19b25e704 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -350,6 +350,7 @@ set(MULTI_TEST_SRC
     backend/reshape.in.cpp
     backend/reverse_sequence.in.cpp
     backend/reverse.in.cpp
+    backend/round.in.cpp
     backend/scatter.in.cpp
     backend/select.in.cpp
     backend/shape_of.in.cpp
diff --git a/test/backend/round.in.cpp b/test/backend/round.in.cpp
new file mode 100644
index 00000000000..e18a26d0c32
--- /dev/null
+++ b/test/backend/round.in.cpp
@@ -0,0 +1,83 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
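// A worked trace of round_to_nearest_even above (editor's illustration, not
// part of the patch): for arg = -4.5f, floor_arg = -5 and diff = 0.5;
// static_cast<int>(-5) % 2 == -1, so the else branch returns floor_arg + 1 =
// -4.0f, matching the -4.5f -> -4.0f expectation in the tests below.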
+//*****************************************************************************
+
+#include "gtest/gtest.h"
+#include "ngraph/ngraph.hpp"
+#include "util/all_close.hpp"
+#include "util/all_close_f.hpp"
+#include "util/ndarray.hpp"
+#include "util/test_control.hpp"
+#include "util/test_tools.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+static string s_manifest = "${MANIFEST}";
+
+NGRAPH_TEST(${BACKEND_NAME}, round)
+{
+    Shape shape{5};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f = make_shared<Function>(make_shared<op::Round>(A), ParameterVector{A});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a, vector<float>{0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
+    auto result = backend->create_tensor(element::f32, shape);
+
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a});
+    EXPECT_TRUE(test::all_close_f((vector<float>{1.0f, 2.0f, 2.0f, 2.0f, -4.0f}),
+                                  read_vector<float>(result),
+                                  MIN_FLOAT_TOLERANCE_BITS));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, round_2D)
+{
+    Shape shape{3, 5};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f = make_shared<Function>(make_shared<op::Round>(A), ParameterVector{A});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a,
+              vector<float>{0.1f,
+                            0.5f,
+                            0.9f,
+                            1.2f,
+                            1.5f,
+                            1.8f,
+                            2.3f,
+                            2.5f,
+                            2.7f,
+                            -1.1f,
+                            -1.5f,
+                            -1.9f,
+                            -2.2f,
+                            -2.5f,
+                            -2.8f});
+    auto result = backend->create_tensor(element::f32, shape);
+
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a});
+    EXPECT_TRUE(test::all_close_f(
+        (vector<float>{
+            0.f, 0.f, 1.f, 1.f, 2.f, 2.f, 2.f, 2.f, 3.f, -1.f, -2.f, -2.f, -2.f, -2.f, -3.f}),
+        read_vector<float>(result),
+        MIN_FLOAT_TOLERANCE_BITS));
+}
diff --git a/test/op_is.cpp b/test/op_is.cpp
index 5753b4ce38e..e14b0271e0f 100644
--- a/test/op_is.cpp
+++ b/test/op_is.cpp
@@ -1131,6 +1131,15 @@ namespace
         EXPECT_FALSE(node.is_binary_elementwise_logical());
     }

+    void op_is_Round()
+    {
+        op::Round node;
+        EXPECT_TRUE(node.is_unary_elementwise_arithmetic());
+        EXPECT_FALSE(node.is_binary_elementwise_arithmetic());
+        EXPECT_FALSE(node.is_binary_elementwise_comparison());
+        EXPECT_FALSE(node.is_binary_elementwise_logical());
+    }
+
     void op_is_ScalarConstantLike()
     {
         op::ScalarConstantLike node;

From 7b3b1b6cebfb4ce1fcce51828ccccfd949e6d623 Mon Sep 17 00:00:00 2001
From: Mateusz Bencer
Date: Sat, 11 Jan 2020 04:19:37 +0100
Subject: [PATCH 02/12] Change LogSoftmax to produce v1 and remove FusedOp
 (#4139)

* Removed LogSoftmax FusedOp, changed onnx to produce v1

* Code review remarks introduced

* fix after merge from master

Co-authored-by: Scott Cyphers
---
 src/ngraph/CMakeLists.txt                     |  2 -
 .../frontend/onnx_import/op/log_softmax.cpp   | 16 ++++--
 src/ngraph/op/fused/log_softmax.cpp           | 54 -------------------
 src/ngraph/op/fused/log_softmax.hpp           | 54 -------------------
 src/ngraph/op/op_version_tbl.hpp              |  1 -
 src/ngraph/ops.hpp                            |  1 -
 src/ngraph/opsets/opset0_tbl.hpp              |  1 -
 .../runtime/interpreter/int_executable.hpp    |  1 -
 src/ngraph/serializer.cpp                     | 12 -----
 test/CMakeLists.txt                           |  1 -
 test/op_is.cpp                                |  9 ----
 test/type_prop/log_softmax.cpp                | 42 ---------------
 12 files changed, 11 insertions(+), 183 deletions(-)
 delete mode 100644 src/ngraph/op/fused/log_softmax.cpp
 delete mode 100644 src/ngraph/op/fused/log_softmax.hpp
 delete mode 100644 test/type_prop/log_softmax.cpp

diff --git a/src/ngraph/CMakeLists.txt b/src/ngraph/CMakeLists.txt
index 0bbe6a42d3c..d402e58ddde 100644
--- a/src/ngraph/CMakeLists.txt
+++ 
b/src/ngraph/CMakeLists.txt @@ -385,8 +385,6 @@ set (SRC op/fused/gru_cell.hpp op/fused/layer_norm.cpp op/fused/layer_norm.hpp - op/fused/log_softmax.cpp - op/fused/log_softmax.hpp op/fused/lstm_cell.cpp op/fused/lstm_cell.hpp op/fused/lstm_sequence.cpp diff --git a/src/ngraph/frontend/onnx_import/op/log_softmax.cpp b/src/ngraph/frontend/onnx_import/op/log_softmax.cpp index 5800c9351cb..457c655b47a 100644 --- a/src/ngraph/frontend/onnx_import/op/log_softmax.cpp +++ b/src/ngraph/frontend/onnx_import/op/log_softmax.cpp @@ -16,8 +16,9 @@ #include +#include "default_opset.hpp" #include "log_softmax.hpp" -#include "ngraph/opsets/opset0.hpp" +#include "ngraph/validation_util.hpp" namespace ngraph { @@ -30,11 +31,16 @@ namespace ngraph NodeVector log_softmax(const Node& node) { NodeVector inputs{node.get_ng_inputs()}; - auto data = inputs.at(0); - auto data_shape = data->get_shape(); - int axis = node.get_attribute_value("axis", 1); + const auto data = inputs.at(0); + const auto data_shape = data->get_shape(); - return {std::make_shared(data, axis)}; + const auto axis = node.get_attribute_value("axis", 1); + const auto normalized_axis = + ngraph::normalize_axis(node.get_description(), axis, data_shape.size()); + + const auto softmax = + std::make_shared(data, normalized_axis); + return {std::make_shared(softmax)}; } } // namespace set_1 diff --git a/src/ngraph/op/fused/log_softmax.cpp b/src/ngraph/op/fused/log_softmax.cpp deleted file mode 100644 index 8625626cb12..00000000000 --- a/src/ngraph/op/fused/log_softmax.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
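// Editor's note (illustration, not part of the patch): the importer change
// above reproduces the removed FusedOp's decomposition directly, i.e.
//   log_softmax(data, axis) == Log(Softmax(data, normalized_axis))
// with the axis normalized against the data rank via ngraph::normalize_axis.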
-//***************************************************************************** -#include - -#include "ngraph/op/fused/log_softmax.hpp" -#include "ngraph/op/log.hpp" -#include "ngraph/op/softmax.hpp" -#include "ngraph/validation_util.hpp" - -using namespace std; -using namespace ngraph; - -constexpr NodeTypeInfo op::LogSoftmax::type_info; - -op::LogSoftmax::LogSoftmax(const Output& data, int64_t axis) - : FusedOp({data}) - , m_axis(axis) -{ - constructor_validate_and_infer_types(); -} - -NodeVector op::LogSoftmax::decompose_op() const -{ - const auto data = input_value(0); - const auto data_shape = data.get_shape(); - - auto axis = ngraph::normalize_axis(this, m_axis, data_shape.size()); - - std::vector axes(data_shape.size() - axis); - std::iota(std::begin(axes), std::end(axes), axis); - - auto softmax = std::make_shared(data, axes); - - return {std::make_shared(softmax)}; -} - -shared_ptr op::LogSoftmax::copy_with_new_args(const NodeVector& new_args) const -{ - check_new_args_count(this, new_args); - return make_shared(new_args.at(0), m_axis); -} diff --git a/src/ngraph/op/fused/log_softmax.hpp b/src/ngraph/op/fused/log_softmax.hpp deleted file mode 100644 index 584d6a1ec54..00000000000 --- a/src/ngraph/op/fused/log_softmax.hpp +++ /dev/null @@ -1,54 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -#pragma once - -#include "ngraph/node.hpp" -#include "ngraph/op/op.hpp" -#include "ngraph/op/util/fused_op.hpp" - -namespace ngraph -{ - namespace op - { - namespace v0 - { - /// \brief LogSoftmax operation - class NGRAPH_API LogSoftmax : public ngraph::op::util::FusedOp - { - public: - static constexpr NodeTypeInfo type_info{"LogSoftmax", 0}; - LogSoftmax() = default; - const NodeTypeInfo& get_type_info() const override { return type_info; } - /// \brief Constructs a LogSoftmax node. 
- /// - /// \param data Node that produces the first input tensor - /// \param axis Describes the axis of the inputs when coerced to 2D - LogSoftmax(const Output& data, int64_t axis); - - virtual NodeVector decompose_op() const override; - - virtual std::shared_ptr - copy_with_new_args(const NodeVector& new_args) const override; - - int64_t get_axis() const { return m_axis; } - protected: - int64_t m_axis; - }; - } - using v0::LogSoftmax; - } // namespace op -} // namespace ngraph diff --git a/src/ngraph/op/op_version_tbl.hpp b/src/ngraph/op/op_version_tbl.hpp index 0194bcd50f4..f2b0ab05329 100644 --- a/src/ngraph/op/op_version_tbl.hpp +++ b/src/ngraph/op/op_version_tbl.hpp @@ -132,7 +132,6 @@ NGRAPH_OP(Less, ngraph::op::v1, 1) NGRAPH_OP(LessEq, ngraph::op::v0, 0) NGRAPH_OP(LessEqual, ngraph::op::v1, 1) NGRAPH_OP(Log, ngraph::op, 0) -NGRAPH_OP(LogSoftmax, ngraph::op::v0, 0) NGRAPH_OP(LogicalAnd, ngraph::op::v1, 1) NGRAPH_OP(LogicalNot, ngraph::op::v1, 1) NGRAPH_OP(LogicalOr, ngraph::op::v1, 1) diff --git a/src/ngraph/ops.hpp b/src/ngraph/ops.hpp index 7b5b76c5849..1727c9aedcd 100644 --- a/src/ngraph/ops.hpp +++ b/src/ngraph/ops.hpp @@ -98,7 +98,6 @@ #include "ngraph/op/fused/gru_cell.hpp" #include "ngraph/op/fused/hard_sigmoid.hpp" #include "ngraph/op/fused/layer_norm.hpp" -#include "ngraph/op/fused/log_softmax.hpp" #include "ngraph/op/fused/lstm_cell.hpp" #include "ngraph/op/fused/lstm_sequence.hpp" #include "ngraph/op/fused/matmul.hpp" diff --git a/src/ngraph/opsets/opset0_tbl.hpp b/src/ngraph/opsets/opset0_tbl.hpp index 5085235f7ad..b66861f7464 100644 --- a/src/ngraph/opsets/opset0_tbl.hpp +++ b/src/ngraph/opsets/opset0_tbl.hpp @@ -127,7 +127,6 @@ NGRAPH_OP(LayerNormBackprop, ngraph::op) NGRAPH_OP(Less, ngraph::op) NGRAPH_OP(LessEq, ngraph::op) NGRAPH_OP(Log, ngraph::op) -NGRAPH_OP(LogSoftmax, ngraph::op) NGRAPH_OP(LRN, ngraph::op) NGRAPH_OP(LSTMCell, ngraph::op) NGRAPH_OP(LSTMSequence, ngraph::op) diff --git a/src/ngraph/runtime/interpreter/int_executable.hpp b/src/ngraph/runtime/interpreter/int_executable.hpp index d1bc86fae8f..6cf3fb01e91 100644 --- a/src/ngraph/runtime/interpreter/int_executable.hpp +++ b/src/ngraph/runtime/interpreter/int_executable.hpp @@ -1865,7 +1865,6 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable case OP_TYPEID::Interpolate: case OP_TYPEID::LayerNorm: case OP_TYPEID::LayerNormBackprop: - case OP_TYPEID::LogSoftmax: case OP_TYPEID::LSTMCell: case OP_TYPEID::LSTMSequence: case OP_TYPEID::MVN: diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp index 11ea86f4eda..b0709425cd2 100644 --- a/src/ngraph/serializer.cpp +++ b/src/ngraph/serializer.cpp @@ -1939,12 +1939,6 @@ shared_ptr JSONDeserializer::deserialize_node(json node_js) args[0], args[1], read_auto_broadcast(node_js, "auto_broadcast")); break; } - case OP_TYPEID::LogSoftmax: - { - auto axis = node_js.at("axis").get(); - node = make_shared(args[0], axis); - break; - } case OP_TYPEID::LRN: { auto alpha = node_js.at("alpha").get(); @@ -3960,12 +3954,6 @@ json JSONSerializer::serialize_node(const Node& n) } break; } - case OP_TYPEID::LogSoftmax: - { - auto tmp = static_cast(&n); - node["axis"] = tmp->get_axis(); - break; - } case OP_TYPEID::LRN: { auto tmp = static_cast(&n); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2c19b25e704..2921766d5ac 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -148,7 +148,6 @@ set(SRC type_prop/hard_sigmoid.cpp type_prop/index_reduction.cpp type_prop/layer_norm.cpp - type_prop/log_softmax.cpp type_prop/lrn.cpp 
type_prop/lstm_cell.cpp type_prop/lstm_sequence.cpp diff --git a/test/op_is.cpp b/test/op_is.cpp index e14b0271e0f..d9c0bb6d75f 100644 --- a/test/op_is.cpp +++ b/test/op_is.cpp @@ -717,15 +717,6 @@ namespace EXPECT_FALSE(node.is_binary_elementwise_logical()); } - void op_is_LogSoftmax() - { - op::LogSoftmax node; - EXPECT_FALSE(node.is_unary_elementwise_arithmetic()); - EXPECT_FALSE(node.is_binary_elementwise_arithmetic()); - EXPECT_FALSE(node.is_binary_elementwise_comparison()); - EXPECT_FALSE(node.is_binary_elementwise_logical()); - } - void op_is_LRN() { op::LRN node; diff --git a/test/type_prop/log_softmax.cpp b/test/type_prop/log_softmax.cpp deleted file mode 100644 index a0dd0724bf3..00000000000 --- a/test/type_prop/log_softmax.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -#include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" -#include "util/type_prop.hpp" - -using namespace std; -using namespace ngraph; - -TEST(type_prop, log_softmax) -{ - const auto data = make_shared(element::f64, Shape{2, 2}); - const auto axis = 2; - try - { - const auto log_softmax = make_shared(data, axis); - // Should have thrown, so fail if it didn't - FAIL() << "Invalid axis value not detected"; - } - catch (const ngraph_error& error) - { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter axis ")); - } - catch (...) 
- { - FAIL() << "Log softmax failed for unexpected reason"; - } -} From ef553de3cd682065fe95835b221a8d3e16724aef Mon Sep 17 00:00:00 2001 From: Robert Kimball Date: Sun, 12 Jan 2020 06:31:01 -0800 Subject: [PATCH 03/12] Move GCPU to runtime/gcpu so that unit test manifest works (#4150) * Move GCPU to runtime/gcpu so that unit test manifest works * style * Add tests to manifest * Fix CPU only check * Update GCPU dependency Co-authored-by: Scott Cyphers --- CMakeLists.txt | 7 +- src/ngraph/runtime/CMakeLists.txt | 2 +- .../{generic_cpu => gcpu}/CMakeLists.txt | 6 +- .../{generic_cpu => gcpu}/gcpu_backend.cpp | 6 +- .../{generic_cpu => gcpu}/gcpu_backend.hpp | 0 .../gcpu_backend_visibility.hpp | 0 .../{generic_cpu => gcpu}/gcpu_executable.cpp | 2 +- src/ngraph/runtime/gcpu/gcpu_executable.hpp | 114 ++++++++++ .../{generic_cpu => gcpu}/kernel/dot.hpp | 0 .../{generic_cpu => gcpu}/unit_test.manifest | 3 + .../runtime/generic_cpu/gcpu_executable.hpp | 198 ------------------ 11 files changed, 125 insertions(+), 213 deletions(-) rename src/ngraph/runtime/{generic_cpu => gcpu}/CMakeLists.txt (87%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_backend.cpp (93%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_backend.hpp (100%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_backend_visibility.hpp (100%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_executable.cpp (99%) create mode 100644 src/ngraph/runtime/gcpu/gcpu_executable.hpp rename src/ngraph/runtime/{generic_cpu => gcpu}/kernel/dot.hpp (100%) rename src/ngraph/runtime/{generic_cpu => gcpu}/unit_test.manifest (63%) delete mode 100644 src/ngraph/runtime/generic_cpu/gcpu_executable.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fda32e33b81..d3e2a30598a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,7 +169,7 @@ option(NGRAPH_USE_LEGACY_MKLDNN "Use legacy MKLDNN" FALSE) option(NGRAPH_MLIR_ENABLE "Control the building of MLIR backend" FALSE) option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE) option(NGRAPH_NOP_ENABLE "Control the building of the NOP backend" TRUE) -option(NGRAPH_GENERIC_CPU_ENABLE "Enable build nGraph for generic CPU backend" FALSE) +option(NGRAPH_GENERIC_CPU_ENABLE "Enable build nGraph for generic CPU backend" TRUE) option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" FALSE) option(NGRAPH_DEPRECATED_ENABLE "Enable compiler deprecation pragmas for deprecated APIs (recommended only for development use)" FALSE) option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" FALSE) @@ -199,10 +199,7 @@ if (NGRAPH_STATIC_LIB_ENABLE) set(NGRAPH_EXPORT_TARGETS_ENABLE OFF) endif() -if (NGRAPH_CPU_ENABLE - AND - (NOT NGRAPH_GENERIC_CPU_ENABLE) - ) +if (NGRAPH_CPU_ENABLE) set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON) endif() diff --git a/src/ngraph/runtime/CMakeLists.txt b/src/ngraph/runtime/CMakeLists.txt index 7d22a723568..f11b51922cb 100755 --- a/src/ngraph/runtime/CMakeLists.txt +++ b/src/ngraph/runtime/CMakeLists.txt @@ -28,7 +28,7 @@ if (NGRAPH_NOP_ENABLE) endif() if (NGRAPH_GENERIC_CPU_ENABLE) - add_subdirectory(generic_cpu) + add_subdirectory(gcpu) endif() if (NGRAPH_PLAIDML_ENABLE) diff --git a/src/ngraph/runtime/generic_cpu/CMakeLists.txt b/src/ngraph/runtime/gcpu/CMakeLists.txt similarity index 87% rename from src/ngraph/runtime/generic_cpu/CMakeLists.txt rename to src/ngraph/runtime/gcpu/CMakeLists.txt index be7c5b52944..91ba90ef47e 100644 --- a/src/ngraph/runtime/generic_cpu/CMakeLists.txt +++ b/src/ngraph/runtime/gcpu/CMakeLists.txt @@ -15,17 
+15,13 @@ # ****************************************************************************** if (NGRAPH_GENERIC_CPU_ENABLE) - # find_package(OpenMP) - # if (OPENMP_FOUND) - # add_compile_options(${OpenMP_CXX_FLAGS}) - # endif() add_library(gcpu_backend SHARED gcpu_backend.cpp gcpu_executable.cpp) if(NGRAPH_LIB_VERSIONING_ENABLE) set_target_properties(gcpu_backend PROPERTIES VERSION ${NGRAPH_VERSION} SOVERSION ${NGRAPH_API_VERSION}) endif() - target_link_libraries(gcpu_backend PRIVATE ngraph libeigen) + target_link_libraries(gcpu_backend PRIVATE ngraph interpreter_backend) target_compile_definitions(gcpu_backend PRIVATE GCPU_BACKEND_DLL_EXPORTS) install(TARGETS gcpu_backend diff --git a/src/ngraph/runtime/generic_cpu/gcpu_backend.cpp b/src/ngraph/runtime/gcpu/gcpu_backend.cpp similarity index 93% rename from src/ngraph/runtime/generic_cpu/gcpu_backend.cpp rename to src/ngraph/runtime/gcpu/gcpu_backend.cpp index 3b7ddbc8a54..748242b0d4e 100644 --- a/src/ngraph/runtime/generic_cpu/gcpu_backend.cpp +++ b/src/ngraph/runtime/gcpu/gcpu_backend.cpp @@ -14,12 +14,12 @@ // limitations under the License. //***************************************************************************** -#include "ngraph/runtime/generic_cpu/gcpu_backend_visibility.hpp" +#include "ngraph/runtime/gcpu/gcpu_backend_visibility.hpp" #include "ngraph/except.hpp" #include "ngraph/runtime/backend_manager.hpp" -#include "ngraph/runtime/generic_cpu/gcpu_backend.hpp" -#include "ngraph/runtime/generic_cpu/gcpu_executable.hpp" +#include "ngraph/runtime/gcpu/gcpu_backend.hpp" +#include "ngraph/runtime/gcpu/gcpu_executable.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/util.hpp" diff --git a/src/ngraph/runtime/generic_cpu/gcpu_backend.hpp b/src/ngraph/runtime/gcpu/gcpu_backend.hpp similarity index 100% rename from src/ngraph/runtime/generic_cpu/gcpu_backend.hpp rename to src/ngraph/runtime/gcpu/gcpu_backend.hpp diff --git a/src/ngraph/runtime/generic_cpu/gcpu_backend_visibility.hpp b/src/ngraph/runtime/gcpu/gcpu_backend_visibility.hpp similarity index 100% rename from src/ngraph/runtime/generic_cpu/gcpu_backend_visibility.hpp rename to src/ngraph/runtime/gcpu/gcpu_backend_visibility.hpp diff --git a/src/ngraph/runtime/generic_cpu/gcpu_executable.cpp b/src/ngraph/runtime/gcpu/gcpu_executable.cpp similarity index 99% rename from src/ngraph/runtime/generic_cpu/gcpu_executable.cpp rename to src/ngraph/runtime/gcpu/gcpu_executable.cpp index d718813d495..191a46b795c 100644 --- a/src/ngraph/runtime/generic_cpu/gcpu_executable.cpp +++ b/src/ngraph/runtime/gcpu/gcpu_executable.cpp @@ -14,7 +14,7 @@ // limitations under the License. //***************************************************************************** -#include "ngraph/runtime/generic_cpu/gcpu_executable.hpp" +#include "ngraph/runtime/gcpu/gcpu_executable.hpp" #include "ngraph/cpio.hpp" #include "ngraph/descriptor/layout/dense_tensor_layout.hpp" #include "ngraph/except.hpp" diff --git a/src/ngraph/runtime/gcpu/gcpu_executable.hpp b/src/ngraph/runtime/gcpu/gcpu_executable.hpp new file mode 100644 index 00000000000..5debb75c429 --- /dev/null +++ b/src/ngraph/runtime/gcpu/gcpu_executable.hpp @@ -0,0 +1,114 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
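// Editor's note (illustration, not part of the patch): the new header below
// declares GCPUExecutable as a subclass of the interpreter's INTExecutable;
// it overrides only Broadcast and Reshape with the opt_kernel variants and
// falls back to the interpreter's op_engine for every other op.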
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "ngraph/ops.hpp" +#include "ngraph/runtime/aligned_buffer.hpp" +#include "ngraph/runtime/backend.hpp" +#include "ngraph/runtime/host_tensor.hpp" +#include "ngraph/runtime/interpreter/int_executable.hpp" +#include "ngraph/runtime/opt_kernel/broadcast.hpp" +#include "ngraph/runtime/opt_kernel/reshape.hpp" +#include "ngraph/runtime/tensor.hpp" + +namespace ngraph +{ + namespace runtime + { + namespace gcpu + { + class GCPUBackend; + class GCPUExecutable; + + namespace + { + // This expands the op list in op_tbl.hpp into a list of enumerations that look like + // this: + // Abs, + // Acos, + // ... + enum class OP_TYPEID + { +#define NGRAPH_OP(NAME, NAMESPACE) NAME, +#include "ngraph/opsets/opset0_tbl.hpp" +#undef NGRAPH_OP + UnknownOp + }; + } + } + } +} + +class ngraph::runtime::gcpu::GCPUExecutable : public runtime::interpreter::INTExecutable +{ + friend class GCPUBackend; + +public: + GCPUExecutable(const std::shared_ptr& function, + bool enable_performance_collection = false); + + bool call(const std::vector>& outputs, + const std::vector>& intputs) override; + +private: + int get_alignment() const { return 64; } + void generate_calls(const element::Type& type, + const Node& op, + const std::vector>& outputs, + const std::vector>& inputs) override; + + template + void gop_engine(const Node& node, + const std::vector>& out, + const std::vector>& args) + { + switch (INTExecutable::get_typeid(node)) + { + case ngraph::runtime::interpreter::OP_TYPEID::Broadcast: + { + const op::Broadcast* broadcast = static_cast(&node); + Shape in_shape = node.get_input_shape(0); + Shape out_shape = node.get_output_shape(0); + AxisSet broadcast_axes = broadcast->get_broadcast_axes(); + reference::broadcast(args[0]->get_data_ptr(), + out[0]->get_data_ptr(), + in_shape, + out_shape, + broadcast_axes); + break; + } + case ngraph::runtime::interpreter::OP_TYPEID::Reshape: + { + const op::Reshape* reshape = static_cast(&node); + reference::reshape(args[0]->get_data_ptr(), + out[0]->get_data_ptr(), + node.get_input_shape(0), + reshape->get_input_order(), + node.get_output_shape(0)); + break; + } + default: op_engine(node, out, args); break; + } + } +}; diff --git a/src/ngraph/runtime/generic_cpu/kernel/dot.hpp b/src/ngraph/runtime/gcpu/kernel/dot.hpp similarity index 100% rename from src/ngraph/runtime/generic_cpu/kernel/dot.hpp rename to src/ngraph/runtime/gcpu/kernel/dot.hpp diff --git a/src/ngraph/runtime/generic_cpu/unit_test.manifest b/src/ngraph/runtime/gcpu/unit_test.manifest similarity index 63% rename from src/ngraph/runtime/generic_cpu/unit_test.manifest rename to src/ngraph/runtime/gcpu/unit_test.manifest index a54f9764ece..1a5623c7834 100644 --- a/src/ngraph/runtime/generic_cpu/unit_test.manifest +++ b/src/ngraph/runtime/gcpu/unit_test.manifest @@ -3,3 +3,6 @@ tile_3d_few_repeats fake_quantize_pdpd convert_float32_bf16 convert_bf16_float32 + +onnx_GCPU.model_quant_conv_linear +onnx_GCPU.top_k_opset_10 diff --git 
a/src/ngraph/runtime/generic_cpu/gcpu_executable.hpp b/src/ngraph/runtime/generic_cpu/gcpu_executable.hpp deleted file mode 100644 index 6b8085aa189..00000000000 --- a/src/ngraph/runtime/generic_cpu/gcpu_executable.hpp +++ /dev/null @@ -1,198 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "ngraph/ops.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" -#include "ngraph/runtime/backend.hpp" -#include "ngraph/runtime/generic_cpu/kernel/broadcast.hpp" -#include "ngraph/runtime/generic_cpu/kernel/dot.hpp" -#include "ngraph/runtime/generic_cpu/kernel/reshape.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/runtime/interpreter/int_executable.hpp" -#include "ngraph/runtime/reference/abs.hpp" -#include "ngraph/runtime/reference/acos.hpp" -#include "ngraph/runtime/reference/add.hpp" -#include "ngraph/runtime/reference/all.hpp" -#include "ngraph/runtime/reference/allreduce.hpp" -#include "ngraph/runtime/reference/and.hpp" -#include "ngraph/runtime/reference/any.hpp" -#include "ngraph/runtime/reference/argmax.hpp" -#include "ngraph/runtime/reference/argmin.hpp" -#include "ngraph/runtime/reference/asin.hpp" -#include "ngraph/runtime/reference/atan.hpp" -#include "ngraph/runtime/reference/atan2.hpp" -#include "ngraph/runtime/reference/avg_pool.hpp" -#include "ngraph/runtime/reference/batch_mat_mul.hpp" -#include "ngraph/runtime/reference/batch_norm.hpp" -#include "ngraph/runtime/reference/broadcast.hpp" -#include "ngraph/runtime/reference/broadcast_distributed.hpp" -#include "ngraph/runtime/reference/ceiling.hpp" -#include "ngraph/runtime/reference/concat.hpp" -#include "ngraph/runtime/reference/constant.hpp" -#include "ngraph/runtime/reference/convert.hpp" -#include "ngraph/runtime/reference/convolution.hpp" -#include "ngraph/runtime/reference/copy.hpp" -#include "ngraph/runtime/reference/cos.hpp" -#include "ngraph/runtime/reference/cosh.hpp" -#include "ngraph/runtime/reference/dequantize.hpp" -#include "ngraph/runtime/reference/divide.hpp" -#include "ngraph/runtime/reference/dot.hpp" -#include "ngraph/runtime/reference/embedding_lookup.hpp" -#include "ngraph/runtime/reference/equal.hpp" -#include "ngraph/runtime/reference/erf.hpp" -#include "ngraph/runtime/reference/exp.hpp" -#include "ngraph/runtime/reference/floor.hpp" -#include "ngraph/runtime/reference/gather.hpp" -#include "ngraph/runtime/reference/gather_nd.hpp" -#include "ngraph/runtime/reference/generate_mask.hpp" -#include "ngraph/runtime/reference/greater.hpp" -#include "ngraph/runtime/reference/greater_eq.hpp" -#include "ngraph/runtime/reference/less.hpp" -#include "ngraph/runtime/reference/less_eq.hpp" -#include "ngraph/runtime/reference/log.hpp" -#include "ngraph/runtime/reference/lrn.hpp" -#include 
"ngraph/runtime/reference/max.hpp" -#include "ngraph/runtime/reference/max_pool.hpp" -#include "ngraph/runtime/reference/maximum.hpp" -#include "ngraph/runtime/reference/min.hpp" -#include "ngraph/runtime/reference/minimum.hpp" -#include "ngraph/runtime/reference/multiply.hpp" -#include "ngraph/runtime/reference/negate.hpp" -#include "ngraph/runtime/reference/not.hpp" -#include "ngraph/runtime/reference/not_equal.hpp" -#include "ngraph/runtime/reference/one_hot.hpp" -#include "ngraph/runtime/reference/or.hpp" -#include "ngraph/runtime/reference/pad.hpp" -#include "ngraph/runtime/reference/power.hpp" -#include "ngraph/runtime/reference/product.hpp" -#include "ngraph/runtime/reference/quantize.hpp" -#include "ngraph/runtime/reference/recv.hpp" -#include "ngraph/runtime/reference/relu.hpp" -#include "ngraph/runtime/reference/replace_slice.hpp" -#include "ngraph/runtime/reference/reshape.hpp" -#include "ngraph/runtime/reference/result.hpp" -#include "ngraph/runtime/reference/reverse.hpp" -#include "ngraph/runtime/reference/reverse_sequence.hpp" -#include "ngraph/runtime/reference/scatter_add.hpp" -#include "ngraph/runtime/reference/scatter_nd_add.hpp" -#include "ngraph/runtime/reference/select.hpp" -#include "ngraph/runtime/reference/send.hpp" -#include "ngraph/runtime/reference/shape_of.hpp" -#include "ngraph/runtime/reference/sigmoid.hpp" -#include "ngraph/runtime/reference/sign.hpp" -#include "ngraph/runtime/reference/sin.hpp" -#include "ngraph/runtime/reference/sinh.hpp" -#include "ngraph/runtime/reference/slice.hpp" -#include "ngraph/runtime/reference/softmax.hpp" -#include "ngraph/runtime/reference/sqrt.hpp" -#include "ngraph/runtime/reference/subtract.hpp" -#include "ngraph/runtime/reference/sum.hpp" -#include "ngraph/runtime/reference/tan.hpp" -#include "ngraph/runtime/reference/tanh.hpp" -#include "ngraph/runtime/reference/topk.hpp" -#include "ngraph/runtime/reference/xor.hpp" -#include "ngraph/runtime/tensor.hpp" -#include "ngraph/state/bernoulli_rng_state.hpp" - -namespace ngraph -{ - namespace runtime - { - namespace gcpu - { - class GCPUBackend; - class GCPUExecutable; - - namespace - { - // This expands the op list in op_tbl.hpp into a list of enumerations that look like - // this: - // Abs, - // Acos, - // ... 
-            enum class OP_TYPEID
-            {
-#define NGRAPH_OP(NAME, NAMESPACE) NAME,
-#include "ngraph/opsets/opset0_tbl.hpp"
-#undef NGRAPH_OP
-                UnknownOp
-            };
-        }
-    }
-}
-
-class ngraph::runtime::gcpu::GCPUExecutable : public runtime::interpreter::INTExecutable
-{
-    friend class GCPUBackend;
-
-public:
-    GCPUExecutable(const std::shared_ptr<Function>& function,
-                   bool enable_performance_collection = false);
-
-    bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
-              const std::vector<std::shared_ptr<runtime::Tensor>>& intputs) override;
-
-private:
-    int get_alignment() const { return 64; }
-    void generate_calls(const element::Type& type,
-                        const Node& op,
-                        const std::vector<std::shared_ptr<HostTensor>>& outputs,
-                        const std::vector<std::shared_ptr<HostTensor>>& inputs) override;
-
-    template <typename T>
-    void gop_engine(const Node& node,
-                    const std::vector<std::shared_ptr<HostTensor>>& out,
-                    const std::vector<std::shared_ptr<HostTensor>>& args)
-    {
-        switch (INTExecutable::get_typeid(node))
-        {
-        case ngraph::runtime::interpreter::OP_TYPEID::Broadcast:
-        {
-            const op::Broadcast* broadcast = static_cast<const op::Broadcast*>(&node);
-            Shape in_shape = node.get_input_shape(0);
-            Shape out_shape = node.get_output_shape(0);
-            AxisSet broadcast_axes = broadcast->get_broadcast_axes();
-            reference::broadcast<T>(args[0]->get_data_ptr<const T>(),
-                                    out[0]->get_data_ptr<T>(),
-                                    in_shape,
-                                    out_shape,
-                                    broadcast_axes);
-            break;
-        }
-        case ngraph::runtime::interpreter::OP_TYPEID::Reshape:
-        {
-            const op::Reshape* reshape = static_cast<const op::Reshape*>(&node);
-            reference::reshape<T>(args[0]->get_data_ptr<const T>(),
-                                  out[0]->get_data_ptr<T>(),
-                                  node.get_input_shape(0),
-                                  reshape->get_input_order(),
-                                  node.get_output_shape(0));
-            break;
-        }
-        default: op_engine<T>(node, out, args); break;
-        }
-    }
-};

From f6fe6aca125f8295262d3eb1625e77e5f09cd542 Mon Sep 17 00:00:00 2001
From: Scott Cyphers
Date: Mon, 13 Jan 2020 09:35:34 -0800
Subject: [PATCH 04/12] =?UTF-8?q?Restore=20constant=20folding=20for=20DynR?=
 =?UTF-8?q?eshape=20until=20users=20are=20converted=20to=20=E2=80=A6=20(#4?=
 =?UTF-8?q?164)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Restore constant folding for DynReshape until users are converted to v1
  Reshape

* Disable test when no serialization

Co-authored-by: baojun <32073718+baojun-nervana@users.noreply.github.com>
---
 .../pass/constant_folding_dyn_reshape.cpp     |  165 +-
 test/dyn_elimination.cpp                      |   27 +-
 test/models/paddlepaddle/transpose.json       | 1924 +++++++++++++++++
 3 files changed, 2043 insertions(+), 73 deletions(-)
 create mode 100644 test/models/paddlepaddle/transpose.json

diff --git a/src/ngraph/pass/constant_folding_dyn_reshape.cpp b/src/ngraph/pass/constant_folding_dyn_reshape.cpp
index 8a02a4b145f..415d95247e8 100644
--- a/src/ngraph/pass/constant_folding_dyn_reshape.cpp
+++ b/src/ngraph/pass/constant_folding_dyn_reshape.cpp
@@ -17,6 +17,7 @@
 #include

 #include "constant_folding.hpp"
+#include "ngraph/op/experimental/dyn_reshape.hpp"
 #include "ngraph/op/reshape.hpp"
 #include "ngraph/runtime/reference/reshape.hpp"
 #include "ngraph/type/element_type.hpp"
@@ -24,9 +25,9 @@
 using namespace std;
 using namespace ngraph;

-template <class T>
+template <class T, class R>
 shared_ptr<op::Constant> fold_constant_dyn_reshape(shared_ptr<op::Constant> constant_data,
-                                                   shared_ptr<op::v1::Reshape> dyn_reshape)
+                                                   R dyn_reshape)
 {
     const Shape& out_shape = dyn_reshape->get_shape();
     runtime::AlignedBuffer buffer(shape_size(out_shape) * sizeof(T));
@@ -44,18 +45,102 @@ shared_ptr<op::Constant> fold_constant_dyn_reshape(shared_ptr<op::Constant> cons
     return make_shared<op::Constant>(dyn_reshape->get_element_type(), out_shape, data_ptr);
 }

+template <class R>
+std::shared_ptr<op::Constant> do_fold(R dyn_reshape_match,
+                                      shared_ptr<op::Constant> constant_data_match)
+{
+    std::shared_ptr<op::Constant> replacement;
+    auto type = dyn_reshape_match->get_element_type();
+    switch (type)
+    {
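+        // Editor's note (descriptive comment, added for clarity): the switch
+        // dispatches on the constant's element type so that
+        // fold_constant_dyn_reshape<T> runs with the matching C++ storage type.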
+ case element::Type_t::undefined: + NGRAPH_CHECK(false, + "Encountered 'undefined' element type in constant_dyn_reshape_callback"); + break; + case element::Type_t::dynamic: + NGRAPH_CHECK(false, "Encountered 'dynamic' element type in constant_dyn_reshape_callback"); + break; + case element::Type_t::u1: + NGRAPH_CHECK(false, "Encountered 'u1' element type in constant_dyn_reshape_callback"); + break; + case element::Type_t::boolean: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::bf16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::f16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::f32: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::f64: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i8: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i32: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i64: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u8: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u32: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u64: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + } + return replacement; +} + void pass::ConstantFolding::construct_constant_dyn_reshape() { auto constant_data_label = make_shared( element::f32, Shape{2, 4}, pattern::has_class()); auto constant_shape_label = make_shared(element::i64, Shape{1}, pattern::has_class()); - auto dyn_reshape = + auto reshape_v1 = make_shared(constant_data_label, constant_shape_label, false); + auto dyn_reshape = + make_shared(constant_data_label, constant_shape_label, false); // Note: No need to capture or consider constant_shape_label, because // shape propagation will have transferred the info to dyn_reshape's // output. 
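+    // Editor's note (illustration, not part of the patch): two matchers are
+    // registered below, one for the new v1::Reshape and one for the restored
+    // v0::DynReshape; both callbacks funnel into the shared do_fold() helper.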
+ auto constant_reshape_v1_callback = [constant_data_label](pattern::Matcher& m) { + NGRAPH_DEBUG << "In callback for constant_reshape_v1_callback against node = " + << m.get_match_root()->get_name(); + + auto pattern_map = m.get_pattern_map(); + + auto constant_data_match = + static_pointer_cast(pattern_map[constant_data_label]); + auto match_root = m.get_match_root(); + NGRAPH_CHECK(revalidate_and_ensure_static(match_root)); + shared_ptr replacement; + replacement = + do_fold(static_pointer_cast(match_root), constant_data_match); + replace_node(m.get_match_root(), replacement); + return true; + }; + + auto reshape_v1_matcher = + make_shared(reshape_v1, "ConstantFolding.ConstantReshapev1"); + this->add_matcher( + reshape_v1_matcher, constant_reshape_v1_callback, PassProperty::CHANGE_DYNAMIC_STATE); + auto constant_dyn_reshape_callback = [constant_data_label](pattern::Matcher& m) { NGRAPH_DEBUG << "In callback for constant_dyn_reshape_callback against node = " << m.get_match_root()->get_name(); @@ -64,75 +149,11 @@ void pass::ConstantFolding::construct_constant_dyn_reshape() auto constant_data_match = static_pointer_cast(pattern_map[constant_data_label]); - auto dyn_reshape_match = static_pointer_cast(m.get_match_root()); - - NGRAPH_CHECK(revalidate_and_ensure_static(dyn_reshape_match)); - - std::shared_ptr replacement; - auto type = dyn_reshape_match->get_element_type(); - switch (type) - { - case element::Type_t::undefined: - NGRAPH_CHECK(false, - "Encountered 'undefined' element type in constant_dyn_reshape_callback"); - break; - case element::Type_t::dynamic: - NGRAPH_CHECK(false, - "Encountered 'dynamic' element type in constant_dyn_reshape_callback"); - break; - case element::Type_t::u1: - NGRAPH_CHECK(false, "Encountered 'u1' element type in constant_dyn_reshape_callback"); - break; - case element::Type_t::boolean: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::bf16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::f16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::f32: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::f64: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i8: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i32: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i64: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u8: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u32: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u64: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - } - + auto match_root = m.get_match_root(); + NGRAPH_CHECK(revalidate_and_ensure_static(match_root)); + shared_ptr replacement; + 
replacement = + do_fold(static_pointer_cast(match_root), constant_data_match); replace_node(m.get_match_root(), replacement); return true; }; diff --git a/test/dyn_elimination.cpp b/test/dyn_elimination.cpp index 4c6eb9d2a19..d519616b4cb 100644 --- a/test/dyn_elimination.cpp +++ b/test/dyn_elimination.cpp @@ -14,10 +14,13 @@ // limitations under the License. //***************************************************************************** -#include "ngraph/pass/dyn_elimination.hpp" #include "gtest/gtest.h" + #include "ngraph/ngraph.hpp" +#include "ngraph/pass/constant_folding.hpp" +#include "ngraph/pass/dyn_elimination.hpp" #include "ngraph/pass/manager.hpp" +#include "ngraph/pass/opset0_downgrade.hpp" #include "util/all_close_f.hpp" #include "util/test_tools.hpp" @@ -266,3 +269,25 @@ TEST(dyn_elimination, range_f64) ASSERT_TRUE(test::all_close_f( vals, vector{-0.5, -0.25, 0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75})); } + +#ifndef NGRAPH_JSON_DISABLE +TEST(dyn_elimination, paddlepaddle_transpose) +{ + string model = "paddlepaddle/transpose.json"; + const string json_path = file_util::path_join(SERIALIZED_ZOO, model); + const string json_string = file_util::read_file_to_string(json_path); + shared_ptr f = ngraph::deserialize(json_string); + + vector arg_element_types = {element::f64, element::f64}; + vector arg_shapes = {{3, 4}, {4, 3}}; + std::vector arg_value_base_pointers = {nullptr, nullptr}; + auto clone = specialize_function(f, arg_element_types, arg_shapes, arg_value_base_pointers); + + pass::Manager passes; + passes.register_pass(); + passes.register_pass(); + passes.register_pass(); // Converts dynamic v1 variants to v0 ops + passes.set_per_pass_validation(false); + passes.run_passes(clone); +} +#endif diff --git a/test/models/paddlepaddle/transpose.json b/test/models/paddlepaddle/transpose.json new file mode 100644 index 00000000000..984116bd08b --- /dev/null +++ b/test/models/paddlepaddle/transpose.json @@ -0,0 +1,1924 @@ +[ + { + "name": "Function_0", + "ops": [ + { + "cacheable": false, + "element_type": "double", + "name": "Parameter_1", + "op": "Parameter", + "op_version": 0, + "outputs": [ + "Parameter_1_0" + ], + "shape": null, + "type_info": { + "name": "Parameter", + "version": 0 + } + }, + { + "cacheable": false, + "element_type": "double", + "name": "Parameter_0", + "op": "Parameter", + "op_version": 0, + "outputs": [ + "Parameter_0_0" + ], + "shape": null, + "type_info": { + "name": "Parameter", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0" + ], + "name": "ShapeOf_80", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_80_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_84", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_84_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_79", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_79_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_82", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_82_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_79", + "Constant_82" + ], + "name": "DynReshape_85", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + 
"DynReshape_85_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_83", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_83_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_80", + "Constant_84", + "DynReshape_85", + "Constant_83" + ], + "lower_bounds_mask": [], + "name": "DynSlice_86", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_86_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_88", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_88_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_86", + "Constant_88" + ], + "name": "Product_89", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_89_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_89" + ], + "name": "Reshape_90", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_90_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_80" + ], + "name": "ShapeOf_81", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_81_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_80", + "DynReshape_85", + "ShapeOf_81", + "Constant_83" + ], + "lower_bounds_mask": [], + "name": "DynSlice_87", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_87_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_91", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_91_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_87", + "Constant_91" + ], + "name": "Product_92", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_92_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_92" + ], + "name": "Reshape_93", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_93_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_90", + "Reshape_93" + ], + "name": "Concat_94", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_94_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0", + "Concat_94" + ], + "name": "DynReshape_95", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_95_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_113", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_113_0" + ], + "shape": [ + 2 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1", + "0" + ] + }, + { + "inputs": [ + "DynReshape_95", + 
"Constant_113" + ], + "name": "Transpose_114", + "op": "Transpose", + "op_version": 0, + "outputs": [ + "Transpose_114_0" + ], + "type_info": { + "name": "Transpose", + "version": 0 + } + }, + { + "element_type": "double", + "name": "Constant_63", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_63_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "input_order": [ + 0 + ], + "inputs": [ + "Constant_63" + ], + "name": "Reshape_68", + "op": "Reshape", + "op_version": 0, + "output_shape": [], + "outputs": [ + "Reshape_68_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0" + ], + "name": "ShapeOf_11", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_11_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_15", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_15_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_10", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_10_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_13", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_13_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_10", + "Constant_13" + ], + "name": "DynReshape_16", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_16_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_14", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_14_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_11", + "Constant_15", + "DynReshape_16", + "Constant_14" + ], + "lower_bounds_mask": [], + "name": "DynSlice_17", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_17_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_19", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_19_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_17", + "Constant_19" + ], + "name": "Product_20", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_20_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_20" + ], + "name": "Reshape_21", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_21_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_11" + ], + "name": "ShapeOf_12", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_12_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_11", + "DynReshape_16", + "ShapeOf_12", + "Constant_14" + ], + "lower_bounds_mask": 
[], + "name": "DynSlice_18", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_18_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_22", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_22_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_18", + "Constant_22" + ], + "name": "Product_23", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_23_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_23" + ], + "name": "Reshape_24", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_24_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_21", + "Reshape_24" + ], + "name": "Concat_25", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_25_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0", + "Concat_25" + ], + "name": "DynReshape_26", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_26_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_28", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_28_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_32", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_32_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_27", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_27_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_30", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_30_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_27", + "Constant_30" + ], + "name": "DynReshape_33", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_33_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_31", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_31_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_28", + "Constant_32", + "DynReshape_33", + "Constant_31" + ], + "lower_bounds_mask": [], + "name": "DynSlice_34", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_34_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_36", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_36_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_34", + "Constant_36" + ], + "name": 
"Product_37", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_37_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_37" + ], + "name": "Reshape_38", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_38_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_28" + ], + "name": "ShapeOf_29", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_29_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_28", + "DynReshape_33", + "ShapeOf_29", + "Constant_31" + ], + "lower_bounds_mask": [], + "name": "DynSlice_35", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_35_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_39", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_39_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_35", + "Constant_39" + ], + "name": "Product_40", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_40_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_40" + ], + "name": "Reshape_41", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_41_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_38", + "Reshape_41" + ], + "name": "Concat_42", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_42_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_1", + "Concat_42" + ], + "name": "DynReshape_43", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_43_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_26", + "DynReshape_43" + ], + "name": "Dot_44", + "op": "Dot", + "op_version": 0, + "outputs": [ + "Dot_44_0" + ], + "reduction_axes_count": 1, + "type_info": { + "name": "Dot", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0" + ], + "name": "ShapeOf_7", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_7_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_7" + ], + "lower_bounds": [ + 0 + ], + "name": "Slice_46", + "op": "Slice", + "op_version": 0, + "outputs": [ + "Slice_46_0" + ], + "strides": [ + 1 + ], + "type_info": { + "name": "Slice", + "version": 0 + }, + "upper_bounds": [ + 1 + ] + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_8", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_8_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_47", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_47_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "ShapeOf_8" + ], + "name": "ShapeOf_9", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_9_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + 
}, + { + "element_type": "int64_t", + "name": "Constant_45", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_45_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_8", + "Constant_47", + "ShapeOf_9", + "Constant_45" + ], + "lower_bounds_mask": [], + "name": "DynSlice_48", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_48_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "axis": 0, + "inputs": [ + "Slice_46", + "DynSlice_48" + ], + "name": "Concat_49", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_49_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Dot_44", + "Concat_49" + ], + "name": "DynReshape_50", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_50_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_50" + ], + "name": "ShapeOf_64", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_64_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_65", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_65_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "ShapeOf_64", + "Constant_65" + ], + "name": "Product_66", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_66_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "inputs": [ + "Product_66" + ], + "name": "Convert_67", + "op": "Convert", + "op_version": 0, + "outputs": [ + "Convert_67_0" + ], + "target_type": "double", + "type_info": { + "name": "Convert", + "version": 0 + } + }, + { + "inputs": [ + "Reshape_68", + "Convert_67" + ], + "name": "Divide_69", + "op": "Divide", + "op_version": 0, + "outputs": [ + "Divide_69_0" + ], + "pythondiv": true, + "type_info": { + "name": "Divide", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_73", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_73_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "ShapeOf_64" + ], + "name": "ShapeOf_70", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_70_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "input_order": [ + 0 + ], + "inputs": [ + "ShapeOf_70" + ], + "name": "Reshape_71", + "op": "Reshape", + "op_version": 0, + "output_shape": [], + "outputs": [ + "Reshape_71_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_72", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_72_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_73", + "Reshape_71", + "Constant_72" + ], + "name": "Range_74", + "op": "Range", + "op_version": 0, + "outputs": [ + "Range_74_0" + ], + "type_info": { + "name": "Range", + "version": 0 + } + }, + { + "inputs": [ + "Divide_69", + "ShapeOf_64", + "Range_74" + ], + "name": "DynBroadcast_75", + "op": "DynBroadcast", + "op_version": 0, + "outputs": [ + "DynBroadcast_75_0" 
+ ], + "type_info": { + "name": "DynBroadcast", + "version": 0 + } + }, + { + "inputs": [ + "DynReshape_95" + ], + "name": "ShapeOf_117", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_117_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_117" + ], + "lower_bounds": [ + 0 + ], + "name": "Slice_118", + "op": "Slice", + "op_version": 0, + "outputs": [ + "Slice_118_0" + ], + "strides": [ + 1 + ], + "type_info": { + "name": "Slice", + "version": 0 + }, + "upper_bounds": [ + 1 + ] + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_97", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_97_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_101", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_101_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_96", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_96_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_99", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_99_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_96", + "Constant_99" + ], + "name": "DynReshape_102", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_102_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_100", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_100_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_97", + "Constant_101", + "DynReshape_102", + "Constant_100" + ], + "lower_bounds_mask": [], + "name": "DynSlice_103", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_103_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_105", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_105_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_103", + "Constant_105" + ], + "name": "Product_106", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_106_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_106" + ], + "name": "Reshape_107", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_107_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_97" + ], + "name": "ShapeOf_98", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_98_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_97", + "DynReshape_102", + "ShapeOf_98", + "Constant_100" + ], + "lower_bounds_mask": [], + "name": "DynSlice_104", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + 
"DynSlice_104_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_108", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_108_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_104", + "Constant_108" + ], + "name": "Product_109", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_109_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_109" + ], + "name": "Reshape_110", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_110_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_107", + "Reshape_110" + ], + "name": "Concat_111", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_111_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_1", + "Concat_111" + ], + "name": "DynReshape_112", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_112_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_112" + ], + "name": "ShapeOf_119", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_119_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_119" + ], + "lower_bounds": [ + 1 + ], + "name": "Slice_120", + "op": "Slice", + "op_version": 0, + "outputs": [ + "Slice_120_0" + ], + "strides": [ + 1 + ], + "type_info": { + "name": "Slice", + "version": 0 + }, + "upper_bounds": [ + 2 + ] + }, + { + "axis": 0, + "inputs": [ + "Slice_118", + "Slice_120" + ], + "name": "Concat_121", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_121_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "DynBroadcast_75", + "Concat_121" + ], + "name": "DynReshape_122", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_122_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "Transpose_114", + "DynReshape_122" + ], + "name": "Dot_123", + "op": "Dot", + "op_version": 0, + "outputs": [ + "Dot_123_0" + ], + "reduction_axes_count": 1, + "type_info": { + "name": "Dot", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_77", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_77_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "Dot_123", + "ShapeOf_77" + ], + "name": "DynReshape_124", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_124_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_124" + ], + "name": "Result_125", + "needs_default_layout": true, + "op": "Result", + "op_version": 0, + "outputs": [ + "Result_125_0" + ], + "type_info": { + "name": "Result", + "version": 0 + } + } + ], + "parameters": [ + "Parameter_0", + "Parameter_1" + ], + "result": [ + "Result_125" + ] + } +] From 19a7d71054b99b3695472e9c2b01173531a27db3 Mon Sep 17 00:00:00 2001 From: Robert Kimball Date: Mon, 13 Jan 2020 14:51:38 -0800 Subject: [PATCH 05/12] Fix topk deserialize to be backwards compatible (#4171) --- 
src/ngraph/serializer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp index b0709425cd2..078dc68c410 100644 --- a/src/ngraph/serializer.cpp +++ b/src/ngraph/serializer.cpp @@ -2943,7 +2943,8 @@ shared_ptr JSONDeserializer::deserialize_node(json node_js) { auto compute_max = node_js.at("compute_max").get(); auto target_type = read_element_type(node_js.at("index_element_type")); - op::TopKSortType sort = node_js.at("sort").get(); + op::TopKSortType sort = + get_or_default(node_js, "sort", op::TopKSortType::SORT_VALUES); if (has_key(node_js, "top_k_axis")) { auto top_k_axis = node_js.at("top_k_axis").get(); From 660ae5acd86e06a765123293895b147b3479a929 Mon Sep 17 00:00:00 2001 From: Leona C Date: Mon, 13 Jan 2020 16:23:57 -0800 Subject: [PATCH 06/12] Update doc to 0.28 (#4166) * Update sitemap to not use a page title * Document Release Notes for 0.28.0; update section on Distributed Training Co-authored-by: Scott Cyphers --- doc/sphinx/source/conf.py | 5 ++-- doc/sphinx/source/ops/constant.rst | 2 +- doc/sphinx/source/ops/parameter.rst | 8 +++--- doc/sphinx/source/ops/result.rst | 2 +- doc/sphinx/source/project/release-notes.rst | 22 +++++++++++------ doc/sphinx/source/training/index.rst | 6 ++++- doc/sphinx/source/training/overview.rst | 27 ++------------------- 7 files changed, 31 insertions(+), 41 deletions(-) diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index b90857c9212..b7500ca5db0 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -73,11 +73,11 @@ # built documents. # # The short X.Y version. -version = '0.27' +version = '0.28' # The Documentation full version, including alpha/beta/rc tags. Some features # available in the latest code will not necessarily be documented first -release = '0.27.1' +release = '0.28.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -143,6 +143,7 @@ ] } +html_last_updated_fmt= '' # -- Options for HTMLHelp output ------------------------------------------ diff --git a/doc/sphinx/source/ops/constant.rst b/doc/sphinx/source/ops/constant.rst index fa931d28dd0..b8ae174420f 100644 --- a/doc/sphinx/source/ops/constant.rst +++ b/doc/sphinx/source/ops/constant.rst @@ -43,6 +43,6 @@ Outputs C++ Interface ============= -.. doxygenclass:: ngraph::op::Constant +.. doxygenclass:: ngraph::op::v0::Constant :project: ngraph :members: diff --git a/doc/sphinx/source/ops/parameter.rst b/doc/sphinx/source/ops/parameter.rst index 0e9ddc07656..efec9007d4d 100644 --- a/doc/sphinx/source/ops/parameter.rst +++ b/doc/sphinx/source/ops/parameter.rst @@ -4,7 +4,7 @@ Parameter ######### -.. code-block: cpp +.. code-block:: cpp Parameter // A function parameter. @@ -38,7 +38,9 @@ Outputs | ``output`` | ``element_type`` | ``shape`` | +------------+------------------+------------+ -A ``Parameter`` produces the value of the tensor passed to the function in the position of the parameter in the function's arguments. The passed tensor must have the element type and shape specified by the parameter. +A ``Parameter`` produces the value of the tensor passed to the function +in the position of the parameter in the function's arguments. The passed +tensor must have the element type and shape specified by the parameter. Backprop ======== @@ -51,6 +53,6 @@ Backprop C++ Interface ============= -.. doxygenclass:: ngraph::op::Parameter +.. 
doxygenclass:: ngraph::op::v0::Parameter :project: ngraph :members: diff --git a/doc/sphinx/source/ops/result.rst b/doc/sphinx/source/ops/result.rst index 6537c0676ec..23eeaf5d477 100644 --- a/doc/sphinx/source/ops/result.rst +++ b/doc/sphinx/source/ops/result.rst @@ -45,6 +45,6 @@ Mathematical Definition C++ Interface ============= -.. doxygenclass:: ngraph::op::Result +.. doxygenclass:: ngraph::op::v0::Result :project: ngraph :members: diff --git a/doc/sphinx/source/project/release-notes.rst b/doc/sphinx/source/project/release-notes.rst index be14a68d376..04f7f7fe4bd 100644 --- a/doc/sphinx/source/project/release-notes.rst +++ b/doc/sphinx/source/project/release-notes.rst @@ -21,19 +21,15 @@ We are pleased to announce the release of version |version|. Core updates for |version| -------------------------- -+ New ops -+ Provenance improvements from 0.25.1 -+ More dynamic shape ops -+ More informative errors - Latest documentation updates ---------------------------- -+ Additional details on quantization -+ Index updates -+ API updates ++ Dynamic Shapes and APIs ++ Provenance ++ Add linkages and overview for quantization APIs ++ New ngraph.ai themed illustrations .. important:: Pre-releases (``-rc-0.*``) have newer features, and are less stable. @@ -42,6 +38,16 @@ Latest documentation updates Changelog on Previous Releases ============================== +0.27.1 + ++ Fixes broken serializer for Sum and Product ++ New ops ++ Provenance improvements from 0.25.1 ++ More dynamic shape ops ++ More informative errors ++ Additional details on quantization ++ Index updates ++ API updates + All ops support ``Output`` arguments + Additional ops + ONNX handling unknown domains diff --git a/doc/sphinx/source/training/index.rst b/doc/sphinx/source/training/index.rst index 44d5e3b2782..bdc665e08ed 100644 --- a/doc/sphinx/source/training/index.rst +++ b/doc/sphinx/source/training/index.rst @@ -5,10 +5,14 @@ Distributed Training #################### +.. important:: Distributed training for CPU backend is not supported. Distributed + training support is provided only with the Intel® Nervana™ Neural Network Processor + for Training (NNP-T). .. toctree:: :maxdepth: 1 - overview.rst data_ingest.rst + + diff --git a/doc/sphinx/source/training/overview.rst b/doc/sphinx/source/training/overview.rst index 14f3cd136e8..6b088250092 100644 --- a/doc/sphinx/source/training/overview.rst +++ b/doc/sphinx/source/training/overview.rst @@ -1,3 +1,5 @@ +:orphan: + .. training/overview.rst: .. _overview: @@ -5,28 +7,3 @@ Basic Concepts ============== -.. important:: Distributed training is not officially supported as of version - |version|; however, some configuration options have worked for nGraph - devices in testing environments. - - -Data scientists with locally-scalable rack or cloud-based resources will likely -find it worthwhile to experiment with different modes or variations of -distributed training. Deployments using nGraph Library with supported backends -can be configured to train with data parallelism and will soon work with model -parallelism. Distributing workloads is increasingly important, as more data and -bigger models mean the ability to :doc:`../core/constructing-graphs/distribute-train` -work with larger and larger datasets, or to work with models having many layers -that aren't designed to fit to a single device. 
- -Distributed training with data parallelism splits the data and each worker -node has the same model; during each iteration, the gradients are aggregated -across all workers with an op that performs "allreduce", and applied to update -the weights. - -Using multiple machines helps to scale and speed up deep learning. With large -mini-batch training, one could train ResNet-50 with Imagenet-1k data to the -*Top 5* classifier in minutes using thousands of CPU nodes. See -`arxiv.org/abs/1709.05011`_. - -.. _arxiv.org/abs/1709.05011: https://arxiv.org/format/1709.05011 \ No newline at end of file From b3db038eb592e7ef8b8648d82c62ade503481d69 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Mon, 13 Jan 2020 20:23:35 -0800 Subject: [PATCH 07/12] Fix op::Pad::set_padding_above to actually set m_padding_above. (#4169) Co-authored-by: Scott Cyphers --- src/ngraph/op/pad.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ngraph/op/pad.hpp b/src/ngraph/op/pad.hpp index 08a2bdac5d6..fa7b13586cd 100644 --- a/src/ngraph/op/pad.hpp +++ b/src/ngraph/op/pad.hpp @@ -61,7 +61,7 @@ namespace ngraph const CoordinateDiff& get_padding_above() const { return m_padding_above; } void set_padding_above(const CoordinateDiff& padding_above) { - m_padding_below = padding_above; + m_padding_above = padding_above; } /// \brief DEPRECATED. This is just a stub for backends that used to implement the From 6de4893b4f95883956cfbc03c56913a001168f0d Mon Sep 17 00:00:00 2001 From: Nagy Mostafa Date: Mon, 13 Jan 2020 20:55:08 -0800 Subject: [PATCH 08/12] [MLIR] In-place memory optimization for elt-wise and concat ops. (#3832) * AliasMap WIP * Added liveness info * WIP * WIP: Tests * WIP: LIT tests * Added knobs for mem optimization pass. More LIT tests * Revert affine_lowerer change * More elaborate comment * Minor fixes * style-apply * Rename liveness * Switch to Analysis framework * Fix optimization conditions * Remove LIT test * style * Switch to equivalence relationship impl of non-alias relationship * refined comment * Switch non-alias to equivalence relationship * Fix bad merge * Adding tests. 
WIP * Added buffer size tracking and unit-tests * Added LIT and unit-tests * Turn optimization ON * style * fix unit-tests * Fix useCount * Fix copyright and typo * Refine few comments, remove new lines * style fix Co-authored-by: Scott Cyphers Co-authored-by: Sang Ik Lee --- src/contrib/mlir/CMakeLists.txt | 2 +- .../mlir/backend/analysis/memory_analysis.cpp | 613 ++++++++++++++++++ .../mlir/backend/analysis/memory_analysis.hpp | 79 +++ src/contrib/mlir/backend/cpu/cpu_backend.cpp | 17 - .../mlir/backend/pass/affine_lowerer.cpp | 121 +++- .../mlir/backend/pass/memory_optimization.cpp | 160 ----- .../mlir/backend/pass/memory_optimization.hpp | 27 - src/contrib/mlir/backend/pass/op_lowerers.inc | 2 +- src/contrib/mlir/core/ngraph_dialect/ops.cpp | 22 - src/contrib/mlir/core/ngraph_dialect/ops.hpp | 4 - test/backend/concat.in.cpp | 137 ++++ test/mlir/affine_conversion/memory_opt.mlir | 128 ++++ 12 files changed, 1053 insertions(+), 259 deletions(-) create mode 100644 src/contrib/mlir/backend/analysis/memory_analysis.cpp create mode 100644 src/contrib/mlir/backend/analysis/memory_analysis.hpp delete mode 100644 src/contrib/mlir/backend/pass/memory_optimization.cpp delete mode 100644 src/contrib/mlir/backend/pass/memory_optimization.hpp create mode 100644 test/mlir/affine_conversion/memory_opt.mlir diff --git a/src/contrib/mlir/CMakeLists.txt b/src/contrib/mlir/CMakeLists.txt index 10494225c48..a405ae924dc 100644 --- a/src/contrib/mlir/CMakeLists.txt +++ b/src/contrib/mlir/CMakeLists.txt @@ -25,7 +25,7 @@ add_subdirectory(tools/ngraph-opt) set(SRC backend/cpu/cpu_backend.cpp backend/pass/affine_lowerer.cpp - backend/pass/memory_optimization.cpp + backend/analysis/memory_analysis.cpp core/compiler.cpp core/ngraph_dialect/dialect.cpp core/ngraph_dialect/type.cpp diff --git a/src/contrib/mlir/backend/analysis/memory_analysis.cpp b/src/contrib/mlir/backend/analysis/memory_analysis.cpp new file mode 100644 index 00000000000..17fd28102c0 --- /dev/null +++ b/src/contrib/mlir/backend/analysis/memory_analysis.cpp @@ -0,0 +1,613 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// NOTE: This file follows nGraph format style and MLIR naming convention since it does +// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. 
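// For orientation: the affine lowering pass patched later in this commit
// consumes this analysis roughly like the condensed sketch below. The
// calls are lifted from the affine_lowerer.cpp hunks further down;
// getAnalysis<MemoryAnalysis>() is MLIR's standard analysis-framework
// entry point, and nothing here is additional API:
//
//   m_memAnalysis = &getAnalysis<MemoryAnalysis>();     // in runOnModule()
//   auto bufferInfo = m_memAnalysis->getBufferInfo(op); // per lowered op
//   if (bufferInfo.isValid())
//   {
//       // re-use (or create) the linear buffer for bufferInfo.m_bufferId,
//       // then view it at bufferInfo.m_offset -- see createTempBuffer()
//       // and createTempMemref() in the affine lowering diff below.
//   }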
+ +#include "memory_analysis.hpp" +#include "contrib/mlir/core/compiler.hpp" +#include "contrib/mlir/core/ngraph_dialect/ops.hpp" +#include "contrib/mlir/core/ngraph_dialect/type.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static llvm::cl::opt clEnableNgInPlaceMemory( + "ngraph-memory-opt", + llvm::cl::init(true), + llvm::cl::desc("Enable ngraph dialect in-place memory optimization pass")); + +static llvm::cl::opt + clEnableNgInPlaceConcat("ngraph-memory-opt-concat", + llvm::cl::init(true), + llvm::cl::desc("Enable inplace concat optimization")); + +static llvm::cl::opt + clEnableNgInPlaceEltWise("ngraph-memory-opt-eltwise", + llvm::cl::init(true), + llvm::cl::desc("Enable inplace element wise optimization")); + +// anonymous namespace +// no need to expose any of the following outside of this file +namespace +{ + using namespace ngraph::runtime; + using namespace ngraph::runtime::ngmlir; + using namespace mlir; + + // A helper data-structure to track cannot alias relationship between + // tensor syms. If NoAlias[T] contains S, then T and S cannot alias. + // The relationship is an equivalence (transitive, symmetric, reflexive) + // Initially each sym is put in its own equivalence class (set). + // If two syms a and b are found to be non-alias (equivalent), + // their equivalence classes are unioned + class AliasRelation + { + public: + /// Initialize the relationship for a number of syms + void init(std::unordered_set& symbols); + /// Checks if values a and b can alias + bool canAlias(Value* a, Value* b); + void insertNoAlias(Value* a, Value* b); + + private: + using BV = llvm::BitVector; + std::unordered_map m_valueToIdx; + std::unordered_map m_idxToValue; + std::unordered_map m_valueToSet; + SmallVector m_sets; + }; + + // Simple single basic block liveness analysis + // TODO: Replace with MLIR's liveness analysis + class LivenessAnalysis + { + public: + bool isLive(Value* v); + void setLive(Value* v); + void kill(Value* v); + void getLiveValues(llvm::SmallVectorImpl& values); + void reset(); + + private: + unsigned m_maxIdx = 0; + SmallVector m_liveness; + std::unordered_map m_valueToIdx; + }; + + // Memory Assignment analysis + // Tries to find operations that can be done in place where applicable + // by assigning a virtual buffer ID to values. 
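// To make the assignment concrete, a small worked example in illustrative
// pseudo-IR (op and value names here are invented for this note, not real
// dialect syntax):
//
//   %t0 = "ng.add"(%a, %b)    // %a, %b are block args (sub-graph inputs)
//   %t1 = "ng.mul"(%t0, %t0)  // %t0 has no uses below this op
//   %t2 = "ng.relu"(%t1)      // %t2 feeds the return (sub-graph output)
//   "ng.return"(%t2)
//
// Scanning backwards: relu is skipped because its result is an output;
// for mul, operand %t0 is dead and not an I/O value, so %t1 and %t0 (that
// is, mul and %t0's defining op, add) both receive {bufferId = 0,
// offset = 0} and will share one linear buffer during affine lowering;
// when add itself is visited, no further re-use happens because its
// operands are block arguments. %t2 keeps a plain allocation.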
+ // The buffer assignment is used later in affine lowering pass to create + // or re-use memrefs + class MemoryAssignment + { + public: + MemoryAssignment(MemoryAnalysis* memAnalysis) + : m_memAnalysis(memAnalysis) + { + m_inplaceOps = { +#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE}, +#include "contrib/mlir/backend/pass/op_lowerers.inc" + }; + m_bufferId = 0; + } + void run(ModuleOp* module); + + private: + void processDestructiveInPlace(mlir::Operation* op); + void processConcat(mlir::Operation* op); + bool isSafeInPlace(mlir::Operation* op); + bool isInputOrOutputValue(mlir::Value* value); + LivenessAnalysis m_liveness; + AliasRelation m_aliasRelation; + std::unordered_map m_inplaceOps; + int m_bufferId; + MemoryAnalysis* m_memAnalysis; + }; + + // helpers + // Determines the buffer size a value needs based on its type + // offset is where that value should start in the buffer + static unsigned getBufferSizeForOperand(mlir::Value* value, int offset); + + // Go backwards over instructions + // + // Re-use buffers if none of the dst/srcs are input/output of the sub-graph + // + // For destructive in-place ops (elt-wise): + // - Find first src where it is last use (src is dead). + // If all srcs are last-use, then pick one with lower number of uses. + // If no src is found, bail out. + // - If dst has pre-assigned buffer/offset, then copy them to src. + // If not, assign new buffer to both dst and src. + // - Mark all live syms at this point to not alias src + // + // For non-Destructive in-place ops: + // Concat: + // - Reuse buffer if + // - Concat axis is most-significant non-one axis, and + // - all operands can alias dest. + // - If dst has an assignment, copy it over to srcs as long as + // there is no conflicting src pre-assignment + // - If dst has no assignment, and all srcs have no assignment, + // assign new buffer to dst and srcs + // + // Slice: TBD + // Reshape: TBD + // + // Update liveness info + void MemoryAssignment::run(ModuleOp* module) + { + if (!clEnableNgInPlaceMemory) + { + // Optimization disabled + return; + } + SmallVector funcOps(module->getOps()); + + if (funcOps.size() > 1 || funcOps.empty()) + { + // single func for now + return; + } + auto f = funcOps.back(); + auto& blocks = f.getBlocks(); + if (blocks.size() != 1) + { + // single block func for now + return; + } + auto& block = *(blocks.begin()); + + // count number of syms in the code and initialize alias relationship + std::unordered_set syms; + + for (auto it = block.begin(); it != block.end(); it++) + { + Operation* op = &(*it); + for (auto it : op->getResults()) + { + Value* v = it; + if (syms.find(v) == syms.end()) + { + syms.insert(v); + } + } + for (auto it : op->getOperands()) + { + Value* v = it; + if (syms.find(v) == syms.end()) + { + syms.insert(v); + } + } + } + m_aliasRelation.init(syms); + // scan instructions backwards + for (auto it = block.rbegin(); it != block.rend(); it++) + { + Operation* op = &(*it); + + if (isSafeInPlace(op)) + { + // TODO: replace with Op Interface check + if (dyn_cast(op)) + { + if (clEnableNgInPlaceConcat) + processConcat(op); + } + else + { + if (clEnableNgInPlaceEltWise) + processDestructiveInPlace(op); + } + } + // update liveness info + for (auto dit : op->getResults()) + { + m_liveness.kill(dit); + } + for (auto uit : op->getOperands()) + { + m_liveness.setLive(uit); + } + } + } + + void MemoryAssignment::processConcat(mlir::Operation* op) + { + auto concat = cast(op); + { + // concat on the highest non-one axis + auto concatAxis = 
concat.concatenation_axis(); + auto result = concat.getResult(); + auto shape = (result->getType().cast()).getShape(); + std::vector opndOffsets; + BufferInfo bufferInfo; + int bufferId = -1, baseOffset = 0; + unsigned bufferSize = 0; + + if (isInputOrOutputValue(op->getResult(0))) + { + // dst is output, bail out + return; + }; + + for (auto i = 0; i < shape.size(); i++) + { + if (i == concatAxis) + { + break; + } + if (shape[i] != 1) + { + return; + } + } + // check that all operands and dst can alias + // and that none is input or output + for (auto opnd : op->getOperands()) + { + if (!m_aliasRelation.canAlias(result, opnd) || isInputOrOutputValue(opnd)) + { + return; + } + } + // calculate relative offsets in the output buffer + int opndOffset = 0; + for (auto i = 0; i < op->getNumOperands(); i++) + { + if (i == 0) + { + opndOffsets.push_back(0); + } + else + { + auto opnd = op->getOperand(i - 1); + auto tensorType = opnd->getType().cast(); + opndOffset += tensorType.getNumElements(); + opndOffsets.push_back(opndOffset); + } + } + // check for consistent pre-existing buffer assignments + bufferInfo = m_memAnalysis->getBufferInfo(op); + // if dest has an assignment + if (bufferInfo.isValid()) + { + // set buffer ID and base offset to that of dest's + bufferId = bufferInfo.m_bufferId; + baseOffset = bufferInfo.m_offset; + // check if we can re-use it for all src operands + int bufferOffset = 0; + for (auto i = 0; i < op->getNumOperands(); i++) + { + auto opnd = op->getOperand(i); + auto defOp = opnd->getDefiningOp(); + NGRAPH_CHECK(defOp != nullptr, "Defining operation expected"); + // calculate expected absolute offset in the buffer + bufferOffset = baseOffset + opndOffsets[i]; + + bufferInfo = m_memAnalysis->getBufferInfo(defOp); + if (bufferInfo.isValid()) + { + if (bufferInfo.m_bufferId != bufferId || + bufferInfo.m_offset != bufferOffset) + { + // buffer ID or offset mismatch, bailout + return; + } + } + } + } + else + { + // dst has no buffer assignment + // TODO: + // We can re-use an existing assignment of a src operand if + // Every other src either: + // a. has a matching pre-assigned buffer ID and offset, or + // b. is unassigned a buffer/offset, and the computed offset is valid + // (non-negative), and no other live tensor aliases the chunk + // of the buffer we want to assign. + // To achieve this, we need to track buffer->{tensor,offset,size} and + // perform the check + // + // Example: + // V1 = Concat S0 (?), S1{0,16}, S2 (?) + // R0 = ... + // R2 = ... + // V2 = Concat R0{0, 0}, S1 {0,16}, R2{0,32} + // + // For the first concat, we could use the assignment of S1 (from second concat) + // to define assignments for S0 and S2, and since R0, R2 are dead, no live tensors + // alias into the buffer, and the assignment is valid. + // + // On the other hand, the following is invalid + // Example: + // R0 = ... + // V1 = Concat S0(?), S1(0,16), S2(?) + // R2 = ... + // V2 = Concat R0, S1{0,16}, R2 + // Reusing assignment of S1 in the first concat will cause S0 and R0 to alias. + // And since R0 is alive the write to R0 will overwrite S0. 
+ // For now, assign only if all srcs have no prior assignments + for (auto opnd : op->getOperands()) + { + if (m_memAnalysis->getBufferInfo(opnd->getDefiningOp()).isValid()) + { + return; + } + } + } + // We didn't find any pre-existing buffer assignment, create a new buffer + if (bufferId == -1) + { + bufferId = m_bufferId++; + baseOffset = 0; + } + + // adjust the buffer size based on this instruction + // max size is determined from dst offset and type + bufferSize = getBufferSizeForOperand(op->getResult(0), baseOffset); + m_memAnalysis->setBufferSize(bufferId, bufferSize); + + // Update analysis map. No need to check if we are over-writing previous entries + // since they should all match. + m_memAnalysis->setBufferInfo(op, {bufferId, baseOffset}); + for (auto i = 0; i < op->getNumOperands(); i++) + { + auto opnd = op->getOperand(i); + auto defOp = opnd->getDefiningOp(); + NGRAPH_CHECK(defOp != nullptr, "Defining operation expected"); + auto opndOffset = baseOffset + opndOffsets[i]; + m_memAnalysis->setBufferInfo(defOp, {bufferId, opndOffset}); + } + } + } + + void MemoryAssignment::processDestructiveInPlace(mlir::Operation* op) + { + NGRAPH_CHECK(op->getNumResults() == 1, "Destructive in-place with multi-def?"); + Value* use = nullptr; + int useCount = -1; + + if (isInputOrOutputValue(op->getResult(0))) + { + // dst is output, bail out + return; + } + // pick a dead operand that is not an input or output, preferring the one with the fewest uses + for (auto opnd : op->getOperands()) + { + if (!m_liveness.isLive(opnd) && !isInputOrOutputValue(opnd)) + { + int uses = 0; + for (auto& i : opnd->getUses()) + { + uses++; + } + if (useCount == -1 || uses < useCount) + { + use = opnd; + useCount = uses; + } + } + } + if (!use) + { + return; + } + // assign new buffer or copy buffer info from dst + auto bufferInfo = m_memAnalysis->getBufferInfo(op); + if (!bufferInfo.isValid()) + { + // attach a new buffer id, and 0 offset on both src and result + bufferInfo = {m_bufferId++, 0}; + m_memAnalysis->setBufferInfo(op, bufferInfo); + m_memAnalysis->setBufferInfo(use->getDefiningOp(), bufferInfo); + } + else + { + // copy result buffer id and offset to src + m_memAnalysis->setBufferInfo(use->getDefiningOp(), bufferInfo); + } + unsigned bufferSize = getBufferSizeForOperand(op->getResult(0), bufferInfo.m_offset); + m_memAnalysis->setBufferSize(bufferInfo.m_bufferId, bufferSize); + // update aliasing info: the re-used value cannot alias any currently live value + SmallVector liveValues; + m_liveness.getLiveValues(liveValues); + for (auto& value : liveValues) + { + m_aliasRelation.insertNoAlias(use, value); + } + } + bool MemoryAssignment::isInputOrOutputValue(mlir::Value* value) + { + auto defOp = value->getDefiningOp(); + // If no defining op, then this is a block arg, skip operand + // + // TODO: This check is assuming single BB function, improve to handle control-flow. + // In which case, we have to track block args to all pred branches that feed them, + // all the way up to the initial def, if any, or entry block arg. This is preferably + // done as a pre-pass to capture all inputs/output values. + if (!defOp) + { + return true; + } + // If the defined value is an output of the sub-graph, cannot do it in place + // + // TODO: Improve to support control flow. Track value use-chain along branches/block-args, + // if we hit a use in a return, it is an output value. 
+ for (auto& use : value->getUses()) + { + auto useOp = use.getOwner(); + if (isa(useOp)) + { + return true; + } + } + return false; + } + // TODO: Change this to use interfaces. + bool MemoryAssignment::isSafeInPlace(mlir::Operation* op) + { + auto it = m_inplaceOps.find(op->getName().getStringRef().str()); + + return it != m_inplaceOps.end() ? it->second : false; + } + + void AliasRelation::init(std::unordered_set& symbols) + { + unsigned numSyms = symbols.size(); + m_sets.resize(numSyms); + for (auto& bv : m_sets) + { + bv.resize(numSyms); + } + // populate id->value and value->id maps + unsigned i = 0; + for (auto v : symbols) + { + m_idxToValue[i] = v; + m_valueToIdx[v] = i; + m_valueToSet[v] = &m_sets[i]; + // set bit for that value + m_sets[i].set(i); + i++; + } + } + + bool AliasRelation::canAlias(Value* a, Value* b) + { + // a and b can alias only if they belong to different no-alias sets + return m_valueToSet[a] != m_valueToSet[b]; + } + + void AliasRelation::insertNoAlias(Value* a, Value* b) + { + // union the two sets that a and b belong to + // update the maps accordingly + if (!canAlias(a, b)) + { + // nothing to do + return; + } + // union the two sets of a and b + BV* aSet = m_valueToSet[a]; + BV* bSet = m_valueToSet[b]; + BV uSet = (*aSet); + uSet |= (*bSet); + // replace aSet with union + auto pSet = m_valueToSet[a]; + *pSet = uSet; + // update value to set maps + for (auto it = pSet->set_bits_begin(); it != pSet->set_bits_end(); it++) + { + unsigned id = *it; + auto value = m_idxToValue[id]; + m_valueToSet[value] = pSet; + } + } + + void LivenessAnalysis::reset() + { + m_valueToIdx.clear(); + m_liveness.clear(); + m_maxIdx = 0; + } + + void LivenessAnalysis::getLiveValues(llvm::SmallVectorImpl& values) + { + for (auto& entry : m_valueToIdx) + { + if (m_liveness[entry.second]) + { + values.push_back(entry.first); + } + } + } + + bool LivenessAnalysis::isLive(Value* v) + { + auto it = m_valueToIdx.find(v); + if (it == m_valueToIdx.end()) + { + return false; + } + return m_liveness[it->second]; + } + + void LivenessAnalysis::setLive(Value* v) + { + auto it = m_valueToIdx.find(v); + if (it == m_valueToIdx.end()) + { + m_valueToIdx[v] = m_maxIdx++; + m_liveness.push_back(true); + NGRAPH_CHECK(m_liveness.size() == m_maxIdx); + } + else + { + m_liveness[it->second] = true; + } + } + + void LivenessAnalysis::kill(Value* v) + { + auto it = m_valueToIdx.find(v); + if (it == m_valueToIdx.end()) + { + // already dead + return; + } + m_liveness[it->second] = false; + } + // helpers + unsigned getBufferSizeForOperand(mlir::Value* value, int offset) + { + auto tensorType = value->getType().dyn_cast(); + NGRAPH_CHECK(tensorType, "Invalid type to find buffer size for"); + + // bytes per element, rounded up to a whole byte + unsigned bufferSize = offset * std::ceil(tensorType.getElementBitWidth() / 8.0); + bufferSize += tensorType.getSizeInBytes(); + + return bufferSize; + } +} + +namespace mlir +{ + MemoryAnalysis::MemoryAnalysis(Operation* op) + { + MemoryAssignment memoryAssignment(this); + auto moduleOp = dyn_cast(op); + NGRAPH_CHECK(moduleOp != nullptr, "Expecting ModuleOp for analysis"); + memoryAssignment.run(&moduleOp); + } +} // namespace mlir diff --git a/src/contrib/mlir/backend/analysis/memory_analysis.hpp b/src/contrib/mlir/backend/analysis/memory_analysis.hpp new file mode 100644 index 00000000000..9881ce2774d --- /dev/null +++ b/src/contrib/mlir/backend/analysis/memory_analysis.hpp @@ -0,0 +1,79 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache 
License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// NOTE: This file follows nGraph format style and MLIR naming convention since it does +// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. + +#pragma once + +#include +#include +#include "ngraph/check.hpp" + +namespace mlir +{ + // BufferInfo + struct BufferInfo + { + // Buffer Id. If -1 then invalid buffer. + int m_bufferId; + // Offset into the buffer + int m_offset; + bool isValid() const { return m_bufferId != -1; } + }; + + struct MemoryAnalysis + { + using BufferInfoMap = std::unordered_map; + using BufferSizeMap = std::unordered_map; + // Compute this analysis with the provided operation. + MemoryAnalysis(Operation* op); + BufferInfo getBufferInfo(Operation* op) + { + auto it = m_bufferInfo.find(op); + if (it == m_bufferInfo.end()) + { + return {-1, -1}; + } + return it->second; + } + void setBufferInfo(Operation* op, BufferInfo bufferInfo) { m_bufferInfo[op] = bufferInfo; } + void setBufferSize(unsigned bufferId, unsigned size) + { + auto it = m_bufferSize.find(bufferId); + if (it != m_bufferSize.end()) + { + it->second = (size > it->second) ? size : it->second; + } + else + { + m_bufferSize[bufferId] = size; + } + } + unsigned getBufferSize(unsigned bufferId) + { + auto it = m_bufferSize.find(bufferId); + NGRAPH_CHECK(it != m_bufferSize.end(), "Buffer has no size!"); + return it->second; + } + + private: + // Records assignment of BufferInfo to each inplace op + BufferInfoMap m_bufferInfo; + // Records buffer size required for each buffer id in bytes + BufferSizeMap m_bufferSize; + }; +} diff --git a/src/contrib/mlir/backend/cpu/cpu_backend.cpp b/src/contrib/mlir/backend/cpu/cpu_backend.cpp index 017d38761d9..18b60d4b616 100644 --- a/src/contrib/mlir/backend/cpu/cpu_backend.cpp +++ b/src/contrib/mlir/backend/cpu/cpu_backend.cpp @@ -19,7 +19,6 @@ #include "cpu_backend.hpp" #include "contrib/mlir/backend/pass/affine_lowerer.hpp" -#include "contrib/mlir/backend/pass/memory_optimization.hpp" #include "contrib/mlir/utils.hpp" #include "ngraph/check.hpp" @@ -160,7 +159,6 @@ void MLIRCPUBackend::init() void MLIRCPUBackend::codegen() { - optimizeNgDialect(); lowerNgDialect(); } @@ -261,18 +259,3 @@ void MLIRCPUBackend::optimizeAffineDialect() // Run Std dialect optimizations. 
// TODO } - -void MLIRCPUBackend::optimizeNgDialect() -{ - mlir::PassManager pm(&m_context); - mlir::applyPassManagerCLOptions(pm); - if (clEnableNgInPlaceMemoryOpt) - { - pm.addPass(mlir::createMemoryOptimizationPass()); - } - - if (failed(pm.run(m_module.get()))) - { - NGRAPH_CHECK(false, "MLIR pass manager failed"); - } -} diff --git a/src/contrib/mlir/backend/pass/affine_lowerer.cpp b/src/contrib/mlir/backend/pass/affine_lowerer.cpp index 4843e04e63d..fe995790210 100644 --- a/src/contrib/mlir/backend/pass/affine_lowerer.cpp +++ b/src/contrib/mlir/backend/pass/affine_lowerer.cpp @@ -19,11 +19,13 @@ #include "affine_lowerer.hpp" +#include "contrib/mlir/backend/analysis/memory_analysis.hpp" #include "contrib/mlir/core/ngraph_dialect/ops.hpp" #include "contrib/mlir/core/ngraph_dialect/type.hpp" #include "ngraph/assertion.hpp" #include +#include #include #include #include @@ -165,6 +167,8 @@ namespace ValueHandle createZeroConstant(mlir::Type type); ValueHandle createOneConstant(mlir::Type type); + bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass); + /// Conversion from types in the nGraph dialect to the Standard dialect. class NGraphTypeConverter : public TypeConverter { @@ -184,29 +188,25 @@ namespace void runOnModule() override; SmallVector buildOutputDefs(Operation* op, PatternRewriter& rewriter); - /// Allocates a linear buffer for a temporary tensor - Value* createTempBuffer(Type type, PatternRewriter& rewriter); - + /// Allocates a linear buffer for a temporary memref that shares its + /// underlying memory. Used in conjunction with createTempMemref + Value* createTempBuffer(int bufferId, PatternRewriter& rewriter); /// Creates an allocation or view of a memref. /// type MemRef Type /// buffer Optional buffer value to create view over /// offset Optional offset into the buffer this view starts at /// - /// If buffer is null, a new allocation of a memref is created. - /// Offset is ignored. If buffer is non-null, then we create a temp - /// view over a pre-allocated buffer (see createTempBuffer) - + /// If buffer is null it allocates a Memref directly and Offset is ignored. + /// If not, it creates a view over the pre-allocated buffer at the given offset. Value* createTempMemref(Type type, Value* buffer, unsigned offset, PatternRewriter& rewriter); - /// Inserts dealloc Ops for each temporary allocated by AllocOp void insertDeallocs(PatternRewriter& rewriter); - NGraphTypeConverter& getTypeConverter() { return typeConverter; } + MemoryAnalysis* getMemAnalysis() const { return m_memAnalysis; } private: /// Collect a set of patterns to convert from the nGraph dialect to Affine dialect. void populateNGraphToAffineConversionPatterns(OwningRewritePatternList& patterns); - void findOutputValues(); void insertNoAliasArgAttrs(); @@ -219,7 +219,7 @@ namespace // Track pre-assigned buffers for each Value and re-use it if one is available. using IdToMemRefMap = std::unordered_map; IdToMemRefMap m_id_to_memref; - + MemoryAnalysis* m_memAnalysis; // TODO: Workaround for findOutputValues and buildOutputDefs. See NGCPU-470. std::string funcName; }; @@ -232,6 +232,9 @@ namespace populateNGraphToAffineConversionPatterns(patterns); + // Get Memory analysis for in-place memory optimizations + m_memAnalysis = &getAnalysis(); + // Create target that defines legal ops for nGraph dialect to be lowered to. ConversionTarget target(getContext()); @@ -336,24 +339,25 @@ namespace // will re-use the same buffer. 
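// Illustrative IR for the reuse path below, following the FileCheck tests
// added in this patch: two 2x2xf32 values that share buffer id 0 at byte
// offsets 0 and 16 lower to a single linear i8 allocation plus views:
//
//   %buf = alloc() : memref<32xi8>
//   %v0 = std.view %buf[][] : memref<32xi8> to memref<2x2xf32, (d0, d1) -> (d0 * 2 + d1)>
//   %v1 = std.view %buf[][] : memref<32xi8> to memref<2x2xf32, (d0, d1) -> (d0 * 2 + d1 + 4)>
//
// The view maps are written in f32 elements, so the 16-byte offset of %v1
// shows up as "+ 4".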
auto tensorType = origResult->getType().cast(); Value* newResult = nullptr; - Attribute bufferIdAttr = getBufferId(op); + auto bufferInfo = m_memAnalysis->getBufferInfo(op); Type memRefType = typeConverter.convertType(tensorType); - Value* bufferValue = nullptr; - if (!bufferIdAttr) + + if (!bufferInfo.isValid()) { // Allocate new memref newResult = createTempMemref(memRefType, nullptr, 0, rewriter); } else { - unsigned bufferId = bufferIdAttr.cast().getInt(); + unsigned bufferId = bufferInfo.m_bufferId; + unsigned offset = bufferInfo.m_offset; // Re-use a buffer if it exists, else create a new one and update the map IdToMemRefMap::iterator it = m_id_to_memref.find(bufferId); if (it == m_id_to_memref.end()) { // create a new buffer - bufferValue = createTempBuffer(memRefType, rewriter); + bufferValue = createTempBuffer(bufferId, rewriter); m_id_to_memref[bufferId] = bufferValue; } else @@ -361,7 +365,7 @@ namespace bufferValue = it->second; } // Create a temp view over the linear buffer - newResult = createTempMemref(memRefType, bufferValue, 0, rewriter); + newResult = createTempMemref(memRefType, bufferValue, offset, rewriter); } NGRAPH_CHECK(newResult != nullptr, "Temp memref value is not set"); newResults.push_back(newResult); @@ -370,18 +374,17 @@ namespace return newResults; } - Value* DialectLoweringPass::createTempBuffer(Type type, PatternRewriter& rewriter) + Value* DialectLoweringPass::createTempBuffer(int bufferId, PatternRewriter& rewriter) { - MemRefType memRefType = type.cast(); - - NGRAPH_CHECK(memRefType.hasStaticShape(), "Dynamic shapes are not supported"); - - // deduce linear buffer shape - unsigned sizeInBytes = memRefType.getSizeInBits() / 8; + unsigned sizeInBytes = getMemAnalysis()->getBufferSize(bufferId); + NGRAPH_CHECK(bufferId >= 0, "Invalid buffer id to allocate"); + NGRAPH_CHECK(sizeInBytes > 0, "Zero buffer allocation?"); + LLVM_DEBUG(llvm::dbgs() << "Allocating buffer of size " << sizeInBytes << " bytes\n"); MemRefType bufferType = - MemRefType::get({sizeInBytes}, IntegerType::get(8, type.getContext()), {}); + MemRefType::get({sizeInBytes}, IntegerType::get(8, rewriter.getContext()), {}); + // TODO: Set alignment Value* alloc = rewriter.create(rewriter.getUnknownLoc(), bufferType); memRefsToDealloc.push_back(alloc); @@ -404,7 +407,6 @@ namespace unsigned offset, PatternRewriter& rewriter) { - NGRAPH_CHECK(offset == 0, "Only zero offset is supported"); MemRefType memRefType = type.cast(); if (buffer) { @@ -414,7 +416,7 @@ namespace // linear // buffer // This is simply (d0, d1, d2, .. dN-1) --> d0 * S0 + d1 * S1 ...
+ dN-1 * SN-1 - // Where Si is the stride along the i_th dimension + // Where Si is the stride along the i_th dimension in elements auto shape = memRefType.getShape(); SmallVector strides(shape.size(), 0); strides[shape.size() - 1] = 1; @@ -1503,6 +1505,71 @@ namespace } NGRAPH_UNREACHABLE("Unsupported type"); } + + // Given a concat op, check whether the destination and operands have + // a buffer/offset assignment that makes this op + // valid in-place + bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass) + { + NGRAPH_CHECK(isa(op), "Expecting concat operation"); + auto concat = cast(op); + auto concatAxis = concat.concatenation_axis(); + auto result = concat.getResult(); + auto shape = (result->getType().cast()).getShape(); + auto memAnalysis = pass.getMemAnalysis(); + BufferInfo bufferInfo = memAnalysis->getBufferInfo(op); + + if (!bufferInfo.isValid()) + { + // no buffer assignment to dst, nothing to do + return false; + } + + auto dstBufferId = bufferInfo.m_bufferId; + auto dstOffset = bufferInfo.m_offset; + + LLVM_DEBUG(llvm::dbgs() << ">> Check in-place concat\n"); + LLVM_DEBUG(op->dump()); + for (auto i = 0; i < shape.size(); i++) + { + if (i == concatAxis) + { + break; + } + if (shape[i] != 1) + { + LLVM_DEBUG(llvm::dbgs() << "Axis FAIL. Skipping instruction\n"); + return false; + } + } + LLVM_DEBUG(llvm::dbgs() << "Axis OK\n"); + + // Check if the buffer id and offsets are consistent with what's expected + LLVM_DEBUG(llvm::dbgs() << "Dst (id, offset) = (" << dstBufferId << ", " << dstOffset + << ")\n"); + // relative offset in the buffer + int opndOffset = 0; + for (auto opnd : op->getOperands()) + { + bufferInfo = memAnalysis->getBufferInfo(opnd->getDefiningOp()); + auto srcBufferId = bufferInfo.m_bufferId; + auto srcOffset = bufferInfo.m_offset; + LLVM_DEBUG(llvm::dbgs() << "Src (id, offset) = (" << srcBufferId << ", " << srcOffset + << ")\n"); + if (!bufferInfo.isValid() || srcBufferId != dstBufferId || + srcOffset != (opndOffset + dstOffset)) + { + // mismatch in buffer IDs or offsets + LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets FAIL. Skipping instruction\n"); + return false; + } + auto tensorType = opnd->getType().cast(); + opndOffset += tensorType.getNumElements(); + } + LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets OK\n"); + + return true; + } } // namespace namespace mlir diff --git a/src/contrib/mlir/backend/pass/memory_optimization.cpp b/src/contrib/mlir/backend/pass/memory_optimization.cpp deleted file mode 100644 index 004dfd1a7d8..00000000000 --- a/src/contrib/mlir/backend/pass/memory_optimization.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
-//***************************************************************************** - -// NOTE: This file follows nGraph format style and MLIR naming convention since it does -// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. - -#include "contrib/mlir/core/compiler.hpp" -#include "contrib/mlir/core/ngraph_dialect/ops.hpp" -#include "contrib/mlir/core/ngraph_dialect/type.hpp" - -#include "ngraph/assertion.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// anonymous namespace -// no need to expose any of the following outside of this file -namespace -{ - using namespace ngraph::runtime; - using namespace ngraph::runtime::ngmlir; - using namespace mlir; - - /// Memory Optimization pass - /// - Tries to perform operations in place where applicable by assigning a virtual buffer ID - /// to values. Those are used later in affine lowering pass to create or re-use memrefs - class MemoryOptimizationPass : public mlir::FunctionPass - { - public: - MemoryOptimizationPass() - { - m_inplaceOps = { -#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE}, -#include "contrib/mlir/backend/pass/op_lowerers.inc" - }; - } - void runOnFunction() override; - - private: - bool isSafeInPlace(mlir::Operation* op); - std::unordered_map m_inplaceOps; - static unsigned bufferId; - }; - - unsigned MemoryOptimizationPass::bufferId = 0; - - void MemoryOptimizationPass::runOnFunction() - { - auto f = getFunction(); - - f.walk([&](mlir::Operation* op) { - if (!isSafeInPlace(op)) - { - return; - } - - if (op->getNumResults() > 1) - { - return; - } - - auto defVal = op->getResult(0); - - // If the defined value is an output of the sub-graph, cannot do it in place - for (auto use = defVal->use_begin(); use != defVal->use_end(); use++) - { - auto useOp = use->getOwner(); - if (isa(useOp)) - { - return; - } - } - - // Check if we can re-use the buffer of any of the inputs. Conjunction of the following: - // - single use value or all uses in the current op - // - not an input argument - - // TODO: Check instead if last post-dominating (dataflow-wise) use. - for (auto opnd = op->operand_begin(); opnd != op->operand_end(); opnd++) - { - auto val = *opnd; - // we optimize if the val has one use or if all uses are in the current op - bool optimize; - - optimize = val->hasOneUse(); - - if (!optimize) - { - optimize = true; - // check if all uses are in the current op - for (auto use = val->use_begin(); use != val->use_end(); use++) - { - if (use->getOwner() != op) - { - optimize = false; - } - } - } - - if (optimize) - { - // do we have a buffer id attached to this value - auto defOp = val->getDefiningOp(); - // If no defining op, then this is a block arg, skip operand - if (!defOp) - { - continue; - } - IntegerAttr attr = getBufferId(defOp); - - if (!attr) - { - // attach a new buffer id - attr = setBufferId(defOp, this->bufferId++); - } - // propagate attribute to dst, and we are done - setBufferId(op, attr); - - return; - } - } - }); - } - - bool MemoryOptimizationPass::isSafeInPlace(mlir::Operation* op) - { - auto it = m_inplaceOps.find(op->getName().getStringRef().str()); - return it != m_inplaceOps.end() ? 
it->second : false; - } -} - -namespace mlir -{ - std::unique_ptr createMemoryOptimizationPass() - { - return std::make_unique(); - } -} // namespace mlir diff --git a/src/contrib/mlir/backend/pass/memory_optimization.hpp b/src/contrib/mlir/backend/pass/memory_optimization.hpp deleted file mode 100644 index b4041b40b8e..00000000000 --- a/src/contrib/mlir/backend/pass/memory_optimization.hpp +++ /dev/null @@ -1,27 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -// NOTE: This file follows nGraph format style and MLIR naming convention since it does -// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. - -#pragma once - -#include - -namespace mlir -{ - std::unique_ptr createMemoryOptimizationPass(); -} diff --git a/src/contrib/mlir/backend/pass/op_lowerers.inc b/src/contrib/mlir/backend/pass/op_lowerers.inc index bbf39097640..d906d8de2b4 100644 --- a/src/contrib/mlir/backend/pass/op_lowerers.inc +++ b/src/contrib/mlir/backend/pass/op_lowerers.inc @@ -27,7 +27,7 @@ MLIR_OP(NGAddOp , true ) MLIR_OP(NGArgMaxRedOp , false ) MLIR_OP(NGArgMinRedOp , false ) -MLIR_OP(NGConcatOp , false ) +MLIR_OP(NGConcatOp , true ) MLIR_OP(NGConvolutionOp , false ) MLIR_OP(NGDivOp , true ) MLIR_OP(NGDotOp , false ) diff --git a/src/contrib/mlir/core/ngraph_dialect/ops.cpp b/src/contrib/mlir/core/ngraph_dialect/ops.cpp index 0b646e854b5..7e6509a42c1 100644 --- a/src/contrib/mlir/core/ngraph_dialect/ops.cpp +++ b/src/contrib/mlir/core/ngraph_dialect/ops.cpp @@ -309,28 +309,6 @@ mlir::LogicalResult verifyOp(NGConvolutionOp* op) return mlir::success(); } -static std::string getBufferIdAttrName() -{ - return "ng.buffer_id"; -} - -void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr) -{ - op->setAttr(getBufferIdAttrName(), attr); -} - -mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val) -{ - auto attr = mlir::IntegerAttr::get(IntegerType::get(32, op->getContext()), val); - setBufferId(op, attr); - return attr; -} - -mlir::IntegerAttr getBufferId(mlir::Operation* op) -{ - return op->getAttrOfType(getBufferIdAttrName()); -} - namespace mlir { #include "ops_interfaces.cpp.inc" diff --git a/src/contrib/mlir/core/ngraph_dialect/ops.hpp b/src/contrib/mlir/core/ngraph_dialect/ops.hpp index bef4d1c1fe7..4066daa14ad 100644 --- a/src/contrib/mlir/core/ngraph_dialect/ops.hpp +++ b/src/contrib/mlir/core/ngraph_dialect/ops.hpp @@ -41,7 +41,3 @@ namespace mlir #include "ops.h.inc" #undef GET_OP_CLASSES } - -void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr); -mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val); -mlir::IntegerAttr getBufferId(mlir::Operation* op); diff --git a/test/backend/concat.in.cpp b/test/backend/concat.in.cpp index 4fb2c7a38ae..46f0be51ce7 100644 --- a/test/backend/concat.in.cpp +++ b/test/backend/concat.in.cpp @@ 
-349,6 +349,143 @@ NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_propagate_2d_tensor) (vector{3, 7, 2}), read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); } +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_1) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 4, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(A, B); + auto concat = make_shared(NodeVector{add1, add2}, 1); + auto f = make_shared(make_shared(concat, concat), ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected; + expected.resize(8, 4); + + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_2) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 8, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(A, B); + auto concat1 = make_shared(NodeVector{add1, add2}, 1); + auto concat2 = make_shared(NodeVector{add1, add2}, 1); + auto concat12 = make_shared(NodeVector{concat1, concat2}, 1); + auto f = make_shared(make_shared(concat12, concat12), ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected; + expected.resize(16, 4); + + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_3) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 16, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto concat1 = make_shared(NodeVector{A, B}, 1); + auto concat2 = make_shared(NodeVector{A, B}, 1); + auto concat3 = make_shared(NodeVector{A, B}, 1); + auto concat4 = make_shared(NodeVector{A, B}, 1); + auto concat12 = make_shared(NodeVector{concat1, concat2}, 1); + auto concat34 = make_shared(NodeVector{concat3, concat4}, 1); + auto concat14 = make_shared(NodeVector{concat12, concat34}, 1); + auto f = make_shared(make_shared(concat14, concat14), ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected; + expected.resize(32, 2); + + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat) +{ + 
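+    // The concat below consumes two values that depend on each other
+    // (add2 = add1 + add1), so in-place assignment must keep both operand
+    // views consistent. With every input element equal to 1: add1 = 2,
+    // add2 = 4, concat = [2,2,2,2,4,4,4,4], and add3 doubles that to the
+    // expected {4, 4, 4, 4, 8, 8, 8, 8}.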
Shape shape{2, 2}; + Shape shape_r{4, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(add1, add1); + auto concat = make_shared(NodeVector{add1, add2}, 0); + auto add3 = make_shared(concat, concat); + auto f = make_shared(add3, ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected = {4, 4, 4, 4, 8, 8, 8, 8}; + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat_2) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 6, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(A, B); + auto add3 = make_shared(A, B); + auto add4 = make_shared(A, B); + auto add5 = make_shared(A, B); + + auto concat1 = make_shared(NodeVector{add1, add2, add3}, 1); + + auto concat2 = make_shared(NodeVector{add4, add2, add5}, 1); + + auto add6 = make_shared(concat1, concat2); + auto f = make_shared(add6, ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} // from numpy import * // a=linspace(1,2*3*4*3*2,2*3*4*3*2) // b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2) diff --git a/test/mlir/affine_conversion/memory_opt.mlir b/test/mlir/affine_conversion/memory_opt.mlir new file mode 100644 index 00000000000..147807ea5ad --- /dev/null +++ b/test/mlir/affine_conversion/memory_opt.mlir @@ -0,0 +1,128 @@ +// RUN: ngraph-opt %s --split-input-file --ngraph-memory-opt --ngraph-memory-opt-concat --ngraph-memory-opt-eltwise -convert-ngraph-to-affine | FileCheck %s + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1) +// CHECK-LABEL: test0 +// CHECK: %[[B:.*]] = alloc() : memref<16xi8> +// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: dealloc %[[B]] : memref<16xi8> +func @test0(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> { + %0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %2 = "ng.add"(%1, %1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %3 = "ng.add"(%2, %2) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + "ng.return"(%3) : (!ng.tensor<2x2xf32>) -> () +} + +// ----- + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1) 
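+// (An aside on the maps: MAP0 addresses the start of the shared buffer,
+// while MAP1 below is the same row-major layout shifted by 4 f32 elements,
+// i.e. 16 bytes. The second 2x2 operand is therefore viewed right after the
+// first one, and the 4x2 concat result aliases the whole buffer via MAP0,
+// so the concat can be done in place.)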
+// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1 + 4) +// CHECK-LABEL: test1 +// CHECK: %[[B:.*]] = alloc() : memref<32xi8> +// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP1]]> +// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<4x2xf32, #[[MAP0]]> +// CHECK: dealloc %[[B]] : memref<32xi8> +func @test1(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32> { + %0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %2 = "ng.concat"(%0, %1) {concatenation_axis = 0} : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32> + %3 = "ng.add"(%2, %2) : (!ng.tensor<4x2xf32>, !ng.tensor<4x2xf32>) -> !ng.tensor<4x2xf32> + "ng.return"(%3) : (!ng.tensor<4x2xf32>) -> () +} + +// ----- + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4) +// CHECK-DAG: #[[MAP2:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP3:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2) +// CHECK-LABEL: test2 +// CHECK: %[[B1:.*]] = alloc() : memref<32xi8> +// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP1]]> +// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x4x2xf32, #[[MAP2]]> +// CHECK: %[[B2:.*]] = alloc() : memref<64xi8> +// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]> +// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]> +func @test2(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>){ + %0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + // inplace + %2 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + // cannot be done inplace, %3 and %2 cannot alias + %3 = "ng.concat"(%0, %1, %2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32> + // inplace destructive. 
%3 and %2 cannot alias + %4 = "ng.add"(%3, %3) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32> + + // no inplace, result is output + %5 = "ng.add"(%2, %2) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x4x2xf32> + // no inplace, result is output + %6 = "ng.add"(%4, %4) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32> + "ng.return"(%5, %6) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>) -> () +} + +// ----- + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP8:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 8) +// CHECK-DAG: #[[MAP9:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 16) +// CHECK-DAG: #[[MAP10:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 24) +// CHECK-DAG: #[[MAP11:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2 + 16) +// CHECK-DAG: #[[MAP13:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2) +// CHECK-LABEL: test3 +// CHECK: %[[B:.*]] = alloc() : memref<128xi8> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP8]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP9]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP10]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP11]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP12]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x16x2xf32, #[[MAP13]]> +// CHECK: dealloc %[[B]] : memref<128xi8> +func @test3(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x16x2xf32> { + %0 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %1 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %2 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %3 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %4 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32> + %5 = "ng.concat"(%2, %3) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32> + %6 = "ng.concat"(%4, %5) {concatenation_axis = 1} : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x16x2xf32> + %7 = "ng.add"(%6, %6) : (!ng.tensor<1x16x2xf32>, !ng.tensor<1x16x2xf32>) -> !ng.tensor<1x16x2xf32> + "ng.return"(%7) : (!ng.tensor<1x16x2xf32>) -> () +} + +// ----- + +//CHECK-DAG: #[[MAP4:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4) +//CHECK-DAG: #[[MAP5:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2) +//CHECK-DAG: #[[MAP6:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 8) +//CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 12 + d1 * 2 + d2) +// CHECK-LABEL: test4 +//CHECK: %[[B1:.*]] = alloc() : memref<1x2x2xf32> +//CHECK: %[[B2:.*]] = alloc() : memref<48xi8> +//CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP4]]> +//CHECK: %[[B3:.*]] = alloc() : memref<1x2x2xf32> +//CHECK: std.view %[[B2]][][] : memref<48xi8> to 
memref<1x2x2xf32, #[[MAP5]]> +//CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP6]]> +//CHECK: %[[B4:.*]] = alloc() : memref<1x6x2xf32> +//CHECK: std.view %1[][] : memref<48xi8> to memref<1x6x2xf32, #[[MAP12]]> +//CHECK: dealloc %[[B1]] : memref<1x2x2xf32> +//CHECK: dealloc %[[B2]] : memref<48xi8> +//CHECK: dealloc %[[B3]] : memref<1x2x2xf32> +//CHECK: dealloc %[[B4]] : memref<1x6x2xf32> +func @test4(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x8x2xf32> { + %S0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %S1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %S2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %R0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %R2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + // pre-existing assignment of S1 in %D2 prevents assignment for %D1 concat + %D1 = "ng.concat"(%S0, %S1, %S2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32> + %D2 = "ng.concat"(%R0, %S1, %R2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32> + %D3 = "ng.add"(%D1, %D2) : (!ng.tensor<1x6x2xf32>, !ng.tensor<1x6x2xf32>) -> !ng.tensor<1x6x2xf32> + "ng.return"(%D3) : (!ng.tensor<1x6x2xf32>) -> () +} From f08372ba99bec78804d8b4ab2ce221603e4a9cd4 Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Tue, 14 Jan 2020 06:38:21 +0100 Subject: [PATCH 09/12] [PY] Add flag to compile python API in debug mode (#4052) * [PY] Add flag to compile python API in debug mode * Move debug and release flags to helper function * Add missing Docstring Co-authored-by: Sang Ik Lee --- python/setup.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index b0d08f82e9a..5ff0dff4431 100644 --- a/python/setup.py +++ b/python/setup.py @@ -26,6 +26,7 @@ PYNGRAPH_ROOT_DIR = os.path.abspath(os.path.dirname(__file__)) NGRAPH_DEFAULT_INSTALL_DIR = os.environ.get('HOME') NGRAPH_ONNX_IMPORT_ENABLE = os.environ.get('NGRAPH_ONNX_IMPORT_ENABLE') +NGRAPH_PYTHON_DEBUG = os.environ.get('NGRAPH_PYTHON_DEBUG') def find_ngraph_dist_dir(): @@ -367,6 +368,13 @@ def _add_extra_compile_arg(self, flag, compile_args): return True return False + def add_debug_or_release_flags(self): + """Return compiler flags for Release and Debug build types.""" + if NGRAPH_PYTHON_DEBUG in ['TRUE', 'ON', True]: + return ['-O0', '-g'] + else: + return ['-O2', '-D_FORTIFY_SOURCE=2'] + def build_extensions(self): """Build extension providing extra compiler flags.""" if sys.platform == 'win32': @@ -388,7 +396,8 @@ def build_extensions(self): add_platform_specific_link_args(ext.extra_link_args) ext.extra_compile_args += ['-Wformat', '-Wformat-security'] - ext.extra_compile_args += ['-O2', '-D_FORTIFY_SOURCE=2'] + ext.extra_compile_args += self.add_debug_or_release_flags() + if sys.platform == 'darwin': ext.extra_compile_args += ['-stdlib=libc++'] build_ext.build_extensions(self) From 9bfbd3c6ee3edb8873954332a64dc636a2ad977a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Tue, 14 Jan 2020 15:25:19 +0100 Subject: [PATCH 10/12] [ONNX] Extended support for provenance tags (#4154) * Checking if provenance_tags key exists * 
Add provenance tag prototype * Format provenance tag * Display provenance tag * Clean debug printing * Add const to variables * Separate method for add provenance tags * Return NodeVector reference * Return const NodeVector * Moved add_provenance_tags function to commons * Style apply * Simple model for tests * Provenance tag test * Expect substring instead of equal * Add provenance tags to intermediate nodes recursively * One tag per node * Add traverse node args instead of recursion * Return NodeVector instead of set of pointers * Use traverse_nodes and lambda function * Remove unused helper functions * Remove is_constant() condition * Update test model prototxt * Update test substring * Use node name and output names to build provenance tags in onnx importer * Unit tests for onnx_importer provenance tags * Missing include * Add provenance tags to constants built from ONNX initializers * Add provenance tags to Constants and Parameters created out of ONNX inputs and initializers * More strict assertions in onnx provenance tests * Unit test for onnx importer Parameter nodes tagging * Helper function for the onnx provenance tests * Some docs * Obsolete comment removal * Separate file for onnx provenance tags unit tests * Code formatting * Move the inputs tagging to the Graph class * Tagging moved to the Graph class entirely * Missing include and extra helper variable * Unit tests helper documentation * Change the UT helper to lowercase Co-authored-by: Katarzyna Mitrus --- .../frontend/onnx_import/core/graph.cpp | 77 ++++++++++++++- .../frontend/onnx_import/core/graph.hpp | 10 ++ .../frontend/onnx_import/utils/common.cpp | 18 ---- .../frontend/onnx_import/utils/common.hpp | 2 - test/CMakeLists.txt | 1 + .../onnx/provenance_input_tags.prototxt | 45 +++++++++ .../provenance_multiple_outputs_op.prototxt | 77 +++++++++++++++ .../provenance_node_name_and_outputs.prototxt | 51 ++++++++++ .../onnx/provenance_only_outputs.prototxt | 50 ++++++++++ test/onnx/onnx_import.in.cpp | 15 --- test/onnx/onnx_import_provenance.in.cpp | 95 +++++++++++++++++++ 11 files changed, 402 insertions(+), 39 deletions(-) create mode 100644 test/models/onnx/provenance_input_tags.prototxt create mode 100644 test/models/onnx/provenance_multiple_outputs_op.prototxt create mode 100644 test/models/onnx/provenance_node_name_and_outputs.prototxt create mode 100644 test/models/onnx/provenance_only_outputs.prototxt create mode 100644 test/onnx/onnx_import_provenance.in.cpp diff --git a/src/ngraph/frontend/onnx_import/core/graph.cpp b/src/ngraph/frontend/onnx_import/core/graph.cpp index dfa2772b200..f79d781e645 100644 --- a/src/ngraph/frontend/onnx_import/core/graph.cpp +++ b/src/ngraph/frontend/onnx_import/core/graph.cpp @@ -15,6 +15,8 @@ //***************************************************************************** #include +#include +#include #include "graph.hpp" #include "node.hpp" @@ -57,6 +59,37 @@ namespace ngraph std::string domain = get_node_domain(node_proto); return (domain.empty() ?
"" : domain + ".") + node_proto.op_type(); } + + static std::string concat_strings( + const std::vector<std::reference_wrapper<const std::string>>& strings) + { + const auto concat_with_comma = + [](const std::string& accumulator, + std::reference_wrapper<const std::string> next_string) { + return accumulator + ", " + next_string.get(); + }; + + return std::accumulate( + strings.begin() + 1, strings.end(), strings.begin()->get(), concat_with_comma); + } + + static std::string build_input_provenance_tag(const std::string& input_name, + const Shape& shape) + { + std::stringstream tag_builder; + tag_builder << "<ONNX Input (" << input_name << ") Shape:" << shape << ">"; + return tag_builder.str(); + } + + static std::string build_op_provenance_tag(const Node& onnx_node) + { + const auto output_names = concat_strings(onnx_node.get_output_names()); + const auto node_name = + onnx_node.get_name().empty() ? "" : onnx_node.get_name() + " "; + + return std::string{"<ONNX " + onnx_node.op_type() + " (" + node_name + "-> " + + output_names + ")>"}; + } } // namespace detail Graph::Graph(const onnx::GraphProto& graph_proto, Model& model, const Weights& weights) @@ -72,7 +105,9 @@ namespace ngraph m_initializers.emplace(initializer_tensor.name(), tensor); // For each initializer, create a Constant node and store in cache - m_ng_node_cache.emplace(initializer_tensor.name(), tensor.get_ng_constant()); + auto ng_constant = tensor.get_ng_constant(); + add_provenance_tag_to_initializer(tensor, ng_constant); + m_ng_node_cache.emplace(initializer_tensor.name(), std::move(ng_constant)); } } @@ -87,8 +122,10 @@ namespace ngraph continue; } - m_ng_node_cache[input.name()] = - m_inputs.back().get_ng_node(m_parameters, m_initializers, weights); + const auto value_info = m_inputs.back(); + auto ng_node = value_info.get_ng_node(m_parameters, m_initializers, weights); + add_provenance_tag_to_input(value_info, ng_node); + m_ng_node_cache[input.name()] = std::move(ng_node); } // Process all graph outputs @@ -160,11 +197,43 @@ namespace { const auto ng_node_factory = m_model->get_operator(onnx_node.op_type(), onnx_node.domain()); + const auto ng_node_vector = ng_node_factory(onnx_node); - common::add_provenance_tags(onnx_node, ng_node_vector); + add_provenance_tags(onnx_node, ng_node_vector); + return ng_node_vector; } + void Graph::add_provenance_tag_to_initializer( + const Tensor& tensor, std::shared_ptr<default_opset::Constant> node) const + { + const std::string tag = + detail::build_input_provenance_tag(tensor.get_name(), tensor.get_shape()); + + node->add_provenance_tag(tag); + } + + void Graph::add_provenance_tag_to_input(const ValueInfo& input, + std::shared_ptr<ngraph::Node> node) const + { + const std::string tag = + detail::build_input_provenance_tag(input.get_name(), input.get_shape()); + + node->add_provenance_tag(tag); + } + + void Graph::add_provenance_tags(const Node& onnx_node, + const NodeVector& ng_node_vector) const + { + const auto tag = detail::build_op_provenance_tag(onnx_node); + const auto ng_inputs = onnx_node.get_ng_inputs(); + + ngraph::traverse_nodes( + ng_node_vector, + [&tag](std::shared_ptr<ngraph::Node> ng_node) { ng_node->add_provenance_tag(tag); }, + false, + ng_inputs); + } } // namespace onnx_import } // namespace ngraph diff --git a/src/ngraph/frontend/onnx_import/core/graph.hpp b/src/ngraph/frontend/onnx_import/core/graph.hpp index 3fb5cdb0b62..c24924d91eb 100644 --- a/src/ngraph/frontend/onnx_import/core/graph.hpp +++ b/src/ngraph/frontend/onnx_import/core/graph.hpp @@ -20,6 +20,7 @@ #include #include +#include "default_opset.hpp" #include "model.hpp" #include "ngraph/op/parameter.hpp" #include "operator_set.hpp" @@ -46,6 +47,15 @@ namespace ngraph const std::string& get_name() const { return
m_graph_proto->name(); } NodeVector make_ng_nodes(const Node& onnx_node) const; + protected: + void add_provenance_tag_to_initializer( + const Tensor& initializer, std::shared_ptr node) const; + + void add_provenance_tag_to_input(const ValueInfo& input, + std::shared_ptr node) const; + + void add_provenance_tags(const Node& onnx_node, const NodeVector& ng_node_vector) const; + private: const onnx::GraphProto* m_graph_proto; std::vector m_nodes; diff --git a/src/ngraph/frontend/onnx_import/utils/common.cpp b/src/ngraph/frontend/onnx_import/utils/common.cpp index 261c750e907..a4e5e1915fd 100644 --- a/src/ngraph/frontend/onnx_import/utils/common.cpp +++ b/src/ngraph/frontend/onnx_import/utils/common.cpp @@ -28,24 +28,6 @@ namespace ngraph { namespace common { - const NodeVector& add_provenance_tags(const Node& onnx_node, - const NodeVector& ng_node_vector) - { - const std::string node_name = - onnx_node.get_name().empty() ? "unnamed node" : onnx_node.get_name(); - const std::string provenance_tag = - ""; - - auto ng_inputs = onnx_node.get_ng_inputs(); - ngraph::traverse_nodes(ng_node_vector, - [&](std::shared_ptr ng_node) { - ng_node->add_provenance_tag(provenance_tag); - }, - false, - ng_inputs); - return ng_node_vector; - } - const ngraph::element::Type& get_ngraph_element_type(int64_t onnx_type) { switch (onnx_type) diff --git a/src/ngraph/frontend/onnx_import/utils/common.hpp b/src/ngraph/frontend/onnx_import/utils/common.hpp index 5fbb871e2da..8189cf1f591 100644 --- a/src/ngraph/frontend/onnx_import/utils/common.hpp +++ b/src/ngraph/frontend/onnx_import/utils/common.hpp @@ -38,8 +38,6 @@ namespace ngraph { namespace common { - const NodeVector& add_provenance_tags(const Node& onnx_node, - const NodeVector& ng_node_vector); const ngraph::element::Type& get_ngraph_element_type(std::int64_t onnx_type); /// \brief Return a monotonic sequence. 
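For reference, a minimal worked example of what the relocated tag-building helpers produce (a sketch: the values below are inferred from the helpers in graph.cpp and the unit tests added by this patch, not captured from importer output):

// concat_strings seeds std::accumulate with the first element, so the
// joined list has no leading separator:
//   {"output_of_add"}     -> "output_of_add"
//   {"values", "indices"} -> "values, indices"
// build_op_provenance_tag wraps the op type, the optional node name and this
// joined list into one tag, and Graph::add_provenance_tags attaches that tag
// to every nGraph node created for the ONNX node via ngraph::traverse_nodes.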
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2921766d5ac..7833ea27469 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -498,6 +498,7 @@ if (NGRAPH_ONNX_IMPORT_ENABLE) list(APPEND MULTI_TEST_SRC onnx/onnx_import.in.cpp onnx/onnx_import_convpool.in.cpp + onnx/onnx_import_provenance.in.cpp onnx/onnx_import_reshape.in.cpp onnx/onnx_import_rnn.in.cpp onnx/onnx_import_quant.in.cpp) diff --git a/test/models/onnx/provenance_input_tags.prototxt b/test/models/onnx/provenance_input_tags.prototxt new file mode 100644 index 00000000000..85ecbebf698 --- /dev/null +++ b/test/models/onnx/provenance_input_tags.prototxt @@ -0,0 +1,45 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "initializer_of_A" + input: "input_B" + output: "output_of_add" + op_type: "Add" + name: "Add_node" + } + name: "test_graph" + initializer { + dims: 0 + data_type: 7 + int64_data: 1 + name: "initializer_of_A" + } + input { + name: "input_B" + type { + tensor_type { + elem_type: 7 + shape { + dim { + } + } + } + } + } + output { + name: "output_of_add" + type { + tensor_type { + elem_type: 7 + shape { + dim { + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/test/models/onnx/provenance_multiple_outputs_op.prototxt b/test/models/onnx/provenance_multiple_outputs_op.prototxt new file mode 100644 index 00000000000..0369588e46b --- /dev/null +++ b/test/models/onnx/provenance_multiple_outputs_op.prototxt @@ -0,0 +1,77 @@ +ir_version: 4 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + input: "k" + output: "values" + output: "indices" + op_type: "TopK" + name: "TOPK" + } + name: "test_graph" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "k" + type { + tensor_type { + elem_type: 7 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "values" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "indices" + type { + tensor_type { + elem_type: 7 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 10 +} diff --git a/test/models/onnx/provenance_node_name_and_outputs.prototxt b/test/models/onnx/provenance_node_name_and_outputs.prototxt new file mode 100644 index 00000000000..784a17a4129 --- /dev/null +++ b/test/models/onnx/provenance_node_name_and_outputs.prototxt @@ -0,0 +1,51 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "input_A" + input: "input_B" + output: "output_of_add" + op_type: "Add" + name: "Add_node" + } + name: "test_graph" + input { + name: "input_A" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + input { + name: "input_B" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + output { + name: "output_of_add" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/test/models/onnx/provenance_only_outputs.prototxt b/test/models/onnx/provenance_only_outputs.prototxt new file mode 100644 index 00000000000..b8dc775c67c --- /dev/null +++ b/test/models/onnx/provenance_only_outputs.prototxt @@ -0,0 +1,50 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "input_A" + input: "input_B" + output: "output_of_add" + op_type: "Add" + } + name: "test_graph" + input { + name: 
"input_A" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + input { + name: "input_B" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + output { + name: "output_of_add" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/test/onnx/onnx_import.in.cpp b/test/onnx/onnx_import.in.cpp index 1f41f0c6102..e92cd1719dd 100644 --- a/test/onnx/onnx_import.in.cpp +++ b/test/onnx/onnx_import.in.cpp @@ -356,21 +356,6 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, model_initializer_wo_input) EXPECT_TRUE(test::all_close_f(expected_output, output.front())); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tag_text) -{ - auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/provenance_tag_add.prototxt")); - - auto ng_nodes = function->get_ordered_ops(); - for (auto ng_node : ng_nodes) - { - for (auto tag : ng_node->get_provenance_tags()) - { - EXPECT_HAS_SUBSTRING(tag, "ONNX"); - } - } -} - // ############################################################################ OPERATOR TESTS NGRAPH_TEST(onnx_${BACKEND_NAME}, model_addmul_abc) { diff --git a/test/onnx/onnx_import_provenance.in.cpp b/test/onnx/onnx_import_provenance.in.cpp new file mode 100644 index 00000000000..718b425d632 --- /dev/null +++ b/test/onnx/onnx_import_provenance.in.cpp @@ -0,0 +1,95 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** + +#include "gtest/gtest.h" +#include "ngraph/file_util.hpp" +#include "ngraph/frontend/onnx_import/default_opset.hpp" +#include "ngraph/frontend/onnx_import/onnx.hpp" +#include "util/test_control.hpp" +#include "util/type_prop.hpp" + +using namespace ngraph; +using namespace ngraph::onnx_import; + +static std::string s_manifest = "${MANIFEST}"; + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tag_text) +{ + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/provenance_tag_add.prototxt")); + + const auto ng_nodes = function->get_ordered_ops(); + for (const auto ng_node : ng_nodes) + { + for (const auto tag : ng_node->get_provenance_tags()) + { + EXPECT_HAS_SUBSTRING(tag, "ONNX"); + } + } +} + +// the NodeToCheck parameter of this template is used to find a node in the whole subgraph +// that a particular unit test is supposed to check against the expected provenance tag +template +void test_provenance_tags(const std::string& model_path, const std::string& expected_provenance_tag) +{ + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, model_path)); + + for (const auto ng_node : function->get_ordered_ops()) + { + if (as_type_ptr(ng_node)) + { + const auto tags = ng_node->get_provenance_tags(); + ASSERT_EQ(tags.size(), 1) << "There should be exactly one provenance tag set for " + << ng_node; + + EXPECT_EQ(*(tags.cbegin()), expected_provenance_tag); + } + } +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_only_output) +{ + // the Add node in the model does not have a name, + // only its output name should be found in the provenance tags + test_provenance_tags("onnx/provenance_only_outputs.prototxt", + " output_of_add)>"); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_node_name_and_outputs) +{ + test_provenance_tags("onnx/provenance_node_name_and_outputs.prototxt", + " output_of_add)>"); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_multiple_outputs_op) +{ + test_provenance_tags("onnx/provenance_multiple_outputs_op.prototxt", + " values, indices)>"); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tagging_constants) +{ + test_provenance_tags("onnx/provenance_input_tags.prototxt", + ""); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tagging_parameters) +{ + test_provenance_tags("onnx/provenance_input_tags.prototxt", + ""); +} From cd21bddcbfba2fe539fd1b9cc7011c7dc3df6cdc Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Tue, 14 Jan 2020 15:59:00 +0100 Subject: [PATCH 11/12] [ONNX] Remove unnecesary Mean in opset 8 (#4053) * [ONNX] Remove unnecesary Mean in opset 8 * Use opset1 instead of v1 * Fix merge artifact * Use opset1 Divide instead of opset0 Co-authored-by: Robert Kimball --- src/ngraph/frontend/onnx_import/op/mean.cpp | 27 ++----------------- src/ngraph/frontend/onnx_import/op/mean.hpp | 6 ----- .../frontend/onnx_import/ops_bridge.cpp | 1 - 3 files changed, 2 insertions(+), 32 deletions(-) diff --git a/src/ngraph/frontend/onnx_import/op/mean.cpp b/src/ngraph/frontend/onnx_import/op/mean.cpp index 45e91b185e9..0f6925e830c 100644 --- a/src/ngraph/frontend/onnx_import/op/mean.cpp +++ b/src/ngraph/frontend/onnx_import/op/mean.cpp @@ -16,10 +16,6 @@ #include "mean.hpp" #include "default_opset.hpp" -#include "ngraph/op/add.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/op/divide.hpp" -#include "ngraph/opsets/opset0.hpp" #include "utils/variadic.hpp" namespace ngraph @@ -29,25 +25,6 @@ namespace 
ngraph namespace op { namespace set_1 - { - NodeVector mean(const Node& node) - { - auto sum = variadic::make_ng_variadic_op(node).front(); - auto shape = sum->get_shape(); - - // Create a Constant representing the number of inputs with the same shape as - // sum - auto count = default_opset::Constant::create( - sum->get_element_type(), - shape, - std::vector(shape_size(shape), node.get_ng_inputs().size())); - - return {sum / count}; - } - - } // namespace set_1 - - namespace set_8 { NodeVector mean(const Node& node) { @@ -61,10 +38,10 @@ namespace ngraph shape, std::vector(shape_size(shape), node.get_ng_inputs().size())); - return {sum / count}; + return {std::make_shared(sum, count)}; } - } // namespace set_8 + } // namespace set_1 } // namespace op diff --git a/src/ngraph/frontend/onnx_import/op/mean.hpp b/src/ngraph/frontend/onnx_import/op/mean.hpp index 300b31f9048..955324e4922 100644 --- a/src/ngraph/frontend/onnx_import/op/mean.hpp +++ b/src/ngraph/frontend/onnx_import/op/mean.hpp @@ -31,12 +31,6 @@ namespace ngraph } // namespace set_1 - namespace set_8 - { - NodeVector mean(const Node& node); - - } // namespace set_1 - } // namespace op } // namespace onnx_import diff --git a/src/ngraph/frontend/onnx_import/ops_bridge.cpp b/src/ngraph/frontend/onnx_import/ops_bridge.cpp index 457c558305b..ea8f8fa675c 100644 --- a/src/ngraph/frontend/onnx_import/ops_bridge.cpp +++ b/src/ngraph/frontend/onnx_import/ops_bridge.cpp @@ -301,7 +301,6 @@ namespace ngraph REGISTER_OPERATOR("Max", 1, max); REGISTER_OPERATOR("Max", 8, max); REGISTER_OPERATOR("Mean", 1, mean); - REGISTER_OPERATOR("Mean", 8, mean); REGISTER_OPERATOR("MeanVarianceNormalization", 1, mean_variance_normalization); REGISTER_OPERATOR("MeanVarianceNormalization", 9, mean_variance_normalization); REGISTER_OPERATOR("Min", 1, min); From c748bda46af4a812c880764c935324ca179299e5 Mon Sep 17 00:00:00 2001 From: Robert Kimball Date: Tue, 14 Jan 2020 10:49:16 -0800 Subject: [PATCH 12/12] Revert "Remove conditional compile (#4106)" (#4178) This reverts commit 1e58565af8d8650f9c9637a95d07052b2d1e2c8f. --- src/contrib/mlir/core/pass/ng_dialect_builder.cpp | 4 ++++ src/ngraph/builder/make_constant.hpp | 4 ++++ src/ngraph/distributed/mlsl.hpp | 4 ++++ src/ngraph/distributed/open_mpi.hpp | 8 ++++++++ src/ngraph/op/constant.cpp | 12 ++++++++++++ src/ngraph/op/constant.hpp | 4 ++++ src/ngraph/op/experimental/range.cpp | 4 ++++ src/ngraph/op/non_max_suppression.cpp | 10 +++++++--- src/ngraph/op/topk.cpp | 10 +++++++--- src/ngraph/pass/constant_folding_convert.cpp | 8 ++++++++ src/ngraph/pass/constant_folding_reverse.cpp | 4 ++++ src/ngraph/pass/dyn_elimination.cpp | 4 ++++ src/ngraph/runtime/cpu/builder/random_uniform.cpp | 4 ++++ src/ngraph/runtime/gcpu/gcpu_executable.cpp | 4 ++++ src/ngraph/runtime/interpreter/int_executable.hpp | 5 ++++- src/ngraph/serializer.cpp | 10 ++++++++++ src/tools/nbench/benchmark_utils.cpp | 4 ++++ test/backend/distributed.in.cpp | 4 ++++ 18 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/contrib/mlir/core/pass/ng_dialect_builder.cpp b/src/contrib/mlir/core/pass/ng_dialect_builder.cpp index 5656d4b4327..8f96935476e 100644 --- a/src/contrib/mlir/core/pass/ng_dialect_builder.cpp +++ b/src/contrib/mlir/core/pass/ng_dialect_builder.cpp @@ -232,9 +232,11 @@ mlir::Type NgDialectConversionPass::getMlirType(const descriptor::Tensor* tensor // Converts an nGraph element type into an MLIR type. 
mlir::Type NgDialectConversionPass::getMlirType(const element::Type& type) { +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (type) { @@ -259,7 +261,9 @@ mlir::Type NgDialectConversionPass::getMlirType(const element::Type& type) NGRAPH_CHECK(false, "Unreachable"); return mlir::Type(); +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif } mlir::Type NgDialectConversionPass::getMlirType(const ngraph::Node* node) diff --git a/src/ngraph/builder/make_constant.hpp b/src/ngraph/builder/make_constant.hpp index 2ec5e857954..15a8be8bf56 100644 --- a/src/ngraph/builder/make_constant.hpp +++ b/src/ngraph/builder/make_constant.hpp @@ -31,9 +31,11 @@ namespace ngraph { std::shared_ptr val = nullptr; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (type) { case element::Type_t::f32: @@ -97,7 +99,9 @@ namespace ngraph case element::Type_t::undefined: throw ngraph_error("make_constant: Unsupported element type 'undefined'"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif if (shape.size() > 0) { diff --git a/src/ngraph/distributed/mlsl.hpp b/src/ngraph/distributed/mlsl.hpp index 6de80e9fbaa..34d1becfbbc 100644 --- a/src/ngraph/distributed/mlsl.hpp +++ b/src/ngraph/distributed/mlsl.hpp @@ -87,9 +87,11 @@ namespace ngraph } decltype(MLSL::RT_SUM) mlsl_reduce_type; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (reduce_type) { case reduction::Type::SUM: mlsl_reduce_type = MLSL::RT_SUM; break; @@ -99,7 +101,9 @@ namespace ngraph case reduction::Type::MIN: mlsl_reduce_type = MLSL::RT_MIN; break; case reduction::Type::MAX: mlsl_reduce_type = MLSL::RT_MAX; break; } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif MLSL::Environment& env = MLSL::Environment::GetEnv(); MLSL::Distribution* distribution = env.CreateDistribution(env.GetProcessCount(), 1); diff --git a/src/ngraph/distributed/open_mpi.hpp b/src/ngraph/distributed/open_mpi.hpp index 4487283d946..c8aaf7fd28a 100644 --- a/src/ngraph/distributed/open_mpi.hpp +++ b/src/ngraph/distributed/open_mpi.hpp @@ -99,9 +99,11 @@ namespace ngraph } decltype(MPI_SUM) mpi_reduce_type; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (reduce_type) { case reduction::Type::SUM: mpi_reduce_type = MPI_SUM; break; @@ -109,7 +111,9 @@ namespace ngraph case reduction::Type::MIN: mpi_reduce_type = MPI_MIN; break; case reduction::Type::MAX: mpi_reduce_type = MPI_MAX; break; } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif MPI_Allreduce(in, out, count, data_type, mpi_reduce_type, MPI_COMM_WORLD); } @@ -172,9 +176,11 @@ namespace ngraph MPI_Datatype ngraph_type_to_mpi_type(element::Type_t& n_type) { MPI_Datatype m_type = MPI_FLOAT; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error 
"-Wswitch-enum" +#endif switch (n_type) { case element::Type_t::boolean: m_type = MPI_BYTE; break; @@ -194,7 +200,9 @@ namespace ngraph case element::Type_t::undefined: case element::Type_t::dynamic: throw std::runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return m_type; } diff --git a/src/ngraph/op/constant.cpp b/src/ngraph/op/constant.cpp index c5dfc37ac78..665aa5eea5a 100644 --- a/src/ngraph/op/constant.cpp +++ b/src/ngraph/op/constant.cpp @@ -120,9 +120,11 @@ op::Constant::~Constant() string op::Constant::convert_value_to_string(size_t index) const { string rc; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (get_element_type()) { case element::Type_t::boolean: rc = to_string(get_vector()[index]); break; @@ -148,7 +150,9 @@ string op::Constant::convert_value_to_string(size_t index) const case element::Type_t::undefined: throw runtime_error("unsupported type"); case element::Type_t::dynamic: throw runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return rc; } @@ -156,9 +160,11 @@ vector op::Constant::get_value_strings() const { vector rc; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (get_element_type()) { case element::Type_t::boolean: @@ -243,7 +249,9 @@ vector op::Constant::get_value_strings() const case element::Type_t::undefined: throw runtime_error("unsupported type"); case element::Type_t::dynamic: throw runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return rc; } @@ -349,9 +357,11 @@ static bool test_bitwise_identical(const op::Constant* constant) bool op::Constant::are_all_data_elements_bitwise_identical() const { bool rc = false; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (get_element_type()) { case element::Type_t::boolean: @@ -387,7 +397,9 @@ bool op::Constant::are_all_data_elements_bitwise_identical() const case element::Type_t::undefined: case element::Type_t::dynamic: break; } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return rc; } diff --git a/src/ngraph/op/constant.hpp b/src/ngraph/op/constant.hpp index ea2057460bf..028907a1c0e 100644 --- a/src/ngraph/op/constant.hpp +++ b/src/ngraph/op/constant.hpp @@ -318,9 +318,11 @@ namespace ngraph { throw std::runtime_error("Constant initializer does not match shape"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (target_type) { case element::Type_t::boolean: @@ -366,7 +368,9 @@ namespace ngraph case element::Type_t::undefined: throw std::runtime_error("unsupported type"); case element::Type_t::dynamic: throw std::runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif } static constexpr size_t host_alignment() { return 64; } diff --git 
diff --git a/src/ngraph/op/experimental/range.cpp b/src/ngraph/op/experimental/range.cpp
index e431dd2d914..6e9a7d0d4c2 100644
--- a/src/ngraph/op/experimental/range.cpp
+++ b/src/ngraph/op/experimental/range.cpp
@@ -210,9 +210,11 @@ void op::Range::validate_and_infer_types()

     PartialShape result_shape;

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (result_et)
     {
     case element::Type_t::bf16: result_shape = infer_output_shape(this, result_et); break;
@@ -235,7 +237,9 @@ void op::Range::validate_and_infer_types()
             this, false, "Internal nGraph error: unsupported element type: ", result_et);
         break;
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     set_output_type(0, result_et, result_shape);
 }
diff --git a/src/ngraph/op/non_max_suppression.cpp b/src/ngraph/op/non_max_suppression.cpp
index 156d248147f..3d6ae79304e 100644
--- a/src/ngraph/op/non_max_suppression.cpp
+++ b/src/ngraph/op/non_max_suppression.cpp
@@ -154,8 +154,10 @@ int64_t op::v1::NonMaxSuppression::max_boxes_output_from_input() const
     const auto max_output_boxes_input =
         as_type_ptr<op::Constant>(input_value(2).get_node_shared_ptr());

-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wswitch-enum"
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wswitch-enum"
+#endif
     switch (static_cast<element::Type_t>(max_output_boxes_input->get_element_type()))
     {
     case element::Type_t::i8:
@@ -180,7 +182,9 @@ int64_t op::v1::NonMaxSuppression::max_boxes_output_from_input() const
     }
     default: break;
     }
-#pragma GCC diagnostic pop
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif

     return max_output_boxes;
 }
diff --git a/src/ngraph/op/topk.cpp b/src/ngraph/op/topk.cpp
index 50889e16678..19d8f858e64 100644
--- a/src/ngraph/op/topk.cpp
+++ b/src/ngraph/op/topk.cpp
@@ -313,8 +313,10 @@ size_t op::v1::TopK::read_k_from_constant_node(const shared_ptr<Node>& node,
     size_t k = 0;

-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wswitch-enum"
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wswitch-enum"
+#endif
     switch (static_cast<element::Type_t>(k_element_type))
     {
     case element::Type_t::i8: k = validate_and_get_k<int8_t>(k_constant); break;
@@ -322,7 +324,9 @@ size_t op::v1::TopK::read_k_from_constant_node(const shared_ptr<Node>& node,
     case element::Type_t::i32: k = validate_and_get_k<int32_t>(k_constant); break;
     case element::Type_t::i64: k = validate_and_get_k<int64_t>(k_constant); break;
     default: break;
     }
-#pragma GCC diagnostic pop
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif

     return k;
 }
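The NonMaxSuppression and TopK hunks above restore a different flavor of guard than the rest of this revert: their switches are deliberately non-exhaustive (a default: case handles every remaining element type), so -Wswitch-enum is suppressed rather than promoted to an error, and only under clang, which warns about unlisted enumerators even when a default is present. A self-contained illustration of that pattern follows; the enum and function are invented for the example, not taken from the patch.

#include <cstdint>

enum class ElementType : std::uint8_t { i8, i32, i64, f32, f64 };

// Only the integer types are meaningful here; everything else falls through
// to the default. Clang's -Wswitch-enum would still flag the unlisted
// enumerators, so the warning is silenced locally, for clang only.
std::int64_t element_width(ElementType t)
{
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wswitch-enum"
#endif
    switch (t)
    {
    case ElementType::i8: return 8;
    case ElementType::i32: return 32;
    case ElementType::i64: return 64;
    default: return 0; // deliberately not exhaustive
    }
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
}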
diff --git a/src/ngraph/pass/constant_folding_convert.cpp b/src/ngraph/pass/constant_folding_convert.cpp
index 1e9aaac10db..ee1597c272f 100644
--- a/src/ngraph/pass/constant_folding_convert.cpp
+++ b/src/ngraph/pass/constant_folding_convert.cpp
@@ -45,9 +45,11 @@ template
 shared_ptr<op::Constant> fold_constant_convert_helper0(shared_ptr<op::Constant> constant,
                                                        const element::Type& output_element_type)
 {
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (output_element_type)
     {
     case element::Type_t::undefined:
@@ -88,7 +90,9 @@ shared_ptr<op::Constant> fold_constant_convert_helper0(shared_ptr<op::Constant> constant,
     }

     NGRAPH_UNREACHABLE("Unexpected switch case");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 static shared_ptr<op::Constant> fold_constant_convert(shared_ptr<op::Constant> constant,
@@ -101,9 +105,11 @@ static shared_ptr<op::Constant> fold_constant_convert(shared_ptr<op::Constant> c
         return constant;
     }

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (input_element_type)
     {
     case element::Type_t::undefined:
@@ -144,7 +150,9 @@ static shared_ptr<op::Constant> fold_constant_convert(shared_ptr<op::Constant> c

     NGRAPH_UNREACHABLE("Unexpected switch case");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 void pass::ConstantFolding::construct_constant_convert()
diff --git a/src/ngraph/pass/constant_folding_reverse.cpp b/src/ngraph/pass/constant_folding_reverse.cpp
index 827a8ea83de..4fa2d4b0011 100644
--- a/src/ngraph/pass/constant_folding_reverse.cpp
+++ b/src/ngraph/pass/constant_folding_reverse.cpp
@@ -40,9 +40,11 @@ static shared_ptr<op::Constant> fold_constant_reverse(shared_ptr<op::Constant> c
 {
     auto& input_element_type = constant->get_output_element_type(0);

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (input_element_type)
     {
     case element::Type_t::undefined:
@@ -80,7 +82,9 @@ static shared_ptr<op::Constant> fold_constant_reverse(shared_ptr<op::Constant> c

     NGRAPH_UNREACHABLE("Unexpected switch case");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 void pass::ConstantFolding::construct_constant_reverse()
diff --git a/src/ngraph/pass/dyn_elimination.cpp b/src/ngraph/pass/dyn_elimination.cpp
index 74c48e94338..9e58cc40660 100644
--- a/src/ngraph/pass/dyn_elimination.cpp
+++ b/src/ngraph/pass/dyn_elimination.cpp
@@ -385,9 +385,11 @@ void pass::DynElimination::construct_range()

         std::shared_ptr<Node> replacement;

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
         switch (et)
         {
         case element::Type_t::bf16:
@@ -433,7 +435,9 @@ void pass::DynElimination::construct_range()
             NGRAPH_CHECK(false, "Internal nGraph error: unsupported element type: ", et);
             break;
         }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

         replace_node(range_node, replacement);
         return true;
diff --git a/src/ngraph/runtime/cpu/builder/random_uniform.cpp b/src/ngraph/runtime/cpu/builder/random_uniform.cpp
index df84ad18f11..6fc15b3776a 100644
--- a/src/ngraph/runtime/cpu/builder/random_uniform.cpp
+++ b/src/ngraph/runtime/cpu/builder/random_uniform.cpp
@@ -97,9 +97,11 @@ namespace ngraph
                     throw ngraph_error("Unsupported index 2 element type");
                 }
                 auto element_type = args[0].get_element_type();
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
                 switch (element_type)
                 {
                 case element::Type_t::undefined:
@@ -155,7 +157,9 @@ namespace ngraph
                     NGRAPH_UNREACHABLE("Unexpected switch case");
                 }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

                 functors.emplace_back(functor);
             }
diff --git a/src/ngraph/runtime/gcpu/gcpu_executable.cpp b/src/ngraph/runtime/gcpu/gcpu_executable.cpp
index 191a46b795c..2d3c855a32c 100644
--- a/src/ngraph/runtime/gcpu/gcpu_executable.cpp
+++ b/src/ngraph/runtime/gcpu/gcpu_executable.cpp
@@ -127,8 +127,10 @@ bool runtime::gcpu::GCPUExecutable::call(const vector
                 get_output_element_type(0); break;
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     if (m_performance_counters_enabled)
     {
diff --git a/src/ngraph/runtime/interpreter/int_executable.hpp b/src/ngraph/runtime/interpreter/int_executable.hpp
index 6cf3fb01e91..56e5f11b31a 100644
--- a/src/ngraph/runtime/interpreter/int_executable.hpp
+++ b/src/ngraph/runtime/interpreter/int_executable.hpp
@@ -204,10 +204,11 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable
         // We want to check that every OP_TYPEID enumeration is included in the list.
         // These GCC flags enable compile-time checking so that if an enumeration
         // is not in the list an error is generated.
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
-        // #pragma GCC diagnostic error "-Wcovered-switch-default"
+#endif
         switch (get_typeid(node))
         {
         case OP_TYPEID::Abs:
@@ -1889,7 +1890,9 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable
         case OP_TYPEID::TensorIterator:
         case OP_TYPEID::UnknownOp:
             throw unsupported_op("Unsupported op '" + node.description() + "'");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
         }
     }
 };
diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp
index 078dc68c410..15fed5f9f47 100644
--- a/src/ngraph/serializer.cpp
+++ b/src/ngraph/serializer.cpp
@@ -780,9 +780,12 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
     vector<json> node_outputs = get_value<vector<json>>(node_js, "outputs");
     OutputVectorHelper args(deserialize_output_vector(node_js["inputs"]));
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+// #pragma GCC diagnostic error "-Wimplicit-fallthrough"
+#endif
     switch (get_typeid(type_info))
     {
@@ -3012,7 +3015,9 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
             throw runtime_error(ss.str());
         }
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     for (auto& control_dep : control_deps_inputs)
     {
@@ -3152,9 +3157,12 @@ json JSONSerializer::serialize_node(const Node& n)
         node["provenance_tags"] = provenance_tags;
     }

+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+// #pragma GCC diagnostic error "-Wimplicit-fallthrough"
+#endif
     switch (get_typeid(type_info))
     {
     case OP_TYPEID::Abs: { break;
@@ -4661,6 +4669,8 @@ json JSONSerializer::serialize_node(const Node& n)
     case OP_TYPEID::UnknownOp: { break;
     }
     }
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
 #pragma GCC diagnostic pop
+#endif
     return node;
 }
diff --git a/src/tools/nbench/benchmark_utils.cpp b/src/tools/nbench/benchmark_utils.cpp
index eabaf5e3899..24d085d4921 100644
--- a/src/tools/nbench/benchmark_utils.cpp
+++ b/src/tools/nbench/benchmark_utils.cpp
@@ -80,9 +80,11 @@ void set_denormals_flush_to_zero()

 void random_init(shared_ptr<runtime::Tensor> tensor)
 {
     element::Type et = tensor->get_element_type();
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (et)
     {
     case element::Type_t::boolean: init_int_tensor(tensor, 0, 1); break;
@@ -103,7 +105,9 @@ void random_init(shared_ptr<runtime::Tensor> tensor)
     case element::Type_t::f16:
     default: throw runtime_error("unsupported type");
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 default_random_engine& get_random_engine()
diff --git a/test/backend/distributed.in.cpp b/test/backend/distributed.in.cpp
index 747b41607a5..49a7a7ae06c 100644
--- a/test/backend/distributed.in.cpp
+++ b/test/backend/distributed.in.cpp
@@ -48,9 +48,11 @@ static void test_allreduce_common(reduction::Type reduce_type)
     auto a = backend->create_tensor(element::f32, shape);
     auto result = backend->create_tensor(element::f32, shape);

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (reduce_type)
     {
     case reduction::Type::SUM:
@@ -81,7 +83,9 @@ static void test_allreduce_common(reduction::Type reduce_type)
             v[i] = i + 2;
         }
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     auto handle = backend->compile(f);
     handle->call_with_validate({result}, {a});
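Taken together, the hunks in this revert restore one idiom: every switch over an nGraph enumeration is bracketed by pragmas that promote -Wswitch and -Wswitch-enum to hard errors, so adding an enumerator without updating each switch fails the build, and the #if guards skip the pragmas on GCC 4.8, which the guard condition singles out as unable to cope with them. Distilled to a standalone example (illustrative types, not taken from the patch):

enum class Color { Red, Green, Blue };

const char* color_name(Color c)
{
// Promote unhandled-enumerator warnings to errors so the switch below is
// checked for exhaustiveness at compile time; skip the pragmas on GCC 4.8,
// mirroring the guard condition used throughout the revert above.
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
    switch (c)
    {
    case Color::Red: return "red";
    case Color::Green: return "green";
    case Color::Blue: return "blue";
    // No default: a newly added Color enumerator now fails to compile
    // instead of silently falling through.
    }
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
    return "unknown"; // unreachable while the switch stays exhaustive
}

The error-promoting form shown here is the compile-time safety net; the clang "ignored" variant illustrated earlier is its complementary escape hatch for switches that are partial by design.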