From 3b30799a641388ec221a2066ec8e29cee294140d Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Sat, 11 Jan 2020 02:52:55 +0100 Subject: [PATCH 01/12] Add Round op (#4124) * Added round op * Add CPU support, unit tests * Disable UT for PlaidML * Update year Co-authored-by: Scott Cyphers --- src/ngraph/CMakeLists.txt | 2 + src/ngraph/op/op_version_tbl.hpp | 1 + src/ngraph/op/round.cpp | 34 ++++++++ src/ngraph/op/round.hpp | 49 +++++++++++ src/ngraph/ops.hpp | 1 + src/ngraph/opsets/opset0_tbl.hpp | 1 + src/ngraph/pass/constant_folding_unary.cpp | 9 +- src/ngraph/runtime/cpu/cpu_builder.cpp | 16 ++++ src/ngraph/runtime/cpu/cpu_emitter.cpp | 15 ++++ .../runtime/cpu/cpu_external_function.cpp | 2 +- src/ngraph/runtime/cpu/kernel/round.hpp | 39 +++++++++ .../runtime/interpreter/int_executable.hpp | 8 ++ src/ngraph/runtime/plaidml/unit_test.manifest | 4 + src/ngraph/runtime/reference/round.hpp | 52 ++++++++++++ src/ngraph/serializer.cpp | 8 +- test/CMakeLists.txt | 1 + test/backend/round.in.cpp | 83 +++++++++++++++++++ test/op_is.cpp | 9 ++ 18 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 src/ngraph/op/round.cpp create mode 100644 src/ngraph/op/round.hpp create mode 100644 src/ngraph/runtime/cpu/kernel/round.hpp create mode 100644 src/ngraph/runtime/reference/round.hpp create mode 100644 test/backend/round.in.cpp diff --git a/src/ngraph/CMakeLists.txt b/src/ngraph/CMakeLists.txt index f81aaa78830..0bbe6a42d3c 100644 --- a/src/ngraph/CMakeLists.txt +++ b/src/ngraph/CMakeLists.txt @@ -291,6 +291,8 @@ set (SRC op/reduce_mean.hpp op/reduce_sum.cpp op/reduce_sum.hpp + op/round.cpp + op/round.hpp op/quantize.cpp op/quantize.hpp op/quantized_convolution.cpp diff --git a/src/ngraph/op/op_version_tbl.hpp b/src/ngraph/op/op_version_tbl.hpp index 3d62d6c9c4b..0194bcd50f4 100644 --- a/src/ngraph/op/op_version_tbl.hpp +++ b/src/ngraph/op/op_version_tbl.hpp @@ -205,6 +205,7 @@ NGRAPH_OP(Result, ngraph::op::v0, 0) NGRAPH_OP(Reverse, ngraph::op::v0, 0) NGRAPH_OP(Reverse, ngraph::op::v1, 1) NGRAPH_OP(ReverseSequence, ngraph::op::v0, 0) +NGRAPH_OP(Round, ngraph::op::v0, 0) NGRAPH_OP(ScalarConstantLike, ngraph::op::v0, 0) NGRAPH_OP(ScaleShift, ngraph::op::v0, 0) NGRAPH_OP(ScatterAdd, ngraph::op::v0, 0) diff --git a/src/ngraph/op/round.cpp b/src/ngraph/op/round.cpp new file mode 100644 index 00000000000..6a39278b491 --- /dev/null +++ b/src/ngraph/op/round.cpp @@ -0,0 +1,34 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
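// Editor's note (illustration, not part of the patch): round.cpp below is the
// standard nGraph unary-op boilerplate: define the op's type_info, run shape
// and type inference in the constructor, and clone through copy_with_new_args.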
+//*****************************************************************************
+
+#include "ngraph/op/round.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+constexpr NodeTypeInfo op::Round::type_info;
+
+op::Round::Round(const Output<Node>& arg)
+    : UnaryElementwiseArithmetic(arg)
+{
+    constructor_validate_and_infer_types();
+}
+
+shared_ptr<Node> op::Round::copy_with_new_args(const NodeVector& new_args) const
+{
+    check_new_args_count(this, new_args);
+    return make_shared<Round>(new_args.at(0));
+}
diff --git a/src/ngraph/op/round.hpp b/src/ngraph/op/round.hpp
new file mode 100644
index 00000000000..b1c1fc917e5
--- /dev/null
+++ b/src/ngraph/op/round.hpp
@@ -0,0 +1,49 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/op/util/unary_elementwise_arithmetic.hpp"
+
+namespace ngraph
+{
+    namespace op
+    {
+        namespace v0
+        {
+            /// \brief Elementwise round operation.
+            class NGRAPH_API Round : public util::UnaryElementwiseArithmetic
+            {
+            public:
+                static constexpr NodeTypeInfo type_info{"Round", 0};
+                const NodeTypeInfo& get_type_info() const override { return type_info; }
+                /// \brief Constructs a round operation.
+                Round() = default;
+
+                /// \brief Constructs a round operation. The output is rounded to the
+                /// nearest integer for each value. In the case of halves, the rule is to
+                /// round them to the nearest even integer.
+                ///
+                /// \param arg Node that produces the input tensor.
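+                ///
+                /// A minimal usage sketch (editor's illustration, not part of the
+                /// original patch):
+                ///   auto arg = std::make_shared<op::Parameter>(element::f32, Shape{4});
+                ///   auto r = std::make_shared<op::v0::Round>(arg);
+                ///   // 0.9 -> 1, 1.5 -> 2, 2.5 -> 2 (ties round to even)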
+                Round(const Output<Node>& arg);
+
+                virtual std::shared_ptr<Node>
+                    copy_with_new_args(const NodeVector& new_args) const override;
+            };
+        }
+        using v0::Round;
+    }
+}
diff --git a/src/ngraph/ops.hpp b/src/ngraph/ops.hpp
index f5f9dca8189..7b5b76c5849 100644
--- a/src/ngraph/ops.hpp
+++ b/src/ngraph/ops.hpp
@@ -161,6 +161,7 @@
 #include "ngraph/op/result.hpp"
 #include "ngraph/op/reverse.hpp"
 #include "ngraph/op/reverse_sequence.hpp"
+#include "ngraph/op/round.hpp"
 #include "ngraph/op/scatter_add.hpp"
 #include "ngraph/op/scatter_nd_add.hpp"
 #include "ngraph/op/select.hpp"
diff --git a/src/ngraph/opsets/opset0_tbl.hpp b/src/ngraph/opsets/opset0_tbl.hpp
index aa2de1ed469..5085235f7ad 100644
--- a/src/ngraph/opsets/opset0_tbl.hpp
+++ b/src/ngraph/opsets/opset0_tbl.hpp
@@ -173,6 +173,7 @@ NGRAPH_OP(Result, ngraph::op)
 NGRAPH_OP(Reverse, ngraph::op)
 NGRAPH_OP(ReverseSequence, ngraph::op)
 NGRAPH_OP(RNNCell, ngraph::op)
+NGRAPH_OP(Round, ngraph::op)
 NGRAPH_OP(ScalarConstantLike, ngraph::op)
 NGRAPH_OP(ScaleShift, ngraph::op)
 NGRAPH_OP(ScatterAdd, ngraph::op)
diff --git a/src/ngraph/pass/constant_folding_unary.cpp b/src/ngraph/pass/constant_folding_unary.cpp
index f6e9d7d26cb..925e981c40e 100644
--- a/src/ngraph/pass/constant_folding_unary.cpp
+++ b/src/ngraph/pass/constant_folding_unary.cpp
@@ -23,6 +23,7 @@
 #include "ngraph/op/negative.hpp"
 #include "ngraph/op/not.hpp"
 #include "ngraph/op/relu.hpp"
+#include "ngraph/op/round.hpp"
 #include "ngraph/op/sign.hpp"
 #include "ngraph/op/sqrt.hpp"
 #include "ngraph/runtime/reference/abs.hpp"
@@ -32,6 +33,7 @@
 #include "ngraph/runtime/reference/negate.hpp"
 #include "ngraph/runtime/reference/not.hpp"
 #include "ngraph/runtime/reference/relu.hpp"
+#include "ngraph/runtime/reference/round.hpp"
 #include "ngraph/runtime/reference/sign.hpp"
 #include "ngraph/runtime/reference/sqrt.hpp"
@@ -42,7 +44,7 @@ bool is_supported_unary_op(std::shared_ptr<Node> n)
 {
     return is_type<op::Abs>(n) || is_type<op::Ceiling>(n) || is_type<op::Floor>(n) ||
            is_type<op::Negative>(n) || is_type<op::Not>(n) || is_type<op::Relu>(n) ||
-           is_type<op::Sign>(n) || is_type<op::Sqrt>(n);
+           is_type<op::Round>(n) || is_type<op::Sign>(n) || is_type<op::Sqrt>(n);
 }

 template <class T>
@@ -109,6 +111,11 @@ shared_ptr<op::Constant> fold_constant_unary(shared_ptr<op::Constant> constant,
         runtime::reference::relu<T>(
             constant->get_data_ptr<T>(), buffer.get_ptr<T>(), shape_size(out_shape));
     }
+    else if (is_type<op::Round>(unary))
+    {
+        runtime::reference::round<T>(
+            constant->get_data_ptr<T>(), buffer.get_ptr<T>(), shape_size(out_shape));
+    }
     else if (is_type<op::Sign>(unary))
     {
         runtime::reference::sign<T>(
diff --git a/src/ngraph/runtime/cpu/cpu_builder.cpp b/src/ngraph/runtime/cpu/cpu_builder.cpp
index c1ad65eabeb..2650cbb0f8d 100644
--- a/src/ngraph/runtime/cpu/cpu_builder.cpp
+++ b/src/ngraph/runtime/cpu/cpu_builder.cpp
@@ -59,6 +59,7 @@
 #include "ngraph/op/power.hpp"
 #include "ngraph/op/relu.hpp"
 #include "ngraph/op/result.hpp"
+#include "ngraph/op/round.hpp"
 #include "ngraph/op/sign.hpp"
 #include "ngraph/op/sin.hpp"
 #include "ngraph/op/sinh.hpp"
@@ -100,6 +101,7 @@
 #include "ngraph/runtime/cpu/kernel/or.hpp"
 #include "ngraph/runtime/cpu/kernel/relu.hpp"
 #include "ngraph/runtime/cpu/kernel/result.hpp"
+#include "ngraph/runtime/cpu/kernel/round.hpp"
 #include "ngraph/runtime/cpu/kernel/sign.hpp"
 #include "ngraph/runtime/cpu/kernel/sin.hpp"
 #include "ngraph/runtime/cpu/kernel/sinh.hpp"
@@ -366,6 +368,12 @@ namespace ngraph
                 BUILD_UNARY_ELEMWISE_FUNCTOR(runtime::cpu::kernel::floor);
             }

+            template <>
+            void Builder::BUILDER_DECL(ngraph::op::Round)
+            {
+                BUILD_UNARY_ELEMWISE_FUNCTOR(runtime::cpu::kernel::round);
+            }
+
             template <>
             void Builder::BUILDER_DECL(ngraph::op::Negative)
             {
@@ -566,6 +574,12 @@ namespace ngraph
BUILD_UNARY_ELEMWISE_CF_FUNCTOR(runtime::cpu::kernel::floor); } + template <> + NodeExecutorTy Builder::BUILDER_CF_DECL(ngraph::op::Round) + { + BUILD_UNARY_ELEMWISE_CF_FUNCTOR(runtime::cpu::kernel::round); + } + template <> NodeExecutorTy Builder::BUILDER_CF_DECL(ngraph::op::Ceiling) { @@ -699,6 +713,7 @@ namespace ngraph REGISTER_OP_BUILDER(Negative); REGISTER_OP_BUILDER(Exp); REGISTER_OP_BUILDER(Log); + REGISTER_OP_BUILDER(Round); REGISTER_OP_BUILDER(Sqrt); REGISTER_OP_BUILDER(Sign); REGISTER_OP_BUILDER(Sin); @@ -740,6 +755,7 @@ namespace ngraph REGISTER_CF_BUILDER(And); REGISTER_CF_BUILDER(Or); REGISTER_CF_BUILDER(Xor); + REGISTER_CF_BUILDER(Round); REGISTER_CF_BUILDER(Sign); REGISTER_CF_BUILDER(Not); REGISTER_CF_BUILDER(Power); diff --git a/src/ngraph/runtime/cpu/cpu_emitter.cpp b/src/ngraph/runtime/cpu/cpu_emitter.cpp index 1c1ce682902..20a47ba49fb 100644 --- a/src/ngraph/runtime/cpu/cpu_emitter.cpp +++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp @@ -2330,6 +2330,21 @@ namespace ngraph writer.block_end(); } + template <> + void CPU_Emitter::EMITTER_DECL(ngraph::op::Round) + { + (void)external_function; + (void)node; + writer.block_begin(); + size_t element_count = out[0].get_size(); + writer << "#pragma omp parallel for\n"; + writer << "for (size_t i = 0; i < " << element_count << "; i++)\n"; + writer.block_begin(); + writer << out[0].get_name() << "[i] = round(" << args[0].get_name() << "[i]);\n"; + writer.block_end(); + writer.block_end(); + } + template <> void CPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt) { diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp index 74428f26fa2..0d957725fc7 100644 --- a/src/ngraph/runtime/cpu/cpu_external_function.cpp +++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp @@ -454,7 +454,7 @@ static const runtime::cpu::OpMap dispatcher{ {TI(ngraph::op::Tile), &runtime::cpu::CPU_Emitter::emit}, {TI(ngraph::op::Gelu), &runtime::cpu::CPU_Emitter::emit}, {TI(ngraph::op::GeluBackprop), &runtime::cpu::CPU_Emitter::emit}, -}; + {TI(ngraph::op::Round), &runtime::cpu::CPU_Emitter::emit}}; static void generate_isnan_isinf_check(CodeWriter& writer, diff --git a/src/ngraph/runtime/cpu/kernel/round.hpp b/src/ngraph/runtime/cpu/kernel/round.hpp new file mode 100644 index 00000000000..aa7b93bd59d --- /dev/null +++ b/src/ngraph/runtime/cpu/kernel/round.hpp @@ -0,0 +1,39 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
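// Editor's note (illustration, not part of the patch): the codegen emitter
// above calls the C library round(), which rounds halves away from zero
// (round(2.5) == 3.0, round(-2.5) == -3.0), whereas the reference kernel used
// by the DEX builder and by constant folding rounds halves to the nearest
// even integer. The backend test expectations below follow the half-to-even
// rule.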
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/runtime/reference/round.hpp"
+
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace cpu
+        {
+            namespace kernel
+            {
+                template <typename ElementType>
+                void round(void* arg, void* output, size_t count, int arena)
+                {
+                    reference::round(static_cast<ElementType*>(arg),
+                                     static_cast<ElementType*>(output),
+                                     count);
+                }
+            }
+        }
+    }
+}
diff --git a/src/ngraph/runtime/interpreter/int_executable.hpp b/src/ngraph/runtime/interpreter/int_executable.hpp
index cedca3b4b1c..d1bc86fae8f 100644
--- a/src/ngraph/runtime/interpreter/int_executable.hpp
+++ b/src/ngraph/runtime/interpreter/int_executable.hpp
@@ -96,6 +96,7 @@
 #include "ngraph/runtime/reference/result.hpp"
 #include "ngraph/runtime/reference/reverse.hpp"
 #include "ngraph/runtime/reference/reverse_sequence.hpp"
+#include "ngraph/runtime/reference/round.hpp"
 #include "ngraph/runtime/reference/scatter_add.hpp"
 #include "ngraph/runtime/reference/scatter_nd_add.hpp"
 #include "ngraph/runtime/reference/select.hpp"
@@ -1590,6 +1591,13 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable
            }
            break;
        }
+        case OP_TYPEID::Round:
+        {
+            size_t element_count = shape_size(node.get_output_shape(0));
+            reference::round<T>(
+                args[0]->get_data_ptr<const T>(), out[0]->get_data_ptr<T>(), element_count);
+            break;
+        }
        case OP_TYPEID::ScatterAdd:
        {
            if (node.get_input_element_type(1) == element::i64)
diff --git a/src/ngraph/runtime/plaidml/unit_test.manifest b/src/ngraph/runtime/plaidml/unit_test.manifest
index f58d9e7e1ef..dfc0f1bcb85 100644
--- a/src/ngraph/runtime/plaidml/unit_test.manifest
+++ b/src/ngraph/runtime/plaidml/unit_test.manifest
@@ -253,6 +253,10 @@ model_cum_sum_2d_axis_input
 model_cum_sum_2d_dynamic_axis_input
 model_cum_sum_3d_exclusive_reverse

+# unsupported op: `Round`
+round
+round_2D
+
 # onnx tests
 model_quant_conv_linear_2d
 model_quant_conv_linear_3d
diff --git a/src/ngraph/runtime/reference/round.hpp b/src/ngraph/runtime/reference/round.hpp
new file mode 100644
index 00000000000..50949ab5e3f
--- /dev/null
+++ b/src/ngraph/runtime/reference/round.hpp
@@ -0,0 +1,52 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
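// Editor's note (illustration, not part of the patch): the interpreter case
// and the CPU DEX kernel above both delegate to reference::round, so every
// non-codegen execution path shares the single rounding implementation
// defined just below.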
+//*****************************************************************************
+
+#pragma once
+
+#include <cmath>
+
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace reference
+        {
+            template <typename T>
+            T round_to_nearest_even(const T arg)
+            {
+                const auto floor_arg = std::floor(arg);
+                const auto diff = arg - floor_arg;
+                if (diff < 0.5f || (diff == 0.5f && static_cast<int>(floor_arg) % 2 == 0))
+                {
+                    return floor_arg;
+                }
+                else
+                {
+                    return floor_arg + 1.0f;
+                }
+            }
+
+            template <typename T>
+            void round(const T* arg, T* out, size_t count)
+            {
+                for (size_t i = 0; i < count; ++i)
+                {
+                    out[i] = round_to_nearest_even(arg[i]);
+                }
+            }
+        }
+    }
+}
diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp
index 30c3e49b820..11ea86f4eda 100644
--- a/src/ngraph/serializer.cpp
+++ b/src/ngraph/serializer.cpp
@@ -2654,7 +2654,11 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
     }
     case OP_TYPEID::ReorgYolo: { break;
     }
-
+    case OP_TYPEID::Round:
+    {
+        node = make_shared<op::Round>(args[0]);
+        break;
+    }
     case OP_TYPEID::ScalarConstantLike:
     {
         double value = node_js.at("value").get<double>();
@@ -3553,6 +3557,8 @@ json JSONSerializer::serialize_node(const Node& n)
     }
     case OP_TYPEID::ReorgYolo: { break;
     }
+    case OP_TYPEID::Round: { break;
+    }
     case OP_TYPEID::DeformableConvolution_v1:
     {
         const auto tmp = static_cast<const op::v1::DeformableConvolution*>(&n);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 9f4fd127a07..2c19b25e704 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -350,6 +350,7 @@ set(MULTI_TEST_SRC
     backend/reshape.in.cpp
     backend/reverse_sequence.in.cpp
     backend/reverse.in.cpp
+    backend/round.in.cpp
     backend/scatter.in.cpp
     backend/select.in.cpp
     backend/shape_of.in.cpp
diff --git a/test/backend/round.in.cpp b/test/backend/round.in.cpp
new file mode 100644
index 00000000000..e18a26d0c32
--- /dev/null
+++ b/test/backend/round.in.cpp
@@ -0,0 +1,83 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
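// A worked trace of round_to_nearest_even above (editor's illustration, not
// part of the patch): for arg = -4.5f, floor_arg = -5 and diff = 0.5;
// static_cast<int>(-5) % 2 == -1, so the else branch returns floor_arg + 1 =
// -4.0f, matching the -4.5f -> -4.0f expectation in the tests below.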
+//*****************************************************************************
+
+#include "gtest/gtest.h"
+#include "ngraph/ngraph.hpp"
+#include "util/all_close.hpp"
+#include "util/all_close_f.hpp"
+#include "util/ndarray.hpp"
+#include "util/test_control.hpp"
+#include "util/test_tools.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+static string s_manifest = "${MANIFEST}";
+
+NGRAPH_TEST(${BACKEND_NAME}, round)
+{
+    Shape shape{5};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f = make_shared<Function>(make_shared<op::Round>(A), ParameterVector{A});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a, vector<float>{0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
+    auto result = backend->create_tensor(element::f32, shape);
+
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a});
+    EXPECT_TRUE(test::all_close_f((vector<float>{1.0f, 2.0f, 2.0f, 2.0f, -4.0f}),
+                                  read_vector<float>(result),
+                                  MIN_FLOAT_TOLERANCE_BITS));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, round_2D)
+{
+    Shape shape{3, 5};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f = make_shared<Function>(make_shared<op::Round>(A), ParameterVector{A});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a,
+              vector<float>{0.1f,
+                            0.5f,
+                            0.9f,
+                            1.2f,
+                            1.5f,
+                            1.8f,
+                            2.3f,
+                            2.5f,
+                            2.7f,
+                            -1.1f,
+                            -1.5f,
+                            -1.9f,
+                            -2.2f,
+                            -2.5f,
+                            -2.8f});
+    auto result = backend->create_tensor(element::f32, shape);
+
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a});
+    EXPECT_TRUE(test::all_close_f(
+        (vector<float>{
+            0.f, 0.f, 1.f, 1.f, 2.f, 2.f, 2.f, 2.f, 3.f, -1.f, -2.f, -2.f, -2.f, -2.f, -3.f}),
+        read_vector<float>(result),
+        MIN_FLOAT_TOLERANCE_BITS));
+}
diff --git a/test/op_is.cpp b/test/op_is.cpp
index 5753b4ce38e..e14b0271e0f 100644
--- a/test/op_is.cpp
+++ b/test/op_is.cpp
@@ -1131,6 +1131,15 @@ namespace
         EXPECT_FALSE(node.is_binary_elementwise_logical());
     }

+    void op_is_Round()
+    {
+        op::Round node;
+        EXPECT_TRUE(node.is_unary_elementwise_arithmetic());
+        EXPECT_FALSE(node.is_binary_elementwise_arithmetic());
+        EXPECT_FALSE(node.is_binary_elementwise_comparison());
+        EXPECT_FALSE(node.is_binary_elementwise_logical());
+    }
+
     void op_is_ScalarConstantLike()
     {
         op::ScalarConstantLike node;

From 7b3b1b6cebfb4ce1fcce51828ccccfd949e6d623 Mon Sep 17 00:00:00 2001
From: Mateusz Bencer
Date: Sat, 11 Jan 2020 04:19:37 +0100
Subject: [PATCH 02/12] Change LogSoftmax to produce v1 and remove FusedOp
 (#4139)

* Removed LogSoftmax FusedOp, changed onnx to produce v1

* Code review remarks introduced

* fix after merge from master

Co-authored-by: Scott Cyphers
---
 src/ngraph/CMakeLists.txt                     |  2 -
 .../frontend/onnx_import/op/log_softmax.cpp   | 16 ++++--
 src/ngraph/op/fused/log_softmax.cpp           | 54 -------------------
 src/ngraph/op/fused/log_softmax.hpp           | 54 -------------------
 src/ngraph/op/op_version_tbl.hpp              |  1 -
 src/ngraph/ops.hpp                            |  1 -
 src/ngraph/opsets/opset0_tbl.hpp              |  1 -
 .../runtime/interpreter/int_executable.hpp    |  1 -
 src/ngraph/serializer.cpp                     | 12 -----
 test/CMakeLists.txt                           |  1 -
 test/op_is.cpp                                |  9 ----
 test/type_prop/log_softmax.cpp                | 42 ---------------
 12 files changed, 11 insertions(+), 183 deletions(-)
 delete mode 100644 src/ngraph/op/fused/log_softmax.cpp
 delete mode 100644 src/ngraph/op/fused/log_softmax.hpp
 delete mode 100644 test/type_prop/log_softmax.cpp

diff --git a/src/ngraph/CMakeLists.txt b/src/ngraph/CMakeLists.txt
index 0bbe6a42d3c..d402e58ddde 100644
--- a/src/ngraph/CMakeLists.txt
+++ 
b/src/ngraph/CMakeLists.txt @@ -385,8 +385,6 @@ set (SRC op/fused/gru_cell.hpp op/fused/layer_norm.cpp op/fused/layer_norm.hpp - op/fused/log_softmax.cpp - op/fused/log_softmax.hpp op/fused/lstm_cell.cpp op/fused/lstm_cell.hpp op/fused/lstm_sequence.cpp diff --git a/src/ngraph/frontend/onnx_import/op/log_softmax.cpp b/src/ngraph/frontend/onnx_import/op/log_softmax.cpp index 5800c9351cb..457c655b47a 100644 --- a/src/ngraph/frontend/onnx_import/op/log_softmax.cpp +++ b/src/ngraph/frontend/onnx_import/op/log_softmax.cpp @@ -16,8 +16,9 @@ #include +#include "default_opset.hpp" #include "log_softmax.hpp" -#include "ngraph/opsets/opset0.hpp" +#include "ngraph/validation_util.hpp" namespace ngraph { @@ -30,11 +31,16 @@ namespace ngraph NodeVector log_softmax(const Node& node) { NodeVector inputs{node.get_ng_inputs()}; - auto data = inputs.at(0); - auto data_shape = data->get_shape(); - int axis = node.get_attribute_value("axis", 1); + const auto data = inputs.at(0); + const auto data_shape = data->get_shape(); - return {std::make_shared(data, axis)}; + const auto axis = node.get_attribute_value("axis", 1); + const auto normalized_axis = + ngraph::normalize_axis(node.get_description(), axis, data_shape.size()); + + const auto softmax = + std::make_shared(data, normalized_axis); + return {std::make_shared(softmax)}; } } // namespace set_1 diff --git a/src/ngraph/op/fused/log_softmax.cpp b/src/ngraph/op/fused/log_softmax.cpp deleted file mode 100644 index 8625626cb12..00000000000 --- a/src/ngraph/op/fused/log_softmax.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
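// Editor's note (illustration, not part of the patch): the importer change
// above reproduces the removed FusedOp's decomposition directly, i.e.
//   log_softmax(data, axis) == Log(Softmax(data, normalized_axis))
// with the axis normalized against the data rank via ngraph::normalize_axis.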
-//***************************************************************************** -#include - -#include "ngraph/op/fused/log_softmax.hpp" -#include "ngraph/op/log.hpp" -#include "ngraph/op/softmax.hpp" -#include "ngraph/validation_util.hpp" - -using namespace std; -using namespace ngraph; - -constexpr NodeTypeInfo op::LogSoftmax::type_info; - -op::LogSoftmax::LogSoftmax(const Output& data, int64_t axis) - : FusedOp({data}) - , m_axis(axis) -{ - constructor_validate_and_infer_types(); -} - -NodeVector op::LogSoftmax::decompose_op() const -{ - const auto data = input_value(0); - const auto data_shape = data.get_shape(); - - auto axis = ngraph::normalize_axis(this, m_axis, data_shape.size()); - - std::vector axes(data_shape.size() - axis); - std::iota(std::begin(axes), std::end(axes), axis); - - auto softmax = std::make_shared(data, axes); - - return {std::make_shared(softmax)}; -} - -shared_ptr op::LogSoftmax::copy_with_new_args(const NodeVector& new_args) const -{ - check_new_args_count(this, new_args); - return make_shared(new_args.at(0), m_axis); -} diff --git a/src/ngraph/op/fused/log_softmax.hpp b/src/ngraph/op/fused/log_softmax.hpp deleted file mode 100644 index 584d6a1ec54..00000000000 --- a/src/ngraph/op/fused/log_softmax.hpp +++ /dev/null @@ -1,54 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -#pragma once - -#include "ngraph/node.hpp" -#include "ngraph/op/op.hpp" -#include "ngraph/op/util/fused_op.hpp" - -namespace ngraph -{ - namespace op - { - namespace v0 - { - /// \brief LogSoftmax operation - class NGRAPH_API LogSoftmax : public ngraph::op::util::FusedOp - { - public: - static constexpr NodeTypeInfo type_info{"LogSoftmax", 0}; - LogSoftmax() = default; - const NodeTypeInfo& get_type_info() const override { return type_info; } - /// \brief Constructs a LogSoftmax node. 
- /// - /// \param data Node that produces the first input tensor - /// \param axis Describes the axis of the inputs when coerced to 2D - LogSoftmax(const Output& data, int64_t axis); - - virtual NodeVector decompose_op() const override; - - virtual std::shared_ptr - copy_with_new_args(const NodeVector& new_args) const override; - - int64_t get_axis() const { return m_axis; } - protected: - int64_t m_axis; - }; - } - using v0::LogSoftmax; - } // namespace op -} // namespace ngraph diff --git a/src/ngraph/op/op_version_tbl.hpp b/src/ngraph/op/op_version_tbl.hpp index 0194bcd50f4..f2b0ab05329 100644 --- a/src/ngraph/op/op_version_tbl.hpp +++ b/src/ngraph/op/op_version_tbl.hpp @@ -132,7 +132,6 @@ NGRAPH_OP(Less, ngraph::op::v1, 1) NGRAPH_OP(LessEq, ngraph::op::v0, 0) NGRAPH_OP(LessEqual, ngraph::op::v1, 1) NGRAPH_OP(Log, ngraph::op, 0) -NGRAPH_OP(LogSoftmax, ngraph::op::v0, 0) NGRAPH_OP(LogicalAnd, ngraph::op::v1, 1) NGRAPH_OP(LogicalNot, ngraph::op::v1, 1) NGRAPH_OP(LogicalOr, ngraph::op::v1, 1) diff --git a/src/ngraph/ops.hpp b/src/ngraph/ops.hpp index 7b5b76c5849..1727c9aedcd 100644 --- a/src/ngraph/ops.hpp +++ b/src/ngraph/ops.hpp @@ -98,7 +98,6 @@ #include "ngraph/op/fused/gru_cell.hpp" #include "ngraph/op/fused/hard_sigmoid.hpp" #include "ngraph/op/fused/layer_norm.hpp" -#include "ngraph/op/fused/log_softmax.hpp" #include "ngraph/op/fused/lstm_cell.hpp" #include "ngraph/op/fused/lstm_sequence.hpp" #include "ngraph/op/fused/matmul.hpp" diff --git a/src/ngraph/opsets/opset0_tbl.hpp b/src/ngraph/opsets/opset0_tbl.hpp index 5085235f7ad..b66861f7464 100644 --- a/src/ngraph/opsets/opset0_tbl.hpp +++ b/src/ngraph/opsets/opset0_tbl.hpp @@ -127,7 +127,6 @@ NGRAPH_OP(LayerNormBackprop, ngraph::op) NGRAPH_OP(Less, ngraph::op) NGRAPH_OP(LessEq, ngraph::op) NGRAPH_OP(Log, ngraph::op) -NGRAPH_OP(LogSoftmax, ngraph::op) NGRAPH_OP(LRN, ngraph::op) NGRAPH_OP(LSTMCell, ngraph::op) NGRAPH_OP(LSTMSequence, ngraph::op) diff --git a/src/ngraph/runtime/interpreter/int_executable.hpp b/src/ngraph/runtime/interpreter/int_executable.hpp index d1bc86fae8f..6cf3fb01e91 100644 --- a/src/ngraph/runtime/interpreter/int_executable.hpp +++ b/src/ngraph/runtime/interpreter/int_executable.hpp @@ -1865,7 +1865,6 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable case OP_TYPEID::Interpolate: case OP_TYPEID::LayerNorm: case OP_TYPEID::LayerNormBackprop: - case OP_TYPEID::LogSoftmax: case OP_TYPEID::LSTMCell: case OP_TYPEID::LSTMSequence: case OP_TYPEID::MVN: diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp index 11ea86f4eda..b0709425cd2 100644 --- a/src/ngraph/serializer.cpp +++ b/src/ngraph/serializer.cpp @@ -1939,12 +1939,6 @@ shared_ptr JSONDeserializer::deserialize_node(json node_js) args[0], args[1], read_auto_broadcast(node_js, "auto_broadcast")); break; } - case OP_TYPEID::LogSoftmax: - { - auto axis = node_js.at("axis").get(); - node = make_shared(args[0], axis); - break; - } case OP_TYPEID::LRN: { auto alpha = node_js.at("alpha").get(); @@ -3960,12 +3954,6 @@ json JSONSerializer::serialize_node(const Node& n) } break; } - case OP_TYPEID::LogSoftmax: - { - auto tmp = static_cast(&n); - node["axis"] = tmp->get_axis(); - break; - } case OP_TYPEID::LRN: { auto tmp = static_cast(&n); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2c19b25e704..2921766d5ac 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -148,7 +148,6 @@ set(SRC type_prop/hard_sigmoid.cpp type_prop/index_reduction.cpp type_prop/layer_norm.cpp - type_prop/log_softmax.cpp type_prop/lrn.cpp 
type_prop/lstm_cell.cpp type_prop/lstm_sequence.cpp diff --git a/test/op_is.cpp b/test/op_is.cpp index e14b0271e0f..d9c0bb6d75f 100644 --- a/test/op_is.cpp +++ b/test/op_is.cpp @@ -717,15 +717,6 @@ namespace EXPECT_FALSE(node.is_binary_elementwise_logical()); } - void op_is_LogSoftmax() - { - op::LogSoftmax node; - EXPECT_FALSE(node.is_unary_elementwise_arithmetic()); - EXPECT_FALSE(node.is_binary_elementwise_arithmetic()); - EXPECT_FALSE(node.is_binary_elementwise_comparison()); - EXPECT_FALSE(node.is_binary_elementwise_logical()); - } - void op_is_LRN() { op::LRN node; diff --git a/test/type_prop/log_softmax.cpp b/test/type_prop/log_softmax.cpp deleted file mode 100644 index a0dd0724bf3..00000000000 --- a/test/type_prop/log_softmax.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -#include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" -#include "util/type_prop.hpp" - -using namespace std; -using namespace ngraph; - -TEST(type_prop, log_softmax) -{ - const auto data = make_shared(element::f64, Shape{2, 2}); - const auto axis = 2; - try - { - const auto log_softmax = make_shared(data, axis); - // Should have thrown, so fail if it didn't - FAIL() << "Invalid axis value not detected"; - } - catch (const ngraph_error& error) - { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter axis ")); - } - catch (...) 
- { - FAIL() << "Log softmax failed for unexpected reason"; - } -} From ef553de3cd682065fe95835b221a8d3e16724aef Mon Sep 17 00:00:00 2001 From: Robert Kimball Date: Sun, 12 Jan 2020 06:31:01 -0800 Subject: [PATCH 03/12] Move GCPU to runtime/gcpu so that unit test manifest works (#4150) * Move GCPU to runtime/gcpu so that unit test manifest works * style * Add tests to manifest * Fix CPU only check * Update GCPU dependency Co-authored-by: Scott Cyphers --- CMakeLists.txt | 7 +- src/ngraph/runtime/CMakeLists.txt | 2 +- .../{generic_cpu => gcpu}/CMakeLists.txt | 6 +- .../{generic_cpu => gcpu}/gcpu_backend.cpp | 6 +- .../{generic_cpu => gcpu}/gcpu_backend.hpp | 0 .../gcpu_backend_visibility.hpp | 0 .../{generic_cpu => gcpu}/gcpu_executable.cpp | 2 +- src/ngraph/runtime/gcpu/gcpu_executable.hpp | 114 ++++++++++ .../{generic_cpu => gcpu}/kernel/dot.hpp | 0 .../{generic_cpu => gcpu}/unit_test.manifest | 3 + .../runtime/generic_cpu/gcpu_executable.hpp | 198 ------------------ 11 files changed, 125 insertions(+), 213 deletions(-) rename src/ngraph/runtime/{generic_cpu => gcpu}/CMakeLists.txt (87%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_backend.cpp (93%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_backend.hpp (100%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_backend_visibility.hpp (100%) rename src/ngraph/runtime/{generic_cpu => gcpu}/gcpu_executable.cpp (99%) create mode 100644 src/ngraph/runtime/gcpu/gcpu_executable.hpp rename src/ngraph/runtime/{generic_cpu => gcpu}/kernel/dot.hpp (100%) rename src/ngraph/runtime/{generic_cpu => gcpu}/unit_test.manifest (63%) delete mode 100644 src/ngraph/runtime/generic_cpu/gcpu_executable.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fda32e33b81..d3e2a30598a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,7 +169,7 @@ option(NGRAPH_USE_LEGACY_MKLDNN "Use legacy MKLDNN" FALSE) option(NGRAPH_MLIR_ENABLE "Control the building of MLIR backend" FALSE) option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE) option(NGRAPH_NOP_ENABLE "Control the building of the NOP backend" TRUE) -option(NGRAPH_GENERIC_CPU_ENABLE "Enable build nGraph for generic CPU backend" FALSE) +option(NGRAPH_GENERIC_CPU_ENABLE "Enable build nGraph for generic CPU backend" TRUE) option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" FALSE) option(NGRAPH_DEPRECATED_ENABLE "Enable compiler deprecation pragmas for deprecated APIs (recommended only for development use)" FALSE) option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" FALSE) @@ -199,10 +199,7 @@ if (NGRAPH_STATIC_LIB_ENABLE) set(NGRAPH_EXPORT_TARGETS_ENABLE OFF) endif() -if (NGRAPH_CPU_ENABLE - AND - (NOT NGRAPH_GENERIC_CPU_ENABLE) - ) +if (NGRAPH_CPU_ENABLE) set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON) endif() diff --git a/src/ngraph/runtime/CMakeLists.txt b/src/ngraph/runtime/CMakeLists.txt index 7d22a723568..f11b51922cb 100755 --- a/src/ngraph/runtime/CMakeLists.txt +++ b/src/ngraph/runtime/CMakeLists.txt @@ -28,7 +28,7 @@ if (NGRAPH_NOP_ENABLE) endif() if (NGRAPH_GENERIC_CPU_ENABLE) - add_subdirectory(generic_cpu) + add_subdirectory(gcpu) endif() if (NGRAPH_PLAIDML_ENABLE) diff --git a/src/ngraph/runtime/generic_cpu/CMakeLists.txt b/src/ngraph/runtime/gcpu/CMakeLists.txt similarity index 87% rename from src/ngraph/runtime/generic_cpu/CMakeLists.txt rename to src/ngraph/runtime/gcpu/CMakeLists.txt index be7c5b52944..91ba90ef47e 100644 --- a/src/ngraph/runtime/generic_cpu/CMakeLists.txt +++ b/src/ngraph/runtime/gcpu/CMakeLists.txt @@ -15,17 
+15,13 @@ # ****************************************************************************** if (NGRAPH_GENERIC_CPU_ENABLE) - # find_package(OpenMP) - # if (OPENMP_FOUND) - # add_compile_options(${OpenMP_CXX_FLAGS}) - # endif() add_library(gcpu_backend SHARED gcpu_backend.cpp gcpu_executable.cpp) if(NGRAPH_LIB_VERSIONING_ENABLE) set_target_properties(gcpu_backend PROPERTIES VERSION ${NGRAPH_VERSION} SOVERSION ${NGRAPH_API_VERSION}) endif() - target_link_libraries(gcpu_backend PRIVATE ngraph libeigen) + target_link_libraries(gcpu_backend PRIVATE ngraph interpreter_backend) target_compile_definitions(gcpu_backend PRIVATE GCPU_BACKEND_DLL_EXPORTS) install(TARGETS gcpu_backend diff --git a/src/ngraph/runtime/generic_cpu/gcpu_backend.cpp b/src/ngraph/runtime/gcpu/gcpu_backend.cpp similarity index 93% rename from src/ngraph/runtime/generic_cpu/gcpu_backend.cpp rename to src/ngraph/runtime/gcpu/gcpu_backend.cpp index 3b7ddbc8a54..748242b0d4e 100644 --- a/src/ngraph/runtime/generic_cpu/gcpu_backend.cpp +++ b/src/ngraph/runtime/gcpu/gcpu_backend.cpp @@ -14,12 +14,12 @@ // limitations under the License. //***************************************************************************** -#include "ngraph/runtime/generic_cpu/gcpu_backend_visibility.hpp" +#include "ngraph/runtime/gcpu/gcpu_backend_visibility.hpp" #include "ngraph/except.hpp" #include "ngraph/runtime/backend_manager.hpp" -#include "ngraph/runtime/generic_cpu/gcpu_backend.hpp" -#include "ngraph/runtime/generic_cpu/gcpu_executable.hpp" +#include "ngraph/runtime/gcpu/gcpu_backend.hpp" +#include "ngraph/runtime/gcpu/gcpu_executable.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/util.hpp" diff --git a/src/ngraph/runtime/generic_cpu/gcpu_backend.hpp b/src/ngraph/runtime/gcpu/gcpu_backend.hpp similarity index 100% rename from src/ngraph/runtime/generic_cpu/gcpu_backend.hpp rename to src/ngraph/runtime/gcpu/gcpu_backend.hpp diff --git a/src/ngraph/runtime/generic_cpu/gcpu_backend_visibility.hpp b/src/ngraph/runtime/gcpu/gcpu_backend_visibility.hpp similarity index 100% rename from src/ngraph/runtime/generic_cpu/gcpu_backend_visibility.hpp rename to src/ngraph/runtime/gcpu/gcpu_backend_visibility.hpp diff --git a/src/ngraph/runtime/generic_cpu/gcpu_executable.cpp b/src/ngraph/runtime/gcpu/gcpu_executable.cpp similarity index 99% rename from src/ngraph/runtime/generic_cpu/gcpu_executable.cpp rename to src/ngraph/runtime/gcpu/gcpu_executable.cpp index d718813d495..191a46b795c 100644 --- a/src/ngraph/runtime/generic_cpu/gcpu_executable.cpp +++ b/src/ngraph/runtime/gcpu/gcpu_executable.cpp @@ -14,7 +14,7 @@ // limitations under the License. //***************************************************************************** -#include "ngraph/runtime/generic_cpu/gcpu_executable.hpp" +#include "ngraph/runtime/gcpu/gcpu_executable.hpp" #include "ngraph/cpio.hpp" #include "ngraph/descriptor/layout/dense_tensor_layout.hpp" #include "ngraph/except.hpp" diff --git a/src/ngraph/runtime/gcpu/gcpu_executable.hpp b/src/ngraph/runtime/gcpu/gcpu_executable.hpp new file mode 100644 index 00000000000..5debb75c429 --- /dev/null +++ b/src/ngraph/runtime/gcpu/gcpu_executable.hpp @@ -0,0 +1,114 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
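// Editor's note (illustration, not part of the patch): the new header below
// declares GCPUExecutable as a subclass of the interpreter's INTExecutable;
// it overrides only Broadcast and Reshape with the opt_kernel variants and
// falls back to the interpreter's op_engine for every other op.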
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "ngraph/ops.hpp" +#include "ngraph/runtime/aligned_buffer.hpp" +#include "ngraph/runtime/backend.hpp" +#include "ngraph/runtime/host_tensor.hpp" +#include "ngraph/runtime/interpreter/int_executable.hpp" +#include "ngraph/runtime/opt_kernel/broadcast.hpp" +#include "ngraph/runtime/opt_kernel/reshape.hpp" +#include "ngraph/runtime/tensor.hpp" + +namespace ngraph +{ + namespace runtime + { + namespace gcpu + { + class GCPUBackend; + class GCPUExecutable; + + namespace + { + // This expands the op list in op_tbl.hpp into a list of enumerations that look like + // this: + // Abs, + // Acos, + // ... + enum class OP_TYPEID + { +#define NGRAPH_OP(NAME, NAMESPACE) NAME, +#include "ngraph/opsets/opset0_tbl.hpp" +#undef NGRAPH_OP + UnknownOp + }; + } + } + } +} + +class ngraph::runtime::gcpu::GCPUExecutable : public runtime::interpreter::INTExecutable +{ + friend class GCPUBackend; + +public: + GCPUExecutable(const std::shared_ptr& function, + bool enable_performance_collection = false); + + bool call(const std::vector>& outputs, + const std::vector>& intputs) override; + +private: + int get_alignment() const { return 64; } + void generate_calls(const element::Type& type, + const Node& op, + const std::vector>& outputs, + const std::vector>& inputs) override; + + template + void gop_engine(const Node& node, + const std::vector>& out, + const std::vector>& args) + { + switch (INTExecutable::get_typeid(node)) + { + case ngraph::runtime::interpreter::OP_TYPEID::Broadcast: + { + const op::Broadcast* broadcast = static_cast(&node); + Shape in_shape = node.get_input_shape(0); + Shape out_shape = node.get_output_shape(0); + AxisSet broadcast_axes = broadcast->get_broadcast_axes(); + reference::broadcast(args[0]->get_data_ptr(), + out[0]->get_data_ptr(), + in_shape, + out_shape, + broadcast_axes); + break; + } + case ngraph::runtime::interpreter::OP_TYPEID::Reshape: + { + const op::Reshape* reshape = static_cast(&node); + reference::reshape(args[0]->get_data_ptr(), + out[0]->get_data_ptr(), + node.get_input_shape(0), + reshape->get_input_order(), + node.get_output_shape(0)); + break; + } + default: op_engine(node, out, args); break; + } + } +}; diff --git a/src/ngraph/runtime/generic_cpu/kernel/dot.hpp b/src/ngraph/runtime/gcpu/kernel/dot.hpp similarity index 100% rename from src/ngraph/runtime/generic_cpu/kernel/dot.hpp rename to src/ngraph/runtime/gcpu/kernel/dot.hpp diff --git a/src/ngraph/runtime/generic_cpu/unit_test.manifest b/src/ngraph/runtime/gcpu/unit_test.manifest similarity index 63% rename from src/ngraph/runtime/generic_cpu/unit_test.manifest rename to src/ngraph/runtime/gcpu/unit_test.manifest index a54f9764ece..1a5623c7834 100644 --- a/src/ngraph/runtime/generic_cpu/unit_test.manifest +++ b/src/ngraph/runtime/gcpu/unit_test.manifest @@ -3,3 +3,6 @@ tile_3d_few_repeats fake_quantize_pdpd convert_float32_bf16 convert_bf16_float32 + +onnx_GCPU.model_quant_conv_linear +onnx_GCPU.top_k_opset_10 diff --git 
a/src/ngraph/runtime/generic_cpu/gcpu_executable.hpp b/src/ngraph/runtime/generic_cpu/gcpu_executable.hpp deleted file mode 100644 index 6b8085aa189..00000000000 --- a/src/ngraph/runtime/generic_cpu/gcpu_executable.hpp +++ /dev/null @@ -1,198 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "ngraph/ops.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" -#include "ngraph/runtime/backend.hpp" -#include "ngraph/runtime/generic_cpu/kernel/broadcast.hpp" -#include "ngraph/runtime/generic_cpu/kernel/dot.hpp" -#include "ngraph/runtime/generic_cpu/kernel/reshape.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/runtime/interpreter/int_executable.hpp" -#include "ngraph/runtime/reference/abs.hpp" -#include "ngraph/runtime/reference/acos.hpp" -#include "ngraph/runtime/reference/add.hpp" -#include "ngraph/runtime/reference/all.hpp" -#include "ngraph/runtime/reference/allreduce.hpp" -#include "ngraph/runtime/reference/and.hpp" -#include "ngraph/runtime/reference/any.hpp" -#include "ngraph/runtime/reference/argmax.hpp" -#include "ngraph/runtime/reference/argmin.hpp" -#include "ngraph/runtime/reference/asin.hpp" -#include "ngraph/runtime/reference/atan.hpp" -#include "ngraph/runtime/reference/atan2.hpp" -#include "ngraph/runtime/reference/avg_pool.hpp" -#include "ngraph/runtime/reference/batch_mat_mul.hpp" -#include "ngraph/runtime/reference/batch_norm.hpp" -#include "ngraph/runtime/reference/broadcast.hpp" -#include "ngraph/runtime/reference/broadcast_distributed.hpp" -#include "ngraph/runtime/reference/ceiling.hpp" -#include "ngraph/runtime/reference/concat.hpp" -#include "ngraph/runtime/reference/constant.hpp" -#include "ngraph/runtime/reference/convert.hpp" -#include "ngraph/runtime/reference/convolution.hpp" -#include "ngraph/runtime/reference/copy.hpp" -#include "ngraph/runtime/reference/cos.hpp" -#include "ngraph/runtime/reference/cosh.hpp" -#include "ngraph/runtime/reference/dequantize.hpp" -#include "ngraph/runtime/reference/divide.hpp" -#include "ngraph/runtime/reference/dot.hpp" -#include "ngraph/runtime/reference/embedding_lookup.hpp" -#include "ngraph/runtime/reference/equal.hpp" -#include "ngraph/runtime/reference/erf.hpp" -#include "ngraph/runtime/reference/exp.hpp" -#include "ngraph/runtime/reference/floor.hpp" -#include "ngraph/runtime/reference/gather.hpp" -#include "ngraph/runtime/reference/gather_nd.hpp" -#include "ngraph/runtime/reference/generate_mask.hpp" -#include "ngraph/runtime/reference/greater.hpp" -#include "ngraph/runtime/reference/greater_eq.hpp" -#include "ngraph/runtime/reference/less.hpp" -#include "ngraph/runtime/reference/less_eq.hpp" -#include "ngraph/runtime/reference/log.hpp" -#include "ngraph/runtime/reference/lrn.hpp" -#include 
"ngraph/runtime/reference/max.hpp" -#include "ngraph/runtime/reference/max_pool.hpp" -#include "ngraph/runtime/reference/maximum.hpp" -#include "ngraph/runtime/reference/min.hpp" -#include "ngraph/runtime/reference/minimum.hpp" -#include "ngraph/runtime/reference/multiply.hpp" -#include "ngraph/runtime/reference/negate.hpp" -#include "ngraph/runtime/reference/not.hpp" -#include "ngraph/runtime/reference/not_equal.hpp" -#include "ngraph/runtime/reference/one_hot.hpp" -#include "ngraph/runtime/reference/or.hpp" -#include "ngraph/runtime/reference/pad.hpp" -#include "ngraph/runtime/reference/power.hpp" -#include "ngraph/runtime/reference/product.hpp" -#include "ngraph/runtime/reference/quantize.hpp" -#include "ngraph/runtime/reference/recv.hpp" -#include "ngraph/runtime/reference/relu.hpp" -#include "ngraph/runtime/reference/replace_slice.hpp" -#include "ngraph/runtime/reference/reshape.hpp" -#include "ngraph/runtime/reference/result.hpp" -#include "ngraph/runtime/reference/reverse.hpp" -#include "ngraph/runtime/reference/reverse_sequence.hpp" -#include "ngraph/runtime/reference/scatter_add.hpp" -#include "ngraph/runtime/reference/scatter_nd_add.hpp" -#include "ngraph/runtime/reference/select.hpp" -#include "ngraph/runtime/reference/send.hpp" -#include "ngraph/runtime/reference/shape_of.hpp" -#include "ngraph/runtime/reference/sigmoid.hpp" -#include "ngraph/runtime/reference/sign.hpp" -#include "ngraph/runtime/reference/sin.hpp" -#include "ngraph/runtime/reference/sinh.hpp" -#include "ngraph/runtime/reference/slice.hpp" -#include "ngraph/runtime/reference/softmax.hpp" -#include "ngraph/runtime/reference/sqrt.hpp" -#include "ngraph/runtime/reference/subtract.hpp" -#include "ngraph/runtime/reference/sum.hpp" -#include "ngraph/runtime/reference/tan.hpp" -#include "ngraph/runtime/reference/tanh.hpp" -#include "ngraph/runtime/reference/topk.hpp" -#include "ngraph/runtime/reference/xor.hpp" -#include "ngraph/runtime/tensor.hpp" -#include "ngraph/state/bernoulli_rng_state.hpp" - -namespace ngraph -{ - namespace runtime - { - namespace gcpu - { - class GCPUBackend; - class GCPUExecutable; - - namespace - { - // This expands the op list in op_tbl.hpp into a list of enumerations that look like - // this: - // Abs, - // Acos, - // ... 
-            enum class OP_TYPEID
-            {
-#define NGRAPH_OP(NAME, NAMESPACE) NAME,
-#include "ngraph/opsets/opset0_tbl.hpp"
-#undef NGRAPH_OP
-                UnknownOp
-            };
-        }
-    }
-}
-
-class ngraph::runtime::gcpu::GCPUExecutable : public runtime::interpreter::INTExecutable
-{
-    friend class GCPUBackend;
-
-public:
-    GCPUExecutable(const std::shared_ptr<Function>& function,
-                   bool enable_performance_collection = false);
-
-    bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
-              const std::vector<std::shared_ptr<runtime::Tensor>>& intputs) override;
-
-private:
-    int get_alignment() const { return 64; }
-    void generate_calls(const element::Type& type,
-                        const Node& op,
-                        const std::vector<std::shared_ptr<HostTensor>>& outputs,
-                        const std::vector<std::shared_ptr<HostTensor>>& inputs) override;
-
-    template <typename T>
-    void gop_engine(const Node& node,
-                    const std::vector<std::shared_ptr<HostTensor>>& out,
-                    const std::vector<std::shared_ptr<HostTensor>>& args)
-    {
-        switch (INTExecutable::get_typeid(node))
-        {
-        case ngraph::runtime::interpreter::OP_TYPEID::Broadcast:
-        {
-            const op::Broadcast* broadcast = static_cast<const op::Broadcast*>(&node);
-            Shape in_shape = node.get_input_shape(0);
-            Shape out_shape = node.get_output_shape(0);
-            AxisSet broadcast_axes = broadcast->get_broadcast_axes();
-            reference::broadcast<T>(args[0]->get_data_ptr<const T>(),
-                                    out[0]->get_data_ptr<T>(),
-                                    in_shape,
-                                    out_shape,
-                                    broadcast_axes);
-            break;
-        }
-        case ngraph::runtime::interpreter::OP_TYPEID::Reshape:
-        {
-            const op::Reshape* reshape = static_cast<const op::Reshape*>(&node);
-            reference::reshape<T>(args[0]->get_data_ptr<const T>(),
-                                  out[0]->get_data_ptr<T>(),
-                                  node.get_input_shape(0),
-                                  reshape->get_input_order(),
-                                  node.get_output_shape(0));
-            break;
-        }
-        default: op_engine<T>(node, out, args); break;
-        }
-    }
-};

From f6fe6aca125f8295262d3eb1625e77e5f09cd542 Mon Sep 17 00:00:00 2001
From: Scott Cyphers
Date: Mon, 13 Jan 2020 09:35:34 -0800
Subject: [PATCH 04/12] =?UTF-8?q?Restore=20constant=20folding=20for=20DynR?=
 =?UTF-8?q?eshape=20until=20users=20are=20converted=20to=20=E2=80=A6=20(#4?=
 =?UTF-8?q?164)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Restore constant folding for DynReshape until users are converted to v1
  Reshape

* Disable test when no serialization

Co-authored-by: baojun <32073718+baojun-nervana@users.noreply.github.com>
---
 .../pass/constant_folding_dyn_reshape.cpp     |  165 +-
 test/dyn_elimination.cpp                      |   27 +-
 test/models/paddlepaddle/transpose.json       | 1924 +++++++++++++++++
 3 files changed, 2043 insertions(+), 73 deletions(-)
 create mode 100644 test/models/paddlepaddle/transpose.json

diff --git a/src/ngraph/pass/constant_folding_dyn_reshape.cpp b/src/ngraph/pass/constant_folding_dyn_reshape.cpp
index 8a02a4b145f..415d95247e8 100644
--- a/src/ngraph/pass/constant_folding_dyn_reshape.cpp
+++ b/src/ngraph/pass/constant_folding_dyn_reshape.cpp
@@ -17,6 +17,7 @@
 #include

 #include "constant_folding.hpp"
+#include "ngraph/op/experimental/dyn_reshape.hpp"
 #include "ngraph/op/reshape.hpp"
 #include "ngraph/runtime/reference/reshape.hpp"
 #include "ngraph/type/element_type.hpp"
@@ -24,9 +25,9 @@
 using namespace std;
 using namespace ngraph;

-template <class T>
+template <class T, class R>
 shared_ptr<op::Constant> fold_constant_dyn_reshape(shared_ptr<op::Constant> constant_data,
-                                                   shared_ptr<op::v1::Reshape> dyn_reshape)
+                                                   R dyn_reshape)
 {
     const Shape& out_shape = dyn_reshape->get_shape();
     runtime::AlignedBuffer buffer(shape_size(out_shape) * sizeof(T));
@@ -44,18 +45,102 @@ shared_ptr<op::Constant> fold_constant_dyn_reshape(shared_ptr<op::Constant> cons
     return make_shared<op::Constant>(dyn_reshape->get_element_type(), out_shape, data_ptr);
 }

+template <class R>
+std::shared_ptr<op::Constant> do_fold(R dyn_reshape_match,
+                                      shared_ptr<op::Constant> constant_data_match)
+{
+    std::shared_ptr<op::Constant> replacement;
+    auto type = dyn_reshape_match->get_element_type();
+    switch (type)
+    {
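+        // Editor's note (descriptive comment, added for clarity): the switch
+        // dispatches on the constant's element type so that
+        // fold_constant_dyn_reshape<T> runs with the matching C++ storage type.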
+ case element::Type_t::undefined: + NGRAPH_CHECK(false, + "Encountered 'undefined' element type in constant_dyn_reshape_callback"); + break; + case element::Type_t::dynamic: + NGRAPH_CHECK(false, "Encountered 'dynamic' element type in constant_dyn_reshape_callback"); + break; + case element::Type_t::u1: + NGRAPH_CHECK(false, "Encountered 'u1' element type in constant_dyn_reshape_callback"); + break; + case element::Type_t::boolean: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::bf16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::f16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::f32: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::f64: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i8: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i32: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::i64: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u8: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u16: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u32: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + case element::Type_t::u64: + replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); + break; + } + return replacement; +} + void pass::ConstantFolding::construct_constant_dyn_reshape() { auto constant_data_label = make_shared( element::f32, Shape{2, 4}, pattern::has_class()); auto constant_shape_label = make_shared(element::i64, Shape{1}, pattern::has_class()); - auto dyn_reshape = + auto reshape_v1 = make_shared(constant_data_label, constant_shape_label, false); + auto dyn_reshape = + make_shared(constant_data_label, constant_shape_label, false); // Note: No need to capture or consider constant_shape_label, because // shape propagation will have transferred the info to dyn_reshape's // output. 
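+    // Editor's note (illustration, not part of the patch): two matchers are
+    // registered below, one for the new v1::Reshape and one for the restored
+    // v0::DynReshape; both callbacks funnel into the shared do_fold() helper.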
+ auto constant_reshape_v1_callback = [constant_data_label](pattern::Matcher& m) { + NGRAPH_DEBUG << "In callback for constant_reshape_v1_callback against node = " + << m.get_match_root()->get_name(); + + auto pattern_map = m.get_pattern_map(); + + auto constant_data_match = + static_pointer_cast(pattern_map[constant_data_label]); + auto match_root = m.get_match_root(); + NGRAPH_CHECK(revalidate_and_ensure_static(match_root)); + shared_ptr replacement; + replacement = + do_fold(static_pointer_cast(match_root), constant_data_match); + replace_node(m.get_match_root(), replacement); + return true; + }; + + auto reshape_v1_matcher = + make_shared(reshape_v1, "ConstantFolding.ConstantReshapev1"); + this->add_matcher( + reshape_v1_matcher, constant_reshape_v1_callback, PassProperty::CHANGE_DYNAMIC_STATE); + auto constant_dyn_reshape_callback = [constant_data_label](pattern::Matcher& m) { NGRAPH_DEBUG << "In callback for constant_dyn_reshape_callback against node = " << m.get_match_root()->get_name(); @@ -64,75 +149,11 @@ void pass::ConstantFolding::construct_constant_dyn_reshape() auto constant_data_match = static_pointer_cast(pattern_map[constant_data_label]); - auto dyn_reshape_match = static_pointer_cast(m.get_match_root()); - - NGRAPH_CHECK(revalidate_and_ensure_static(dyn_reshape_match)); - - std::shared_ptr replacement; - auto type = dyn_reshape_match->get_element_type(); - switch (type) - { - case element::Type_t::undefined: - NGRAPH_CHECK(false, - "Encountered 'undefined' element type in constant_dyn_reshape_callback"); - break; - case element::Type_t::dynamic: - NGRAPH_CHECK(false, - "Encountered 'dynamic' element type in constant_dyn_reshape_callback"); - break; - case element::Type_t::u1: - NGRAPH_CHECK(false, "Encountered 'u1' element type in constant_dyn_reshape_callback"); - break; - case element::Type_t::boolean: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::bf16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::f16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::f32: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::f64: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i8: - replacement = fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i32: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::i64: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u8: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u16: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u32: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - case element::Type_t::u64: - replacement = - fold_constant_dyn_reshape(constant_data_match, dyn_reshape_match); - break; - } - + auto match_root = m.get_match_root(); + NGRAPH_CHECK(revalidate_and_ensure_static(match_root)); + shared_ptr replacement; + 
replacement = + do_fold(static_pointer_cast(match_root), constant_data_match); replace_node(m.get_match_root(), replacement); return true; }; diff --git a/test/dyn_elimination.cpp b/test/dyn_elimination.cpp index 4c6eb9d2a19..d519616b4cb 100644 --- a/test/dyn_elimination.cpp +++ b/test/dyn_elimination.cpp @@ -14,10 +14,13 @@ // limitations under the License. //***************************************************************************** -#include "ngraph/pass/dyn_elimination.hpp" #include "gtest/gtest.h" + #include "ngraph/ngraph.hpp" +#include "ngraph/pass/constant_folding.hpp" +#include "ngraph/pass/dyn_elimination.hpp" #include "ngraph/pass/manager.hpp" +#include "ngraph/pass/opset0_downgrade.hpp" #include "util/all_close_f.hpp" #include "util/test_tools.hpp" @@ -266,3 +269,25 @@ TEST(dyn_elimination, range_f64) ASSERT_TRUE(test::all_close_f( vals, vector{-0.5, -0.25, 0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75})); } + +#ifndef NGRAPH_JSON_DISABLE +TEST(dyn_elimination, paddlepaddle_transpose) +{ + string model = "paddlepaddle/transpose.json"; + const string json_path = file_util::path_join(SERIALIZED_ZOO, model); + const string json_string = file_util::read_file_to_string(json_path); + shared_ptr f = ngraph::deserialize(json_string); + + vector arg_element_types = {element::f64, element::f64}; + vector arg_shapes = {{3, 4}, {4, 3}}; + std::vector arg_value_base_pointers = {nullptr, nullptr}; + auto clone = specialize_function(f, arg_element_types, arg_shapes, arg_value_base_pointers); + + pass::Manager passes; + passes.register_pass(); + passes.register_pass(); + passes.register_pass(); // Converts dynamic v1 variants to v0 ops + passes.set_per_pass_validation(false); + passes.run_passes(clone); +} +#endif diff --git a/test/models/paddlepaddle/transpose.json b/test/models/paddlepaddle/transpose.json new file mode 100644 index 00000000000..984116bd08b --- /dev/null +++ b/test/models/paddlepaddle/transpose.json @@ -0,0 +1,1924 @@ +[ + { + "name": "Function_0", + "ops": [ + { + "cacheable": false, + "element_type": "double", + "name": "Parameter_1", + "op": "Parameter", + "op_version": 0, + "outputs": [ + "Parameter_1_0" + ], + "shape": null, + "type_info": { + "name": "Parameter", + "version": 0 + } + }, + { + "cacheable": false, + "element_type": "double", + "name": "Parameter_0", + "op": "Parameter", + "op_version": 0, + "outputs": [ + "Parameter_0_0" + ], + "shape": null, + "type_info": { + "name": "Parameter", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0" + ], + "name": "ShapeOf_80", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_80_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_84", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_84_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_79", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_79_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_82", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_82_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_79", + "Constant_82" + ], + "name": "DynReshape_85", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + 
"DynReshape_85_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_83", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_83_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_80", + "Constant_84", + "DynReshape_85", + "Constant_83" + ], + "lower_bounds_mask": [], + "name": "DynSlice_86", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_86_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_88", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_88_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_86", + "Constant_88" + ], + "name": "Product_89", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_89_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_89" + ], + "name": "Reshape_90", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_90_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_80" + ], + "name": "ShapeOf_81", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_81_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_80", + "DynReshape_85", + "ShapeOf_81", + "Constant_83" + ], + "lower_bounds_mask": [], + "name": "DynSlice_87", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_87_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_91", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_91_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_87", + "Constant_91" + ], + "name": "Product_92", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_92_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_92" + ], + "name": "Reshape_93", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_93_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_90", + "Reshape_93" + ], + "name": "Concat_94", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_94_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0", + "Concat_94" + ], + "name": "DynReshape_95", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_95_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_113", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_113_0" + ], + "shape": [ + 2 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1", + "0" + ] + }, + { + "inputs": [ + "DynReshape_95", + 
"Constant_113" + ], + "name": "Transpose_114", + "op": "Transpose", + "op_version": 0, + "outputs": [ + "Transpose_114_0" + ], + "type_info": { + "name": "Transpose", + "version": 0 + } + }, + { + "element_type": "double", + "name": "Constant_63", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_63_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "input_order": [ + 0 + ], + "inputs": [ + "Constant_63" + ], + "name": "Reshape_68", + "op": "Reshape", + "op_version": 0, + "output_shape": [], + "outputs": [ + "Reshape_68_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0" + ], + "name": "ShapeOf_11", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_11_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_15", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_15_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_10", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_10_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_13", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_13_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_10", + "Constant_13" + ], + "name": "DynReshape_16", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_16_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_14", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_14_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_11", + "Constant_15", + "DynReshape_16", + "Constant_14" + ], + "lower_bounds_mask": [], + "name": "DynSlice_17", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_17_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_19", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_19_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_17", + "Constant_19" + ], + "name": "Product_20", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_20_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_20" + ], + "name": "Reshape_21", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_21_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_11" + ], + "name": "ShapeOf_12", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_12_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_11", + "DynReshape_16", + "ShapeOf_12", + "Constant_14" + ], + "lower_bounds_mask": 
[], + "name": "DynSlice_18", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_18_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_22", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_22_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_18", + "Constant_22" + ], + "name": "Product_23", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_23_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_23" + ], + "name": "Reshape_24", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_24_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_21", + "Reshape_24" + ], + "name": "Concat_25", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_25_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0", + "Concat_25" + ], + "name": "DynReshape_26", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_26_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_28", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_28_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_32", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_32_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_27", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_27_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_30", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_30_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_27", + "Constant_30" + ], + "name": "DynReshape_33", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_33_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_31", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_31_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_28", + "Constant_32", + "DynReshape_33", + "Constant_31" + ], + "lower_bounds_mask": [], + "name": "DynSlice_34", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_34_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_36", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_36_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_34", + "Constant_36" + ], + "name": 
"Product_37", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_37_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_37" + ], + "name": "Reshape_38", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_38_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_28" + ], + "name": "ShapeOf_29", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_29_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_28", + "DynReshape_33", + "ShapeOf_29", + "Constant_31" + ], + "lower_bounds_mask": [], + "name": "DynSlice_35", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_35_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_39", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_39_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_35", + "Constant_39" + ], + "name": "Product_40", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_40_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_40" + ], + "name": "Reshape_41", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_41_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_38", + "Reshape_41" + ], + "name": "Concat_42", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_42_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_1", + "Concat_42" + ], + "name": "DynReshape_43", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_43_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_26", + "DynReshape_43" + ], + "name": "Dot_44", + "op": "Dot", + "op_version": 0, + "outputs": [ + "Dot_44_0" + ], + "reduction_axes_count": 1, + "type_info": { + "name": "Dot", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_0" + ], + "name": "ShapeOf_7", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_7_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_7" + ], + "lower_bounds": [ + 0 + ], + "name": "Slice_46", + "op": "Slice", + "op_version": 0, + "outputs": [ + "Slice_46_0" + ], + "strides": [ + 1 + ], + "type_info": { + "name": "Slice", + "version": 0 + }, + "upper_bounds": [ + 1 + ] + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_8", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_8_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_47", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_47_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "ShapeOf_8" + ], + "name": "ShapeOf_9", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_9_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + 
}, + { + "element_type": "int64_t", + "name": "Constant_45", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_45_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_8", + "Constant_47", + "ShapeOf_9", + "Constant_45" + ], + "lower_bounds_mask": [], + "name": "DynSlice_48", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_48_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "axis": 0, + "inputs": [ + "Slice_46", + "DynSlice_48" + ], + "name": "Concat_49", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_49_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Dot_44", + "Concat_49" + ], + "name": "DynReshape_50", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_50_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_50" + ], + "name": "ShapeOf_64", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_64_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_65", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_65_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "ShapeOf_64", + "Constant_65" + ], + "name": "Product_66", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_66_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "inputs": [ + "Product_66" + ], + "name": "Convert_67", + "op": "Convert", + "op_version": 0, + "outputs": [ + "Convert_67_0" + ], + "target_type": "double", + "type_info": { + "name": "Convert", + "version": 0 + } + }, + { + "inputs": [ + "Reshape_68", + "Convert_67" + ], + "name": "Divide_69", + "op": "Divide", + "op_version": 0, + "outputs": [ + "Divide_69_0" + ], + "pythondiv": true, + "type_info": { + "name": "Divide", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_73", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_73_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "ShapeOf_64" + ], + "name": "ShapeOf_70", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_70_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "input_order": [ + 0 + ], + "inputs": [ + "ShapeOf_70" + ], + "name": "Reshape_71", + "op": "Reshape", + "op_version": 0, + "output_shape": [], + "outputs": [ + "Reshape_71_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_72", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_72_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_73", + "Reshape_71", + "Constant_72" + ], + "name": "Range_74", + "op": "Range", + "op_version": 0, + "outputs": [ + "Range_74_0" + ], + "type_info": { + "name": "Range", + "version": 0 + } + }, + { + "inputs": [ + "Divide_69", + "ShapeOf_64", + "Range_74" + ], + "name": "DynBroadcast_75", + "op": "DynBroadcast", + "op_version": 0, + "outputs": [ + "DynBroadcast_75_0" 
+ ], + "type_info": { + "name": "DynBroadcast", + "version": 0 + } + }, + { + "inputs": [ + "DynReshape_95" + ], + "name": "ShapeOf_117", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_117_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_117" + ], + "lower_bounds": [ + 0 + ], + "name": "Slice_118", + "op": "Slice", + "op_version": 0, + "outputs": [ + "Slice_118_0" + ], + "strides": [ + 1 + ], + "type_info": { + "name": "Slice", + "version": 0 + }, + "upper_bounds": [ + 1 + ] + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_97", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_97_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "element_type": "int64_t", + "name": "Constant_101", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_101_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_96", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_96_0" + ], + "shape": [], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "element_type": "int64_t", + "name": "Constant_99", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_99_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "inputs": [ + "Constant_96", + "Constant_99" + ], + "name": "DynReshape_102", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_102_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "element_type": "int64_t", + "name": "Constant_100", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_100_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "1" + ] + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_97", + "Constant_101", + "DynReshape_102", + "Constant_100" + ], + "lower_bounds_mask": [], + "name": "DynSlice_103", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + "DynSlice_103_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_105", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_105_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_103", + "Constant_105" + ], + "name": "Product_106", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_106_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_106" + ], + "name": "Reshape_107", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_107_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_97" + ], + "name": "ShapeOf_98", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_98_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "ellipsis_mask": [], + "inputs": [ + "ShapeOf_97", + "DynReshape_102", + "ShapeOf_98", + "Constant_100" + ], + "lower_bounds_mask": [], + "name": "DynSlice_104", + "new_axis": [], + "op": "DynSlice", + "op_version": 0, + "outputs": [ + 
"DynSlice_104_0" + ], + "shrink_axis": [], + "type_info": { + "name": "DynSlice", + "version": 0 + }, + "upper_bounds_mask": [] + }, + { + "element_type": "int64_t", + "name": "Constant_108", + "op": "Constant", + "op_version": 0, + "outputs": [ + "Constant_108_0" + ], + "shape": [ + 1 + ], + "type_info": { + "name": "Constant", + "version": 0 + }, + "value": [ + "0" + ] + }, + { + "inputs": [ + "DynSlice_104", + "Constant_108" + ], + "name": "Product_109", + "op": "Product", + "op_version": 0, + "outputs": [ + "Product_109_0" + ], + "reduction_axes": [ + 0 + ], + "type_info": { + "name": "Product", + "version": 0 + } + }, + { + "input_order": [], + "inputs": [ + "Product_109" + ], + "name": "Reshape_110", + "op": "Reshape", + "op_version": 0, + "output_shape": [ + 1 + ], + "outputs": [ + "Reshape_110_0" + ], + "type_info": { + "name": "Reshape", + "version": 0 + } + }, + { + "axis": 0, + "inputs": [ + "Reshape_107", + "Reshape_110" + ], + "name": "Concat_111", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_111_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_1", + "Concat_111" + ], + "name": "DynReshape_112", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_112_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_112" + ], + "name": "ShapeOf_119", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_119_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "ShapeOf_119" + ], + "lower_bounds": [ + 1 + ], + "name": "Slice_120", + "op": "Slice", + "op_version": 0, + "outputs": [ + "Slice_120_0" + ], + "strides": [ + 1 + ], + "type_info": { + "name": "Slice", + "version": 0 + }, + "upper_bounds": [ + 2 + ] + }, + { + "axis": 0, + "inputs": [ + "Slice_118", + "Slice_120" + ], + "name": "Concat_121", + "op": "Concat", + "op_version": 0, + "outputs": [ + "Concat_121_0" + ], + "type_info": { + "name": "Concat", + "version": 0 + } + }, + { + "inputs": [ + "DynBroadcast_75", + "Concat_121" + ], + "name": "DynReshape_122", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_122_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "Transpose_114", + "DynReshape_122" + ], + "name": "Dot_123", + "op": "Dot", + "op_version": 0, + "outputs": [ + "Dot_123_0" + ], + "reduction_axes_count": 1, + "type_info": { + "name": "Dot", + "version": 0 + } + }, + { + "inputs": [ + "Parameter_1" + ], + "name": "ShapeOf_77", + "op": "ShapeOf", + "op_version": 0, + "outputs": [ + "ShapeOf_77_0" + ], + "type_info": { + "name": "ShapeOf", + "version": 0 + } + }, + { + "inputs": [ + "Dot_123", + "ShapeOf_77" + ], + "name": "DynReshape_124", + "op": "DynReshape", + "op_version": 0, + "outputs": [ + "DynReshape_124_0" + ], + "type_info": { + "name": "DynReshape", + "version": 0 + }, + "zero_flag": false + }, + { + "inputs": [ + "DynReshape_124" + ], + "name": "Result_125", + "needs_default_layout": true, + "op": "Result", + "op_version": 0, + "outputs": [ + "Result_125_0" + ], + "type_info": { + "name": "Result", + "version": 0 + } + } + ], + "parameters": [ + "Parameter_0", + "Parameter_1" + ], + "result": [ + "Result_125" + ] + } +] From 19a7d71054b99b3695472e9c2b01173531a27db3 Mon Sep 17 00:00:00 2001 From: Robert Kimball Date: Mon, 13 Jan 2020 14:51:38 -0800 Subject: [PATCH 05/12] Fix topk deserialize to be backwards compatible (#4171) --- 
src/ngraph/serializer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp index b0709425cd2..078dc68c410 100644 --- a/src/ngraph/serializer.cpp +++ b/src/ngraph/serializer.cpp @@ -2943,7 +2943,8 @@ shared_ptr JSONDeserializer::deserialize_node(json node_js) { auto compute_max = node_js.at("compute_max").get(); auto target_type = read_element_type(node_js.at("index_element_type")); - op::TopKSortType sort = node_js.at("sort").get(); + op::TopKSortType sort = + get_or_default(node_js, "sort", op::TopKSortType::SORT_VALUES); if (has_key(node_js, "top_k_axis")) { auto top_k_axis = node_js.at("top_k_axis").get(); From 660ae5acd86e06a765123293895b147b3479a929 Mon Sep 17 00:00:00 2001 From: Leona C Date: Mon, 13 Jan 2020 16:23:57 -0800 Subject: [PATCH 06/12] Update doc to 0.28 (#4166) * Update sitemap to not use a page title * Document Release Notes for 0.28.0; update section on Distributed Training Co-authored-by: Scott Cyphers --- doc/sphinx/source/conf.py | 5 ++-- doc/sphinx/source/ops/constant.rst | 2 +- doc/sphinx/source/ops/parameter.rst | 8 +++--- doc/sphinx/source/ops/result.rst | 2 +- doc/sphinx/source/project/release-notes.rst | 22 +++++++++++------ doc/sphinx/source/training/index.rst | 6 ++++- doc/sphinx/source/training/overview.rst | 27 ++------------------- 7 files changed, 31 insertions(+), 41 deletions(-) diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index b90857c9212..b7500ca5db0 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -73,11 +73,11 @@ # built documents. # # The short X.Y version. -version = '0.27' +version = '0.28' # The Documentation full version, including alpha/beta/rc tags. Some features # available in the latest code will not necessarily be documented first -release = '0.27.1' +release = '0.28.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -143,6 +143,7 @@ ] } +html_last_updated_fmt= '' # -- Options for HTMLHelp output ------------------------------------------ diff --git a/doc/sphinx/source/ops/constant.rst b/doc/sphinx/source/ops/constant.rst index fa931d28dd0..b8ae174420f 100644 --- a/doc/sphinx/source/ops/constant.rst +++ b/doc/sphinx/source/ops/constant.rst @@ -43,6 +43,6 @@ Outputs C++ Interface ============= -.. doxygenclass:: ngraph::op::Constant +.. doxygenclass:: ngraph::op::v0::Constant :project: ngraph :members: diff --git a/doc/sphinx/source/ops/parameter.rst b/doc/sphinx/source/ops/parameter.rst index 0e9ddc07656..efec9007d4d 100644 --- a/doc/sphinx/source/ops/parameter.rst +++ b/doc/sphinx/source/ops/parameter.rst @@ -4,7 +4,7 @@ Parameter ######### -.. code-block: cpp +.. code-block:: cpp Parameter // A function parameter. @@ -38,7 +38,9 @@ Outputs | ``output`` | ``element_type`` | ``shape`` | +------------+------------------+------------+ -A ``Parameter`` produces the value of the tensor passed to the function in the position of the parameter in the function's arguments. The passed tensor must have the element type and shape specified by the parameter. +A ``Parameter`` produces the value of the tensor passed to the function +in the position of the parameter in the function's arguments. The passed +tensor must have the element type and shape specified by the parameter. Backprop ======== @@ -51,6 +53,6 @@ Backprop C++ Interface ============= -.. doxygenclass:: ngraph::op::Parameter +.. 
doxygenclass:: ngraph::op::v0::Parameter :project: ngraph :members: diff --git a/doc/sphinx/source/ops/result.rst b/doc/sphinx/source/ops/result.rst index 6537c0676ec..23eeaf5d477 100644 --- a/doc/sphinx/source/ops/result.rst +++ b/doc/sphinx/source/ops/result.rst @@ -45,6 +45,6 @@ Mathematical Definition C++ Interface ============= -.. doxygenclass:: ngraph::op::Result +.. doxygenclass:: ngraph::op::v0::Result :project: ngraph :members: diff --git a/doc/sphinx/source/project/release-notes.rst b/doc/sphinx/source/project/release-notes.rst index be14a68d376..04f7f7fe4bd 100644 --- a/doc/sphinx/source/project/release-notes.rst +++ b/doc/sphinx/source/project/release-notes.rst @@ -21,19 +21,15 @@ We are pleased to announce the release of version |version|. Core updates for |version| -------------------------- -+ New ops -+ Provenance improvements from 0.25.1 -+ More dynamic shape ops -+ More informative errors - Latest documentation updates ---------------------------- -+ Additional details on quantization -+ Index updates -+ API updates ++ Dynamic Shapes and APIs ++ Provenance ++ Add linkages and overview for quantization APIs ++ New ngraph.ai themed illustrations .. important:: Pre-releases (``-rc-0.*``) have newer features, and are less stable. @@ -42,6 +38,16 @@ Latest documentation updates Changelog on Previous Releases ============================== +0.27.1 + ++ Fixes broken serializer for Sum and Product ++ New ops ++ Provenance improvements from 0.25.1 ++ More dynamic shape ops ++ More informative errors ++ Additional details on quantization ++ Index updates ++ API updates + All ops support ``Output`` arguments + Additional ops + ONNX handling unknown domains diff --git a/doc/sphinx/source/training/index.rst b/doc/sphinx/source/training/index.rst index 44d5e3b2782..bdc665e08ed 100644 --- a/doc/sphinx/source/training/index.rst +++ b/doc/sphinx/source/training/index.rst @@ -5,10 +5,14 @@ Distributed Training #################### +.. important:: Distributed training for CPU backend is not supported. Distributed + training support is provided only with the Intel® Nervana™ Neural Network Processor + for Training (NNP-T). .. toctree:: :maxdepth: 1 - overview.rst data_ingest.rst + + diff --git a/doc/sphinx/source/training/overview.rst b/doc/sphinx/source/training/overview.rst index 14f3cd136e8..6b088250092 100644 --- a/doc/sphinx/source/training/overview.rst +++ b/doc/sphinx/source/training/overview.rst @@ -1,3 +1,5 @@ +:orphan: + .. training/overview.rst: .. _overview: @@ -5,28 +7,3 @@ Basic Concepts ============== -.. important:: Distributed training is not officially supported as of version - |version|; however, some configuration options have worked for nGraph - devices in testing environments. - - -Data scientists with locally-scalable rack or cloud-based resources will likely -find it worthwhile to experiment with different modes or variations of -distributed training. Deployments using nGraph Library with supported backends -can be configured to train with data parallelism and will soon work with model -parallelism. Distributing workloads is increasingly important, as more data and -bigger models mean the ability to :doc:`../core/constructing-graphs/distribute-train` -work with larger and larger datasets, or to work with models having many layers -that aren't designed to fit to a single device. 
- -Distributed training with data parallelism splits the data and each worker -node has the same model; during each iteration, the gradients are aggregated -across all workers with an op that performs "allreduce", and applied to update -the weights. - -Using multiple machines helps to scale and speed up deep learning. With large -mini-batch training, one could train ResNet-50 with Imagenet-1k data to the -*Top 5* classifier in minutes using thousands of CPU nodes. See -`arxiv.org/abs/1709.05011`_. - -.. _arxiv.org/abs/1709.05011: https://arxiv.org/format/1709.05011 \ No newline at end of file From b3db038eb592e7ef8b8648d82c62ade503481d69 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Mon, 13 Jan 2020 20:23:35 -0800 Subject: [PATCH 07/12] Fix op::Pad::set_padding_above to actually set m_padding_above. (#4169) Co-authored-by: Scott Cyphers --- src/ngraph/op/pad.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ngraph/op/pad.hpp b/src/ngraph/op/pad.hpp index 08a2bdac5d6..fa7b13586cd 100644 --- a/src/ngraph/op/pad.hpp +++ b/src/ngraph/op/pad.hpp @@ -61,7 +61,7 @@ namespace ngraph const CoordinateDiff& get_padding_above() const { return m_padding_above; } void set_padding_above(const CoordinateDiff& padding_above) { - m_padding_below = padding_above; + m_padding_above = padding_above; } /// \brief DEPRECATED. This is just a stub for backends that used to implement the From 6de4893b4f95883956cfbc03c56913a001168f0d Mon Sep 17 00:00:00 2001 From: Nagy Mostafa Date: Mon, 13 Jan 2020 20:55:08 -0800 Subject: [PATCH 08/12] [MLIR] In-place memory optimization for elt-wise and concat ops. (#3832) * AliasMap WIP * Added liveness info * WIP * WIP: Tests * WIP: LIT tests * Added knobs for mem optimization pass. More LIT tests * Revert affine_lowerer change * More elaborate comment * Minor fixes * style-apply * Rename liveness * Switch to Analysis framework * Fix optimization conditions * Remove LIT test * style * Switch to equivalence relationship impl of non-alias relationship * refined comment * Switch non-alias to equivalence relationship * Fix bad merge * Adding tests. 
WIP * Added buffer size tracking and unit-tests * Added LIT and unit-tests * Turn optimization ON * style * fix unit-tests * Fix useCount * Fix copyright and typo * Refine few comments, remove new lines * style fix Co-authored-by: Scott Cyphers Co-authored-by: Sang Ik Lee --- src/contrib/mlir/CMakeLists.txt | 2 +- .../mlir/backend/analysis/memory_analysis.cpp | 613 ++++++++++++++++++ .../mlir/backend/analysis/memory_analysis.hpp | 79 +++ src/contrib/mlir/backend/cpu/cpu_backend.cpp | 17 - .../mlir/backend/pass/affine_lowerer.cpp | 121 +++- .../mlir/backend/pass/memory_optimization.cpp | 160 ----- .../mlir/backend/pass/memory_optimization.hpp | 27 - src/contrib/mlir/backend/pass/op_lowerers.inc | 2 +- src/contrib/mlir/core/ngraph_dialect/ops.cpp | 22 - src/contrib/mlir/core/ngraph_dialect/ops.hpp | 4 - test/backend/concat.in.cpp | 137 ++++ test/mlir/affine_conversion/memory_opt.mlir | 128 ++++ 12 files changed, 1053 insertions(+), 259 deletions(-) create mode 100644 src/contrib/mlir/backend/analysis/memory_analysis.cpp create mode 100644 src/contrib/mlir/backend/analysis/memory_analysis.hpp delete mode 100644 src/contrib/mlir/backend/pass/memory_optimization.cpp delete mode 100644 src/contrib/mlir/backend/pass/memory_optimization.hpp create mode 100644 test/mlir/affine_conversion/memory_opt.mlir diff --git a/src/contrib/mlir/CMakeLists.txt b/src/contrib/mlir/CMakeLists.txt index 10494225c48..a405ae924dc 100644 --- a/src/contrib/mlir/CMakeLists.txt +++ b/src/contrib/mlir/CMakeLists.txt @@ -25,7 +25,7 @@ add_subdirectory(tools/ngraph-opt) set(SRC backend/cpu/cpu_backend.cpp backend/pass/affine_lowerer.cpp - backend/pass/memory_optimization.cpp + backend/analysis/memory_analysis.cpp core/compiler.cpp core/ngraph_dialect/dialect.cpp core/ngraph_dialect/type.cpp diff --git a/src/contrib/mlir/backend/analysis/memory_analysis.cpp b/src/contrib/mlir/backend/analysis/memory_analysis.cpp new file mode 100644 index 00000000000..17fd28102c0 --- /dev/null +++ b/src/contrib/mlir/backend/analysis/memory_analysis.cpp @@ -0,0 +1,613 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// NOTE: This file follows nGraph format style and MLIR naming convention since it does +// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. 
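// For orientation: the affine lowering pass patched later in this commit
// consumes this analysis roughly like the condensed sketch below. The
// calls are lifted from the affine_lowerer.cpp hunks further down;
// getAnalysis<MemoryAnalysis>() is MLIR's standard analysis-framework
// entry point, and nothing here is additional API:
//
//   m_memAnalysis = &getAnalysis<MemoryAnalysis>();     // in runOnModule()
//   auto bufferInfo = m_memAnalysis->getBufferInfo(op); // per lowered op
//   if (bufferInfo.isValid())
//   {
//       // re-use (or create) the linear buffer for bufferInfo.m_bufferId,
//       // then view it at bufferInfo.m_offset -- see createTempBuffer()
//       // and createTempMemref() in the affine lowering diff below.
//   }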
+ +#include "memory_analysis.hpp" +#include "contrib/mlir/core/compiler.hpp" +#include "contrib/mlir/core/ngraph_dialect/ops.hpp" +#include "contrib/mlir/core/ngraph_dialect/type.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static llvm::cl::opt clEnableNgInPlaceMemory( + "ngraph-memory-opt", + llvm::cl::init(true), + llvm::cl::desc("Enable ngraph dialect in-place memory optimization pass")); + +static llvm::cl::opt + clEnableNgInPlaceConcat("ngraph-memory-opt-concat", + llvm::cl::init(true), + llvm::cl::desc("Enable inplace concat optimization")); + +static llvm::cl::opt + clEnableNgInPlaceEltWise("ngraph-memory-opt-eltwise", + llvm::cl::init(true), + llvm::cl::desc("Enable inplace element wise optimization")); + +// anonymous namespace +// no need to expose any of the following outside of this file +namespace +{ + using namespace ngraph::runtime; + using namespace ngraph::runtime::ngmlir; + using namespace mlir; + + // A helper data-structure to track cannot alias relationship between + // tensor syms. If NoAlias[T] contains S, then T and S cannot alias. + // The relationship is an equivalence (transitive, symmetric, reflexive) + // Initially each sym is put in its own equivalence class (set). + // If two syms a and b are found to be non-alias (equivalent), + // their equivalence classes are unioned + class AliasRelation + { + public: + /// Initialize the relationship for a number of syms + void init(std::unordered_set& symbols); + /// Checks if values a and b can alias + bool canAlias(Value* a, Value* b); + void insertNoAlias(Value* a, Value* b); + + private: + using BV = llvm::BitVector; + std::unordered_map m_valueToIdx; + std::unordered_map m_idxToValue; + std::unordered_map m_valueToSet; + SmallVector m_sets; + }; + + // Simple single basic block liveness analysis + // TODO: Replace with MLIR's liveness analysis + class LivenessAnalysis + { + public: + bool isLive(Value* v); + void setLive(Value* v); + void kill(Value* v); + void getLiveValues(llvm::SmallVectorImpl& values); + void reset(); + + private: + unsigned m_maxIdx = 0; + SmallVector m_liveness; + std::unordered_map m_valueToIdx; + }; + + // Memory Assignment analysis + // Tries to find operations that can be done in place where applicable + // by assigning a virtual buffer ID to values. 
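// To make the assignment concrete, a small worked example in illustrative
// pseudo-IR (op and value names here are invented for this note, not real
// dialect syntax):
//
//   %t0 = "ng.add"(%a, %b)    // %a, %b are block args (sub-graph inputs)
//   %t1 = "ng.mul"(%t0, %t0)  // %t0 has no uses below this op
//   %t2 = "ng.relu"(%t1)      // %t2 feeds the return (sub-graph output)
//   "ng.return"(%t2)
//
// Scanning backwards: relu is skipped because its result is an output;
// for mul, operand %t0 is dead and not an I/O value, so %t1 and %t0 (that
// is, mul and %t0's defining op, add) both receive {bufferId = 0,
// offset = 0} and will share one linear buffer during affine lowering;
// when add itself is visited, no further re-use happens because its
// operands are block arguments. %t2 keeps a plain allocation.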
+ // The buffer assignment is used later in affine lowering pass to create + // or re-use memrefs + class MemoryAssignment + { + public: + MemoryAssignment(MemoryAnalysis* memAnalysis) + : m_memAnalysis(memAnalysis) + { + m_inplaceOps = { +#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE}, +#include "contrib/mlir/backend/pass/op_lowerers.inc" + }; + m_bufferId = 0; + } + void run(ModuleOp* module); + + private: + void processDestructiveInPlace(mlir::Operation* op); + void processConcat(mlir::Operation* op); + bool isSafeInPlace(mlir::Operation* op); + bool isInputOrOutputValue(mlir::Value* value); + LivenessAnalysis m_liveness; + AliasRelation m_aliasRelation; + std::unordered_map m_inplaceOps; + int m_bufferId; + MemoryAnalysis* m_memAnalysis; + }; + + // helpers + // Determines the buffer size a value needs based on its type + // offset is where that value should start in the buffer + static unsigned getBufferSizeForOperand(mlir::Value* value, int offset); + + // Go backwards over instructions + // + // Re-use buffers if none of the dst/srcs are input/output of the sub-graph + // + // For destructive in-place ops (elt-wise): + // - Find first src where it is last use (src is dead). + // If all srcs are last-use, then pick one with lower number of uses. + // If no src is found, bail out. + // - If dst has pre-assigned buffer/offset, then copy them to src. + // If not, assign new buffer to both dst and src. + // - Mark all live syms at this point to not alias src + // + // For non-Destructive in-place ops: + // Concat: + // - Reuse buffer if + // - Concat axis is most-significant non-one axis, and + // - all operands can alias dest. + // - If dst has an assignment, copy it over to srcs as long as + // there is no conflicting src pre-assignment + // - If dst has no assignment, and all srcs have no assignment, + // assign new buffer to dst and srcs + // + // Slice: TBD + // Reshape: TBD + // + // Update liveness info + void MemoryAssignment::run(ModuleOp* module) + { + if (!clEnableNgInPlaceMemory) + { + // Optimization disabled + return; + } + SmallVector funcOps(module->getOps()); + + if (funcOps.size() > 1 || funcOps.empty()) + { + // single func for now + return; + } + auto f = funcOps.back(); + auto& blocks = f.getBlocks(); + if (blocks.size() != 1) + { + // single block func for now + return; + } + auto& block = *(blocks.begin()); + + // count number of syms in the code and initialize alias relationship + std::unordered_set syms; + + for (auto it = block.begin(); it != block.end(); it++) + { + Operation* op = &(*it); + for (auto it : op->getResults()) + { + Value* v = it; + if (syms.find(v) == syms.end()) + { + syms.insert(v); + } + } + for (auto it : op->getOperands()) + { + Value* v = it; + if (syms.find(v) == syms.end()) + { + syms.insert(v); + } + } + } + m_aliasRelation.init(syms); + // scan instructions backwards + for (auto it = block.rbegin(); it != block.rend(); it++) + { + Operation* op = &(*it); + + if (isSafeInPlace(op)) + { + // TODO: replace with Op Interface check + if (dyn_cast(op)) + { + if (clEnableNgInPlaceConcat) + processConcat(op); + } + else + { + if (clEnableNgInPlaceEltWise) + processDestructiveInPlace(op); + } + } + // update liveness info + for (auto dit : op->getResults()) + { + m_liveness.kill(dit); + } + for (auto uit : op->getOperands()) + { + m_liveness.setLive(uit); + } + } + } + + void MemoryAssignment::processConcat(mlir::Operation* op) + { + auto concat = cast(op); + { + // concat on the highest non-one axis + auto concatAxis = 
concat.concatenation_axis(); + auto result = concat.getResult(); + auto shape = (result->getType().cast()).getShape(); + std::vector opndOffsets; + BufferInfo bufferInfo; + int bufferId = -1, baseOffset = 0; + unsigned bufferSize = 0; + + if (isInputOrOutputValue(op->getResult(0))) + { + // dst is output, bail out + return; + }; + + for (auto i = 0; i < shape.size(); i++) + { + if (i == concatAxis) + { + break; + } + if (shape[i] != 1) + { + return; + } + } + // check that all operands and dst can alias + // and that none is input or output + for (auto opnd : op->getOperands()) + { + if (!m_aliasRelation.canAlias(result, opnd) || isInputOrOutputValue(opnd)) + { + return; + } + } + // calculate relative offsets in the output buffer + int opndOffset = 0; + for (auto i = 0; i < op->getNumOperands(); i++) + { + if (i == 0) + { + opndOffsets.push_back(0); + } + else + { + auto opnd = op->getOperand(i - 1); + auto tensorType = opnd->getType().cast(); + opndOffset += tensorType.getNumElements(); + opndOffsets.push_back(opndOffset); + } + } + // check for consistent pre-existing buffer assignments + bufferInfo = m_memAnalysis->getBufferInfo(op); + // if dest has an assignment + if (bufferInfo.isValid()) + { + // set buffer ID and base offset to that of dest's + bufferId = bufferInfo.m_bufferId; + baseOffset = bufferInfo.m_offset; + // check if we can re-use it for all src operands + int bufferOffset = 0; + for (auto i = 0; i < op->getNumOperands(); i++) + { + auto opnd = op->getOperand(i); + auto defOp = opnd->getDefiningOp(); + NGRAPH_CHECK(defOp != nullptr, "Defining operation expected"); + // calculate expected absolute offset in the buffer + bufferOffset = baseOffset + opndOffsets[i]; + + bufferInfo = m_memAnalysis->getBufferInfo(defOp); + if (bufferInfo.isValid()) + { + if (bufferInfo.m_bufferId != bufferId || + bufferInfo.m_offset != bufferOffset) + { + // buffer ID or offset mismatch, bailout + return; + } + } + } + } + else + { + // dst has no buffer assignment + // TODO: + // We can re-use an existing assignment of a src operand if + // Every other src either: + // a. has a matching pre-assigned buffer ID and offset, or + // b. is unassigned a buffer/offset, and the computed offset is valid + // (non-negative), and no other live tensor aliases the chunk + // of the buffer we want to assign. + // To achieve this, we need to track buffer->{tensor,offset,size} and + // perform the check + // + // Example: + // V1 = Concat S0 (?), S1{0,16}, S2 (?) + // R0 = ... + // R2 = ... + // V2 = Concat R0{0, 0}, S1 {0,16}, R2{0,32} + // + // For the first concat, we could use the assignment of S1 (from second concat) + // to define assignments for S0 and S2, and since R0, R2 are dead, no live tensors + // alias into the buffer, and the assignment is valid. + // + // On the other hand, the following is invalid + // Example: + // R0 = ... + // V1 = Concat S0(?), S1(0,16), S2(?) + // R2 = ... + // V2 = Concat R0, S1{0,16}, R2 + // Reusing assignment of S1 in the first concat will cause S0 and R0 to alias. + // And since R0 is alive the write to R0 will overwrite S0. 
+ // For now, assign only if all srcs have no prior assignments + for (auto opnd : op->getOperands()) + { + if (m_memAnalysis->getBufferInfo(opnd->getDefiningOp()).isValid()) + { + return; + } + } + } + // We didn't find any pre-existing buffer assignment, create a new buffer + if (bufferId == -1) + { + bufferId = m_bufferId++; + baseOffset = 0; + } + + // adjust the buffer size based on this instruction + // max size is determined from dst offset and type + bufferSize = getBufferSizeForOperand(op->getResult(0), baseOffset); + m_memAnalysis->setBufferSize(bufferId, bufferSize); + + // Update analysis map. No need to check if we are over-writing previous entries + // since they should all match. + m_memAnalysis->setBufferInfo(op, {bufferId, baseOffset}); + for (auto i = 0; i < op->getNumOperands(); i++) + { + auto opnd = op->getOperand(i); + auto defOp = opnd->getDefiningOp(); + NGRAPH_CHECK(defOp != nullptr, "Defining operation expected"); + auto opndOffset = baseOffset + opndOffsets[i]; + m_memAnalysis->setBufferInfo(defOp, {bufferId, opndOffset}); + } + } + } + + void MemoryAssignment::processDestructiveInPlace(mlir::Operation* op) + { + NGRAPH_CHECK(op->getNumResults() == 1, "Destructive in-place with multi-def?"); + Value* use = nullptr; + int useCount = -1; + + if (isInputOrOutputValue(op->getResult(0))) + { + // dst is output, bail out + return; + } + // pick a dead operand that is not an input or output, preferring the one with the fewest uses + for (auto opnd : op->getOperands()) + { + if (!m_liveness.isLive(opnd) && !isInputOrOutputValue(opnd)) + { + int uses = 0; + for (auto& i : opnd->getUses()) + { + uses++; + } + if (useCount == -1 || uses < useCount) + { + use = opnd; + useCount = uses; + } + } + } + if (!use) + { + return; + } + // assign new buffer or copy buffer info from dst + auto bufferInfo = m_memAnalysis->getBufferInfo(op); + if (!bufferInfo.isValid()) + { + // attach a new buffer id, and 0 offset on both src and result + bufferInfo = {m_bufferId++, 0}; + m_memAnalysis->setBufferInfo(op, bufferInfo); + m_memAnalysis->setBufferInfo(use->getDefiningOp(), bufferInfo); + } + else + { + // copy result buffer id and offset to src + m_memAnalysis->setBufferInfo(use->getDefiningOp(), bufferInfo); + } + unsigned bufferSize = getBufferSizeForOperand(op->getResult(0), bufferInfo.m_offset); + m_memAnalysis->setBufferSize(bufferInfo.m_bufferId, bufferSize); + // update aliasing info: the re-used value cannot alias any currently live value + SmallVector liveValues; + m_liveness.getLiveValues(liveValues); + for (auto& value : liveValues) + { + m_aliasRelation.insertNoAlias(use, value); + } + } + bool MemoryAssignment::isInputOrOutputValue(mlir::Value* value) + { + auto defOp = value->getDefiningOp(); + // If no defining op, then this is a block arg, skip operand + // + // TODO: This check is assuming single BB function, improve to handle control-flow. + // In which case, we have to track block args to all pred branches that feed them, + // all the way up to the initial def, if any, or entry block arg. This is preferably + // done as a pre-pass to capture all inputs/output values. + if (!defOp) + { + return true; + } + // If the defined value is an output of the sub-graph, cannot do it in place + // + // TODO: Improve to support control flow. Track value use-chain along branches/block-args, + // if we hit a use in a return, it is an output value. 
+ for (auto& use : value->getUses()) + { + auto useOp = use.getOwner(); + if (isa(useOp)) + { + return true; + } + } + return false; + } + // TODO: Change this to use interfaces. + bool MemoryAssignment::isSafeInPlace(mlir::Operation* op) + { + auto it = m_inplaceOps.find(op->getName().getStringRef().str()); + + return it != m_inplaceOps.end() ? it->second : false; + } + + void AliasRelation::init(std::unordered_set& symbols) + { + unsigned numSyms = symbols.size(); + m_sets.resize(numSyms); + for (auto& bv : m_sets) + { + bv.resize(numSyms); + } + // populate id->value and value->id maps + unsigned i = 0; + for (auto v : symbols) + { + m_idxToValue[i] = v; + m_valueToIdx[v] = i; + m_valueToSet[v] = &m_sets[i]; + // set bit for that value + m_sets[i].set(i); + i++; + } + } + + bool AliasRelation::canAlias(Value* a, Value* b) + { + // a and b can alias only if they belong to different no-alias sets + return m_valueToSet[a] != m_valueToSet[b]; + } + + void AliasRelation::insertNoAlias(Value* a, Value* b) + { + // union the two sets that a and b belong to + // update the maps accordingly + if (!canAlias(a, b)) + { + // nothing to do + return; + } + // union the two sets of a and b + BV* aSet = m_valueToSet[a]; + BV* bSet = m_valueToSet[b]; + BV uSet = (*aSet); + uSet |= (*bSet); + // replace aSet with union + auto pSet = m_valueToSet[a]; + *pSet = uSet; + // update value to set maps + for (auto it = pSet->set_bits_begin(); it != pSet->set_bits_end(); it++) + { + unsigned id = *it; + auto value = m_idxToValue[id]; + m_valueToSet[value] = pSet; + } + } + + void LivenessAnalysis::reset() + { + m_valueToIdx.clear(); + m_liveness.clear(); + m_maxIdx = 0; + } + + void LivenessAnalysis::getLiveValues(llvm::SmallVectorImpl& values) + { + for (auto& entry : m_valueToIdx) + { + if (m_liveness[entry.second]) + { + values.push_back(entry.first); + } + } + } + + bool LivenessAnalysis::isLive(Value* v) + { + auto it = m_valueToIdx.find(v); + if (it == m_valueToIdx.end()) + { + return false; + } + return m_liveness[it->second]; + } + + void LivenessAnalysis::setLive(Value* v) + { + auto it = m_valueToIdx.find(v); + if (it == m_valueToIdx.end()) + { + m_valueToIdx[v] = m_maxIdx++; + m_liveness.push_back(true); + NGRAPH_CHECK(m_liveness.size() == m_maxIdx); + } + else + { + m_liveness[it->second] = true; + } + } + + void LivenessAnalysis::kill(Value* v) + { + auto it = m_valueToIdx.find(v); + if (it == m_valueToIdx.end()) + { + // already dead + return; + } + m_liveness[it->second] = false; + } + // helpers + unsigned getBufferSizeForOperand(mlir::Value* value, int offset) + { + auto tensorType = value->getType().dyn_cast(); + NGRAPH_CHECK(tensorType, "Invalid type to find buffer size for"); + + // bytes per element, rounded up to a whole byte + unsigned bufferSize = offset * std::ceil(tensorType.getElementBitWidth() / 8.0); + bufferSize += tensorType.getSizeInBytes(); + + return bufferSize; + } +} + +namespace mlir +{ + MemoryAnalysis::MemoryAnalysis(Operation* op) + { + MemoryAssignment memoryAssignment(this); + auto moduleOp = dyn_cast(op); + NGRAPH_CHECK(moduleOp != nullptr, "Expecting ModuleOp for analysis"); + memoryAssignment.run(&moduleOp); + } +} // namespace mlir diff --git a/src/contrib/mlir/backend/analysis/memory_analysis.hpp b/src/contrib/mlir/backend/analysis/memory_analysis.hpp new file mode 100644 index 00000000000..9881ce2774d --- /dev/null +++ b/src/contrib/mlir/backend/analysis/memory_analysis.hpp @@ -0,0 +1,79 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache 
License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// NOTE: This file follows nGraph format style and MLIR naming convention since it does +// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. + +#pragma once + +#include +#include +#include "ngraph/check.hpp" + +namespace mlir +{ + // BufferInfo + struct BufferInfo + { + // Buffer Id. If -1 then invalid buffer. + int m_bufferId; + // Offset into the buffer + int m_offset; + bool isValid() const { return m_bufferId != -1; } + }; + + struct MemoryAnalysis + { + using BufferInfoMap = std::unordered_map; + using BufferSizeMap = std::unordered_map; + // Compute this analysis with the provided operation. + MemoryAnalysis(Operation* op); + BufferInfo getBufferInfo(Operation* op) + { + auto it = m_bufferInfo.find(op); + if (it == m_bufferInfo.end()) + { + return {-1, -1}; + } + return it->second; + } + void setBufferInfo(Operation* op, BufferInfo bufferInfo) { m_bufferInfo[op] = bufferInfo; } + void setBufferSize(unsigned bufferId, unsigned size) + { + auto it = m_bufferSize.find(bufferId); + if (it != m_bufferSize.end()) + { + it->second = (size > it->second) ? size : it->second; + } + else + { + m_bufferSize[bufferId] = size; + } + } + unsigned getBufferSize(unsigned bufferId) + { + auto it = m_bufferSize.find(bufferId); + NGRAPH_CHECK(it != m_bufferSize.end(), "Buffer has no size!"); + return it->second; + } + + private: + // Records assignment of BufferInfo to each inplace op + BufferInfoMap m_bufferInfo; + // Records buffer size required for each buffer id in bytes + BufferSizeMap m_bufferSize; + }; +} diff --git a/src/contrib/mlir/backend/cpu/cpu_backend.cpp b/src/contrib/mlir/backend/cpu/cpu_backend.cpp index 017d38761d9..18b60d4b616 100644 --- a/src/contrib/mlir/backend/cpu/cpu_backend.cpp +++ b/src/contrib/mlir/backend/cpu/cpu_backend.cpp @@ -19,7 +19,6 @@ #include "cpu_backend.hpp" #include "contrib/mlir/backend/pass/affine_lowerer.hpp" -#include "contrib/mlir/backend/pass/memory_optimization.hpp" #include "contrib/mlir/utils.hpp" #include "ngraph/check.hpp" @@ -160,7 +159,6 @@ void MLIRCPUBackend::init() void MLIRCPUBackend::codegen() { - optimizeNgDialect(); lowerNgDialect(); } @@ -261,18 +259,3 @@ void MLIRCPUBackend::optimizeAffineDialect() // Run Std dialect optimizations. 
// TODO } - -void MLIRCPUBackend::optimizeNgDialect() -{ - mlir::PassManager pm(&m_context); - mlir::applyPassManagerCLOptions(pm); - if (clEnableNgInPlaceMemoryOpt) - { - pm.addPass(mlir::createMemoryOptimizationPass()); - } - - if (failed(pm.run(m_module.get()))) - { - NGRAPH_CHECK(false, "MLIR pass manager failed"); - } -} diff --git a/src/contrib/mlir/backend/pass/affine_lowerer.cpp b/src/contrib/mlir/backend/pass/affine_lowerer.cpp index 4843e04e63d..fe995790210 100644 --- a/src/contrib/mlir/backend/pass/affine_lowerer.cpp +++ b/src/contrib/mlir/backend/pass/affine_lowerer.cpp @@ -19,11 +19,13 @@ #include "affine_lowerer.hpp" +#include "contrib/mlir/backend/analysis/memory_analysis.hpp" #include "contrib/mlir/core/ngraph_dialect/ops.hpp" #include "contrib/mlir/core/ngraph_dialect/type.hpp" #include "ngraph/assertion.hpp" #include +#include #include #include #include @@ -165,6 +167,8 @@ namespace ValueHandle createZeroConstant(mlir::Type type); ValueHandle createOneConstant(mlir::Type type); + bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass); + /// Conversion from types in the nGraph dialect to the Standard dialect. class NGraphTypeConverter : public TypeConverter { @@ -184,29 +188,25 @@ namespace void runOnModule() override; SmallVector buildOutputDefs(Operation* op, PatternRewriter& rewriter); - /// Allocates a linear buffer for a temporary tensor - Value* createTempBuffer(Type type, PatternRewriter& rewriter); - + /// Allocates a linear buffer for a temporary memref that shares its + /// underlying memory. Used in conjunction with createTempMemref + Value* createTempBuffer(int bufferId, PatternRewriter& rewriter); /// Creates an allocation or view of a memref. /// type MemRef Type /// buffer Optional buffer value to create view over /// offset Optional offset into the buffer this view starts at /// - /// If buffer is null, a new allocation of a memref is created. - /// Offset is ignored. If buffer is non-null, then we create a temp - /// view over a pre-allocated buffer (see createTempBuffer) - + /// If buffer is null it allocates a Memref directly and Offset is ignored. + /// If not, it creates a view over the pre-allocated buffer at the given offset. Value* createTempMemref(Type type, Value* buffer, unsigned offset, PatternRewriter& rewriter); - /// Inserts dealloc Ops for each temporary allocated by AllocOp void insertDeallocs(PatternRewriter& rewriter); - NGraphTypeConverter& getTypeConverter() { return typeConverter; } + MemoryAnalysis* getMemAnalysis() const { return m_memAnalysis; } private: /// Collect a set of patterns to convert from the nGraph dialect to Affine dialect. void populateNGraphToAffineConversionPatterns(OwningRewritePatternList& patterns); - void findOutputValues(); void insertNoAliasArgAttrs(); @@ -219,7 +219,7 @@ namespace // Track pre-assigned buffers for each Value and re-use it if one is available. using IdToMemRefMap = std::unordered_map; IdToMemRefMap m_id_to_memref; - + MemoryAnalysis* m_memAnalysis; // TODO: Workaround for findOutputValues and buildOutputDefs. See NGCPU-470. std::string funcName; }; @@ -232,6 +232,9 @@ namespace populateNGraphToAffineConversionPatterns(patterns); + // Get Memory analysis for in-place memory optimizations + m_memAnalysis = &getAnalysis(); + // Create target that defines legal ops for nGraph dialect to be lowered to. ConversionTarget target(getContext()); @@ -336,24 +339,25 @@ namespace // will re-use the same buffer. 
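// Illustrative IR for the reuse path below, following the FileCheck tests
// added in this patch: two 2x2xf32 values that share buffer id 0 at byte
// offsets 0 and 16 lower to a single linear i8 allocation plus views:
//
//   %buf = alloc() : memref<32xi8>
//   %v0 = std.view %buf[][] : memref<32xi8> to memref<2x2xf32, (d0, d1) -> (d0 * 2 + d1)>
//   %v1 = std.view %buf[][] : memref<32xi8> to memref<2x2xf32, (d0, d1) -> (d0 * 2 + d1 + 4)>
//
// The view maps are written in f32 elements, so the 16-byte offset of %v1
// shows up as "+ 4".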
auto tensorType = origResult->getType().cast(); Value* newResult = nullptr; - Attribute bufferIdAttr = getBufferId(op); + auto bufferInfo = m_memAnalysis->getBufferInfo(op); Type memRefType = typeConverter.convertType(tensorType); - Value* bufferValue = nullptr; - if (!bufferIdAttr) + + if (!bufferInfo.isValid()) { // Allocate new memref newResult = createTempMemref(memRefType, nullptr, 0, rewriter); } else { - unsigned bufferId = bufferIdAttr.cast().getInt(); + unsigned bufferId = bufferInfo.m_bufferId; + unsigned offset = bufferInfo.m_offset; // Re-use a buffer if it exists, else create a new one and update the map IdToMemRefMap::iterator it = m_id_to_memref.find(bufferId); if (it == m_id_to_memref.end()) { // create a new buffer - bufferValue = createTempBuffer(memRefType, rewriter); + bufferValue = createTempBuffer(bufferId, rewriter); m_id_to_memref[bufferId] = bufferValue; } else @@ -361,7 +365,7 @@ namespace bufferValue = it->second; } // Create a temp view over the linear buffer - newResult = createTempMemref(memRefType, bufferValue, 0, rewriter); + newResult = createTempMemref(memRefType, bufferValue, offset, rewriter); } NGRAPH_CHECK(newResult != nullptr, "Temp memref value is not set"); newResults.push_back(newResult); @@ -370,18 +374,17 @@ namespace return newResults; } - Value* DialectLoweringPass::createTempBuffer(Type type, PatternRewriter& rewriter) + Value* DialectLoweringPass::createTempBuffer(int bufferId, PatternRewriter& rewriter) { - MemRefType memRefType = type.cast(); - - NGRAPH_CHECK(memRefType.hasStaticShape(), "Dynamic shapes are not supported"); - - // deduce linear buffer shape - unsigned sizeInBytes = memRefType.getSizeInBits() / 8; + unsigned sizeInBytes = getMemAnalysis()->getBufferSize(bufferId); + NGRAPH_CHECK(bufferId >= 0, "Invalid buffer id to allocate"); + NGRAPH_CHECK(sizeInBytes > 0, "Zero buffer allocation?"); + LLVM_DEBUG(llvm::dbgs() << "Allocating buffer of size " << sizeInBytes << " bytes\n"); MemRefType bufferType = - MemRefType::get({sizeInBytes}, IntegerType::get(8, type.getContext()), {}); + MemRefType::get({sizeInBytes}, IntegerType::get(8, rewriter.getContext()), {}); + // TODO: Set alignment Value* alloc = rewriter.create(rewriter.getUnknownLoc(), bufferType); memRefsToDealloc.push_back(alloc); @@ -404,7 +407,6 @@ namespace unsigned offset, PatternRewriter& rewriter) { - NGRAPH_CHECK(offset == 0, "Only zero offset is supported"); MemRefType memRefType = type.cast(); if (buffer) { @@ -414,7 +416,7 @@ namespace // linear // buffer // This is simply (d0, d1, d2, .. dN-1) --> d0 * S0 + d1 * S1 ...
+ dN-1 * SN-1 - // Where Si is the stride along the i_th dimension + // Where Si is the stride along the i_th dimension in elements auto shape = memRefType.getShape(); SmallVector strides(shape.size(), 0); strides[shape.size() - 1] = 1; @@ -1503,6 +1505,71 @@ namespace } NGRAPH_UNREACHABLE("Unsupported type"); } + + // Given a concat op, check whether the destination and operands have + // a buffer/offset assignment that makes this op + // valid in-place + bool isInPlaceConcat(mlir::Operation* op, DialectLoweringPass& pass) + { + NGRAPH_CHECK(isa(op), "Expecting concat operation"); + auto concat = cast(op); + auto concatAxis = concat.concatenation_axis(); + auto result = concat.getResult(); + auto shape = (result->getType().cast()).getShape(); + auto memAnalysis = pass.getMemAnalysis(); + BufferInfo bufferInfo = memAnalysis->getBufferInfo(op); + + if (!bufferInfo.isValid()) + { + // no buffer assignment to dst, nothing to do + return false; + } + + auto dstBufferId = bufferInfo.m_bufferId; + auto dstOffset = bufferInfo.m_offset; + + LLVM_DEBUG(llvm::dbgs() << ">> Check in-place concat\n"); + LLVM_DEBUG(op->dump()); + for (auto i = 0; i < shape.size(); i++) + { + if (i == concatAxis) + { + break; + } + if (shape[i] != 1) + { + LLVM_DEBUG(llvm::dbgs() << "Axis FAIL. Skipping instruction\n"); + return false; + } + } + LLVM_DEBUG(llvm::dbgs() << "Axis OK\n"); + + // Check if the buffer id and offsets are consistent with what's expected + LLVM_DEBUG(llvm::dbgs() << "Dst (id, offset) = (" << dstBufferId << ", " << dstOffset + << ")\n"); + // relative offset in the buffer + int opndOffset = 0; + for (auto opnd : op->getOperands()) + { + bufferInfo = memAnalysis->getBufferInfo(opnd->getDefiningOp()); + auto srcBufferId = bufferInfo.m_bufferId; + auto srcOffset = bufferInfo.m_offset; + LLVM_DEBUG(llvm::dbgs() << "Src (id, offset) = (" << srcBufferId << ", " << srcOffset + << ")\n"); + if (!bufferInfo.isValid() || srcBufferId != dstBufferId || + srcOffset != (opndOffset + dstOffset)) + { + // mismatch in buffer IDs or offsets + LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets FAIL. Skipping instruction\n"); + return false; + } + auto tensorType = opnd->getType().cast(); + opndOffset += tensorType.getNumElements(); + } + LLVM_DEBUG(llvm::dbgs() << "Buffer ID and Offsets OK\n"); + + return true; + } } // namespace namespace mlir diff --git a/src/contrib/mlir/backend/pass/memory_optimization.cpp b/src/contrib/mlir/backend/pass/memory_optimization.cpp deleted file mode 100644 index 004dfd1a7d8..00000000000 --- a/src/contrib/mlir/backend/pass/memory_optimization.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
-//***************************************************************************** - -// NOTE: This file follows nGraph format style and MLIR naming convention since it does -// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. - -#include "contrib/mlir/core/compiler.hpp" -#include "contrib/mlir/core/ngraph_dialect/ops.hpp" -#include "contrib/mlir/core/ngraph_dialect/type.hpp" - -#include "ngraph/assertion.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// anonymous namespace -// no need to expose any of the following outside of this file -namespace -{ - using namespace ngraph::runtime; - using namespace ngraph::runtime::ngmlir; - using namespace mlir; - - /// Memory Optimization pass - /// - Tries to perform operations in place where applicable by assigning a virtual buffer ID - /// to values. Those are used later in affine lowering pass to create or re-use memrefs - class MemoryOptimizationPass : public mlir::FunctionPass - { - public: - MemoryOptimizationPass() - { - m_inplaceOps = { -#define MLIR_OP(OP, INPLACE) {OP::getOperationName().str(), INPLACE}, -#include "contrib/mlir/backend/pass/op_lowerers.inc" - }; - } - void runOnFunction() override; - - private: - bool isSafeInPlace(mlir::Operation* op); - std::unordered_map m_inplaceOps; - static unsigned bufferId; - }; - - unsigned MemoryOptimizationPass::bufferId = 0; - - void MemoryOptimizationPass::runOnFunction() - { - auto f = getFunction(); - - f.walk([&](mlir::Operation* op) { - if (!isSafeInPlace(op)) - { - return; - } - - if (op->getNumResults() > 1) - { - return; - } - - auto defVal = op->getResult(0); - - // If the defined value is an output of the sub-graph, cannot do it in place - for (auto use = defVal->use_begin(); use != defVal->use_end(); use++) - { - auto useOp = use->getOwner(); - if (isa(useOp)) - { - return; - } - } - - // Check if we can re-use the buffer of any of the inputs. Conjunction of the following: - // - single use value or all uses in the current op - // - not an input argument - - // TODO: Check instead if last post-dominating (dataflow-wise) use. - for (auto opnd = op->operand_begin(); opnd != op->operand_end(); opnd++) - { - auto val = *opnd; - // we optimize if the val has one use or if all uses are in the current op - bool optimize; - - optimize = val->hasOneUse(); - - if (!optimize) - { - optimize = true; - // check if all uses are in the current op - for (auto use = val->use_begin(); use != val->use_end(); use++) - { - if (use->getOwner() != op) - { - optimize = false; - } - } - } - - if (optimize) - { - // do we have a buffer id attached to this value - auto defOp = val->getDefiningOp(); - // If no defining op, then this is a block arg, skip operand - if (!defOp) - { - continue; - } - IntegerAttr attr = getBufferId(defOp); - - if (!attr) - { - // attach a new buffer id - attr = setBufferId(defOp, this->bufferId++); - } - // propagate attribute to dst, and we are done - setBufferId(op, attr); - - return; - } - } - }); - } - - bool MemoryOptimizationPass::isSafeInPlace(mlir::Operation* op) - { - auto it = m_inplaceOps.find(op->getName().getStringRef().str()); - return it != m_inplaceOps.end() ? 
it->second : false; - } -} - -namespace mlir -{ - std::unique_ptr createMemoryOptimizationPass() - { - return std::make_unique(); - } -} // namespace mlir diff --git a/src/contrib/mlir/backend/pass/memory_optimization.hpp b/src/contrib/mlir/backend/pass/memory_optimization.hpp deleted file mode 100644 index b4041b40b8e..00000000000 --- a/src/contrib/mlir/backend/pass/memory_optimization.hpp +++ /dev/null @@ -1,27 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -//***************************************************************************** - -// NOTE: This file follows nGraph format style and MLIR naming convention since it does -// not expose public API to the rest of nGraph codebase and heavily depends on MLIR API. - -#pragma once - -#include - -namespace mlir -{ - std::unique_ptr createMemoryOptimizationPass(); -} diff --git a/src/contrib/mlir/backend/pass/op_lowerers.inc b/src/contrib/mlir/backend/pass/op_lowerers.inc index bbf39097640..d906d8de2b4 100644 --- a/src/contrib/mlir/backend/pass/op_lowerers.inc +++ b/src/contrib/mlir/backend/pass/op_lowerers.inc @@ -27,7 +27,7 @@ MLIR_OP(NGAddOp , true ) MLIR_OP(NGArgMaxRedOp , false ) MLIR_OP(NGArgMinRedOp , false ) -MLIR_OP(NGConcatOp , false ) +MLIR_OP(NGConcatOp , true ) MLIR_OP(NGConvolutionOp , false ) MLIR_OP(NGDivOp , true ) MLIR_OP(NGDotOp , false ) diff --git a/src/contrib/mlir/core/ngraph_dialect/ops.cpp b/src/contrib/mlir/core/ngraph_dialect/ops.cpp index 0b646e854b5..7e6509a42c1 100644 --- a/src/contrib/mlir/core/ngraph_dialect/ops.cpp +++ b/src/contrib/mlir/core/ngraph_dialect/ops.cpp @@ -309,28 +309,6 @@ mlir::LogicalResult verifyOp(NGConvolutionOp* op) return mlir::success(); } -static std::string getBufferIdAttrName() -{ - return "ng.buffer_id"; -} - -void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr) -{ - op->setAttr(getBufferIdAttrName(), attr); -} - -mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val) -{ - auto attr = mlir::IntegerAttr::get(IntegerType::get(32, op->getContext()), val); - setBufferId(op, attr); - return attr; -} - -mlir::IntegerAttr getBufferId(mlir::Operation* op) -{ - return op->getAttrOfType(getBufferIdAttrName()); -} - namespace mlir { #include "ops_interfaces.cpp.inc" diff --git a/src/contrib/mlir/core/ngraph_dialect/ops.hpp b/src/contrib/mlir/core/ngraph_dialect/ops.hpp index bef4d1c1fe7..4066daa14ad 100644 --- a/src/contrib/mlir/core/ngraph_dialect/ops.hpp +++ b/src/contrib/mlir/core/ngraph_dialect/ops.hpp @@ -41,7 +41,3 @@ namespace mlir #include "ops.h.inc" #undef GET_OP_CLASSES } - -void setBufferId(mlir::Operation* op, mlir::IntegerAttr attr); -mlir::IntegerAttr setBufferId(mlir::Operation* op, unsigned val); -mlir::IntegerAttr getBufferId(mlir::Operation* op); diff --git a/test/backend/concat.in.cpp b/test/backend/concat.in.cpp index 4fb2c7a38ae..46f0be51ce7 100644 --- a/test/backend/concat.in.cpp +++ b/test/backend/concat.in.cpp @@ 
-349,6 +349,143 @@ NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_propagate_2d_tensor) (vector{3, 7, 2}), read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); } +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_1) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 4, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(A, B); + auto concat = make_shared(NodeVector{add1, add2}, 1); + auto f = make_shared(make_shared(concat, concat), ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected; + expected.resize(8, 4); + + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_2) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 8, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(A, B); + auto concat1 = make_shared(NodeVector{add1, add2}, 1); + auto concat2 = make_shared(NodeVector{add1, add2}, 1); + auto concat12 = make_shared(NodeVector{concat1, concat2}, 1); + auto f = make_shared(make_shared(concat12, concat12), ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected; + expected.resize(16, 4); + + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_tree_3) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 16, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto concat1 = make_shared(NodeVector{A, B}, 1); + auto concat2 = make_shared(NodeVector{A, B}, 1); + auto concat3 = make_shared(NodeVector{A, B}, 1); + auto concat4 = make_shared(NodeVector{A, B}, 1); + auto concat12 = make_shared(NodeVector{concat1, concat2}, 1); + auto concat34 = make_shared(NodeVector{concat3, concat4}, 1); + auto concat14 = make_shared(NodeVector{concat12, concat34}, 1); + auto f = make_shared(make_shared(concat14, concat14), ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected; + expected.resize(32, 2); + + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat) +{ + 
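+    // The concat below consumes two values that depend on each other
+    // (add2 = add1 + add1), so in-place assignment must keep both operand
+    // views consistent. With every input element equal to 1: add1 = 2,
+    // add2 = 4, concat = [2,2,2,2,4,4,4,4], and add3 doubles that to the
+    // expected {4, 4, 4, 4, 8, 8, 8, 8}.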
Shape shape{2, 2}; + Shape shape_r{4, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(add1, add1); + auto concat = make_shared(NodeVector{add1, add2}, 0); + auto add3 = make_shared(concat, concat); + auto f = make_shared(add3, ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected = {4, 4, 4, 4, 8, 8, 8, 8}; + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_add_concat_2) +{ + Shape shape{1, 2, 2}; + Shape shape_r{1, 6, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto add2 = make_shared(A, B); + auto add3 = make_shared(A, B); + auto add4 = make_shared(A, B); + auto add5 = make_shared(A, B); + + auto concat1 = make_shared(NodeVector{add1, add2, add3}, 1); + + auto concat2 = make_shared(NodeVector{add4, add2, add5}, 1); + + auto add6 = make_shared(concat1, concat2); + auto f = make_shared(add6, ParameterVector{A, B}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape_r); + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a, b}); + vector expected = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; + EXPECT_TRUE(test::all_close_f(expected, read_vector(result), MIN_FLOAT_TOLERANCE_BITS)); +} // from numpy import * // a=linspace(1,2*3*4*3*2,2*3*4*3*2) // b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2) diff --git a/test/mlir/affine_conversion/memory_opt.mlir b/test/mlir/affine_conversion/memory_opt.mlir new file mode 100644 index 00000000000..147807ea5ad --- /dev/null +++ b/test/mlir/affine_conversion/memory_opt.mlir @@ -0,0 +1,128 @@ +// RUN: ngraph-opt %s --split-input-file --ngraph-memory-opt --ngraph-memory-opt-concat --ngraph-memory-opt-eltwise -convert-ngraph-to-affine | FileCheck %s + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1) +// CHECK-LABEL: test0 +// CHECK: %[[B:.*]] = alloc() : memref<16xi8> +// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<16xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: dealloc %[[B]] : memref<16xi8> +func @test0(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> { + %0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %2 = "ng.add"(%1, %1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %3 = "ng.add"(%2, %2) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + "ng.return"(%3) : (!ng.tensor<2x2xf32>) -> () +} + +// ----- + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1) 
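+// (An aside on the maps: MAP0 addresses the start of the shared buffer,
+// while MAP1 below is the same row-major layout shifted by 4 f32 elements,
+// i.e. 16 bytes. The second 2x2 operand is therefore viewed right after the
+// first one, and the 4x2 concat result aliases the whole buffer via MAP0,
+// so the concat can be done in place.)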
+// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * 2 + d1 + 4) +// CHECK-LABEL: test1 +// CHECK: %[[B:.*]] = alloc() : memref<32xi8> +// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<2x2xf32, #[[MAP1]]> +// CHECK: std.view %[[B]][][] : memref<32xi8> to memref<4x2xf32, #[[MAP0]]> +// CHECK: dealloc %[[B]] : memref<32xi8> +func @test1(%arg0: !ng.tensor<2x2xf32>, %arg1: !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32> { + %0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %1 = "ng.add"(%0, %0) : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<2x2xf32> + %2 = "ng.concat"(%0, %1) {concatenation_axis = 0} : (!ng.tensor<2x2xf32>, !ng.tensor<2x2xf32>) -> !ng.tensor<4x2xf32> + %3 = "ng.add"(%2, %2) : (!ng.tensor<4x2xf32>, !ng.tensor<4x2xf32>) -> !ng.tensor<4x2xf32> + "ng.return"(%3) : (!ng.tensor<4x2xf32>) -> () +} + +// ----- + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP1:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4) +// CHECK-DAG: #[[MAP2:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP3:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2) +// CHECK-LABEL: test2 +// CHECK: %[[B1:.*]] = alloc() : memref<32xi8> +// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x2x2xf32, #[[MAP1]]> +// CHECK: std.view %[[B1]][][] : memref<32xi8> to memref<1x4x2xf32, #[[MAP2]]> +// CHECK: %[[B2:.*]] = alloc() : memref<64xi8> +// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]> +// CHECK: std.view %[[B2]][][] : memref<64xi8> to memref<1x8x2xf32, #[[MAP3]]> +func @test2(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>){ + %0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + // inplace + %2 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + // cannot be done inplace, %3 and %2 cannot alias + %3 = "ng.concat"(%0, %1, %2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32> + // inplace destructive. 
%3 and %2 cannot alias + %4 = "ng.add"(%3, %3) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32> + + // no inplace, result is output + %5 = "ng.add"(%2, %2) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x4x2xf32> + // no inplace, result is output + %6 = "ng.add"(%4, %4) : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x8x2xf32> + "ng.return"(%5, %6) : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x8x2xf32>) -> () +} + +// ----- + +// CHECK-DAG: #[[MAP0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP8:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 8) +// CHECK-DAG: #[[MAP9:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 16) +// CHECK-DAG: #[[MAP10:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 8 + d1 * 2 + d2 + 24) +// CHECK-DAG: #[[MAP11:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2) +// CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 16 + d1 * 2 + d2 + 16) +// CHECK-DAG: #[[MAP13:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2) +// CHECK-LABEL: test3 +// CHECK: %[[B:.*]] = alloc() : memref<128xi8> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP0]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP8]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP9]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x4x2xf32, #[[MAP10]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP11]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x8x2xf32, #[[MAP12]]> +// CHECK: std.view %[[B]][][] : memref<128xi8> to memref<1x16x2xf32, #[[MAP13]]> +// CHECK: dealloc %[[B]] : memref<128xi8> +func @test3(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x16x2xf32> { + %0 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %1 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %2 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %3 = "ng.concat"(%arg0, %arg1) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x4x2xf32> + %4 = "ng.concat"(%0, %1) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32> + %5 = "ng.concat"(%2, %3) {concatenation_axis = 1} : (!ng.tensor<1x4x2xf32>, !ng.tensor<1x4x2xf32>) -> !ng.tensor<1x8x2xf32> + %6 = "ng.concat"(%4, %5) {concatenation_axis = 1} : (!ng.tensor<1x8x2xf32>, !ng.tensor<1x8x2xf32>) -> !ng.tensor<1x16x2xf32> + %7 = "ng.add"(%6, %6) : (!ng.tensor<1x16x2xf32>, !ng.tensor<1x16x2xf32>) -> !ng.tensor<1x16x2xf32> + "ng.return"(%7) : (!ng.tensor<1x16x2xf32>) -> () +} + +// ----- + +//CHECK-DAG: #[[MAP4:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 4) +//CHECK-DAG: #[[MAP5:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2) +//CHECK-DAG: #[[MAP6:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 4 + d1 * 2 + d2 + 8) +//CHECK-DAG: #[[MAP12:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0 * 12 + d1 * 2 + d2) +// CHECK-LABEL: test4 +//CHECK: %[[B1:.*]] = alloc() : memref<1x2x2xf32> +//CHECK: %[[B2:.*]] = alloc() : memref<48xi8> +//CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP4]]> +//CHECK: %[[B3:.*]] = alloc() : memref<1x2x2xf32> +//CHECK: std.view %[[B2]][][] : memref<48xi8> to 
memref<1x2x2xf32, #[[MAP5]]> +//CHECK: std.view %[[B2]][][] : memref<48xi8> to memref<1x2x2xf32, #[[MAP6]]> +//CHECK: %[[B4:.*]] = alloc() : memref<1x6x2xf32> +//CHECK: std.view %1[][] : memref<48xi8> to memref<1x6x2xf32, #[[MAP12]]> +//CHECK: dealloc %[[B1]] : memref<1x2x2xf32> +//CHECK: dealloc %[[B2]] : memref<48xi8> +//CHECK: dealloc %[[B3]] : memref<1x2x2xf32> +//CHECK: dealloc %[[B4]] : memref<1x6x2xf32> +func @test4(%arg0: !ng.tensor<1x2x2xf32>, %arg1: !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x8x2xf32> { + %S0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %S1 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %S2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %R0 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + %R2 = "ng.add"(%arg0, %arg1) : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x2x2xf32> + // pre-existing assignment of S1 in %D2 prevents assignment for %D1 concat + %D1 = "ng.concat"(%S0, %S1, %S2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32> + %D2 = "ng.concat"(%R0, %S1, %R2) {concatenation_axis = 1} : (!ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>, !ng.tensor<1x2x2xf32>) -> !ng.tensor<1x6x2xf32> + %D3 = "ng.add"(%D1, %D2) : (!ng.tensor<1x6x2xf32>, !ng.tensor<1x6x2xf32>) -> !ng.tensor<1x6x2xf32> + "ng.return"(%D3) : (!ng.tensor<1x6x2xf32>) -> () +} From f08372ba99bec78804d8b4ab2ce221603e4a9cd4 Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Tue, 14 Jan 2020 06:38:21 +0100 Subject: [PATCH 09/12] [PY] Add flag to compile python API in debug mode (#4052) * [PY] Add flag to compile python API in debug mode * Move debug and release flags to helper function * Add missing Docstring Co-authored-by: Sang Ik Lee --- python/setup.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index b0d08f82e9a..5ff0dff4431 100644 --- a/python/setup.py +++ b/python/setup.py @@ -26,6 +26,7 @@ PYNGRAPH_ROOT_DIR = os.path.abspath(os.path.dirname(__file__)) NGRAPH_DEFAULT_INSTALL_DIR = os.environ.get('HOME') NGRAPH_ONNX_IMPORT_ENABLE = os.environ.get('NGRAPH_ONNX_IMPORT_ENABLE') +NGRAPH_PYTHON_DEBUG = os.environ.get('NGRAPH_PYTHON_DEBUG') def find_ngraph_dist_dir(): @@ -367,6 +368,13 @@ def _add_extra_compile_arg(self, flag, compile_args): return True return False + def add_debug_or_release_flags(self): + """Return compiler flags for Release and Debug build types.""" + if NGRAPH_PYTHON_DEBUG in ['TRUE', 'ON', True]: + return ['-O0', '-g'] + else: + return ['-O2', '-D_FORTIFY_SOURCE=2'] + def build_extensions(self): """Build extension providing extra compiler flags.""" if sys.platform == 'win32': @@ -388,7 +396,8 @@ def build_extensions(self): add_platform_specific_link_args(ext.extra_link_args) ext.extra_compile_args += ['-Wformat', '-Wformat-security'] - ext.extra_compile_args += ['-O2', '-D_FORTIFY_SOURCE=2'] + ext.extra_compile_args += self.add_debug_or_release_flags() + if sys.platform == 'darwin': ext.extra_compile_args += ['-stdlib=libc++'] build_ext.build_extensions(self) From 9bfbd3c6ee3edb8873954332a64dc636a2ad977a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Tue, 14 Jan 2020 15:25:19 +0100 Subject: [PATCH 10/12] [ONNX] Extended support for provenance tags (#4154) * Checking if provenance_tags key exists * 
Add provenance tag prototype * Format provenance tag * Display provenance tag * Clean debug printing * Add const to variables * Separate method for add provenance tags * Return NodeVector reference * Return const NodeVector * Moved add_provenance_tags function to commons * Style apply * Simple model for tests * Provenance tag test * Expect substring instead of equal * Add provenance tags to intermediate nodes recursively * One tag per node * Add traverse node args instead of recursion * Return NodeVector instead of set of pointers * Use traverse_nodes and lambda function * Remove unused helper functions * Remove is_constant() condition * Update test model prototxt * Update test substring * Use node name and output names to build provenance tags in onnx importer * Unit tests for onnx_importer provenance tags * Missing include * Add provenance tags to constants built from ONNX initializers * Add provenance tags to Constants and Parameters created out of ONNX inputs and initializers * More strict assertions in onnx provenance tests * Unit test for onnx importer Parameter nodes tagging * Helper function for the onnx provenance tests * Some docs * Obsolete comment removal * Separate file for onnx provenance tags unit tests * Code formatting * Move the inputs tagging to the Graph class * Tagging moved to the Graph class entirely * Missing include and extra helper variable * Unit tests helper documentation * Change the UT helper to lowercase Co-authored-by: Katarzyna Mitrus --- .../frontend/onnx_import/core/graph.cpp | 77 ++++++++++++++- .../frontend/onnx_import/core/graph.hpp | 10 ++ .../frontend/onnx_import/utils/common.cpp | 18 ---- .../frontend/onnx_import/utils/common.hpp | 2 - test/CMakeLists.txt | 1 + .../onnx/provenance_input_tags.prototxt | 45 +++++++++ .../provenance_multiple_outputs_op.prototxt | 77 +++++++++++++++ .../provenance_node_name_and_outputs.prototxt | 51 ++++++++++ .../onnx/provenance_only_outputs.prototxt | 50 ++++++++++ test/onnx/onnx_import.in.cpp | 15 --- test/onnx/onnx_import_provenance.in.cpp | 95 +++++++++++++++++++ 11 files changed, 402 insertions(+), 39 deletions(-) create mode 100644 test/models/onnx/provenance_input_tags.prototxt create mode 100644 test/models/onnx/provenance_multiple_outputs_op.prototxt create mode 100644 test/models/onnx/provenance_node_name_and_outputs.prototxt create mode 100644 test/models/onnx/provenance_only_outputs.prototxt create mode 100644 test/onnx/onnx_import_provenance.in.cpp diff --git a/src/ngraph/frontend/onnx_import/core/graph.cpp b/src/ngraph/frontend/onnx_import/core/graph.cpp index dfa2772b200..f79d781e645 100644 --- a/src/ngraph/frontend/onnx_import/core/graph.cpp +++ b/src/ngraph/frontend/onnx_import/core/graph.cpp @@ -15,6 +15,8 @@ //***************************************************************************** #include +#include +#include #include "graph.hpp" #include "node.hpp" @@ -57,6 +59,37 @@ namespace ngraph std::string domain = get_node_domain(node_proto); return (domain.empty() ?
"" : domain + ".") + node_proto.op_type(); } + + static std::string concat_strings( + const std::vector<std::reference_wrapper<const std::string>>& strings) + { + const auto concat_with_comma = + [](const std::string& accumulator, + std::reference_wrapper<const std::string> next_string) { + return accumulator + ", " + next_string.get(); + }; + + return std::accumulate( + strings.begin() + 1, strings.end(), strings.begin()->get(), concat_with_comma); + } + + static std::string build_input_provenance_tag(const std::string& input_name, + const Shape& shape) + { + std::stringstream tag_builder; + tag_builder << "<ONNX Input (" << input_name << ") Shape:" << shape << ">"; + return tag_builder.str(); + } + + static std::string build_op_provenance_tag(const Node& onnx_node) + { + const auto output_names = concat_strings(onnx_node.get_output_names()); + const auto node_name = + onnx_node.get_name().empty() ? "" : onnx_node.get_name() + " "; + + return std::string{"<ONNX " + onnx_node.op_type() + " (" + node_name + "-> " + + output_names + ")>"}; + } } // namespace detail Graph::Graph(const onnx::GraphProto& graph_proto, Model& model, const Weights& weights) @@ -72,7 +105,9 @@ namespace ngraph m_initializers.emplace(initializer_tensor.name(), tensor); // For each initializer, create a Constant node and store in cache - m_ng_node_cache.emplace(initializer_tensor.name(), tensor.get_ng_constant()); + auto ng_constant = tensor.get_ng_constant(); + add_provenance_tag_to_initializer(tensor, ng_constant); + m_ng_node_cache.emplace(initializer_tensor.name(), std::move(ng_constant)); } } @@ -87,8 +122,10 @@ namespace ngraph continue; } - m_ng_node_cache[input.name()] = - m_inputs.back().get_ng_node(m_parameters, m_initializers, weights); + const auto value_info = m_inputs.back(); + auto ng_node = value_info.get_ng_node(m_parameters, m_initializers, weights); + add_provenance_tag_to_input(value_info, ng_node); + m_ng_node_cache[input.name()] = std::move(ng_node); } // Process all graph outputs @@ -160,11 +197,43 @@ namespace { const auto ng_node_factory = m_model->get_operator(onnx_node.op_type(), onnx_node.domain()); + const auto ng_node_vector = ng_node_factory(onnx_node); - common::add_provenance_tags(onnx_node, ng_node_vector); + add_provenance_tags(onnx_node, ng_node_vector); + return ng_node_vector; } + void Graph::add_provenance_tag_to_initializer( + const Tensor& tensor, std::shared_ptr<default_opset::Constant> node) const + { + const std::string tag = + detail::build_input_provenance_tag(tensor.get_name(), tensor.get_shape()); + + node->add_provenance_tag(tag); + } + + void Graph::add_provenance_tag_to_input(const ValueInfo& input, + std::shared_ptr<ngraph::Node> node) const + { + const std::string tag = + detail::build_input_provenance_tag(input.get_name(), input.get_shape()); + + node->add_provenance_tag(tag); + } + + void Graph::add_provenance_tags(const Node& onnx_node, + const NodeVector& ng_node_vector) const + { + const auto tag = detail::build_op_provenance_tag(onnx_node); + const auto ng_inputs = onnx_node.get_ng_inputs(); + + ngraph::traverse_nodes( + ng_node_vector, + [&tag](std::shared_ptr<ngraph::Node> ng_node) { ng_node->add_provenance_tag(tag); }, + false, + ng_inputs); + } } // namespace onnx_import } // namespace ngraph diff --git a/src/ngraph/frontend/onnx_import/core/graph.hpp b/src/ngraph/frontend/onnx_import/core/graph.hpp index 3fb5cdb0b62..c24924d91eb 100644 --- a/src/ngraph/frontend/onnx_import/core/graph.hpp +++ b/src/ngraph/frontend/onnx_import/core/graph.hpp @@ -20,6 +20,7 @@ #include #include +#include "default_opset.hpp" #include "model.hpp" #include "ngraph/op/parameter.hpp" #include "operator_set.hpp" @@ -46,6 +47,15 @@ namespace ngraph const std::string& get_name() const { return
m_graph_proto->name(); } NodeVector make_ng_nodes(const Node& onnx_node) const; + protected: + void add_provenance_tag_to_initializer( + const Tensor& initializer, std::shared_ptr node) const; + + void add_provenance_tag_to_input(const ValueInfo& input, + std::shared_ptr node) const; + + void add_provenance_tags(const Node& onnx_node, const NodeVector& ng_node_vector) const; + private: const onnx::GraphProto* m_graph_proto; std::vector m_nodes; diff --git a/src/ngraph/frontend/onnx_import/utils/common.cpp b/src/ngraph/frontend/onnx_import/utils/common.cpp index 261c750e907..a4e5e1915fd 100644 --- a/src/ngraph/frontend/onnx_import/utils/common.cpp +++ b/src/ngraph/frontend/onnx_import/utils/common.cpp @@ -28,24 +28,6 @@ namespace ngraph { namespace common { - const NodeVector& add_provenance_tags(const Node& onnx_node, - const NodeVector& ng_node_vector) - { - const std::string node_name = - onnx_node.get_name().empty() ? "unnamed node" : onnx_node.get_name(); - const std::string provenance_tag = - ""; - - auto ng_inputs = onnx_node.get_ng_inputs(); - ngraph::traverse_nodes(ng_node_vector, - [&](std::shared_ptr ng_node) { - ng_node->add_provenance_tag(provenance_tag); - }, - false, - ng_inputs); - return ng_node_vector; - } - const ngraph::element::Type& get_ngraph_element_type(int64_t onnx_type) { switch (onnx_type) diff --git a/src/ngraph/frontend/onnx_import/utils/common.hpp b/src/ngraph/frontend/onnx_import/utils/common.hpp index 5fbb871e2da..8189cf1f591 100644 --- a/src/ngraph/frontend/onnx_import/utils/common.hpp +++ b/src/ngraph/frontend/onnx_import/utils/common.hpp @@ -38,8 +38,6 @@ namespace ngraph { namespace common { - const NodeVector& add_provenance_tags(const Node& onnx_node, - const NodeVector& ng_node_vector); const ngraph::element::Type& get_ngraph_element_type(std::int64_t onnx_type); /// \brief Return a monotonic sequence. 
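For reference, a minimal worked example of what the relocated tag-building helpers produce (a sketch: the values below are inferred from the helpers in graph.cpp and the unit tests added by this patch, not captured from importer output):

// concat_strings seeds std::accumulate with the first element, so the
// joined list has no leading separator:
//   {"output_of_add"}     -> "output_of_add"
//   {"values", "indices"} -> "values, indices"
// build_op_provenance_tag wraps the op type, the optional node name and this
// joined list into one tag, and Graph::add_provenance_tags attaches that tag
// to every nGraph node created for the ONNX node via ngraph::traverse_nodes.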
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2921766d5ac..7833ea27469 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -498,6 +498,7 @@ if (NGRAPH_ONNX_IMPORT_ENABLE) list(APPEND MULTI_TEST_SRC onnx/onnx_import.in.cpp onnx/onnx_import_convpool.in.cpp + onnx/onnx_import_provenance.in.cpp onnx/onnx_import_reshape.in.cpp onnx/onnx_import_rnn.in.cpp onnx/onnx_import_quant.in.cpp) diff --git a/test/models/onnx/provenance_input_tags.prototxt b/test/models/onnx/provenance_input_tags.prototxt new file mode 100644 index 00000000000..85ecbebf698 --- /dev/null +++ b/test/models/onnx/provenance_input_tags.prototxt @@ -0,0 +1,45 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "initializer_of_A" + input: "input_B" + output: "output_of_add" + op_type: "Add" + name: "Add_node" + } + name: "test_graph" + initializer { + dims: 0 + data_type: 7 + int64_data: 1 + name: "initializer_of_A" + } + input { + name: "input_B" + type { + tensor_type { + elem_type: 7 + shape { + dim { + } + } + } + } + } + output { + name: "output_of_add" + type { + tensor_type { + elem_type: 7 + shape { + dim { + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/test/models/onnx/provenance_multiple_outputs_op.prototxt b/test/models/onnx/provenance_multiple_outputs_op.prototxt new file mode 100644 index 00000000000..0369588e46b --- /dev/null +++ b/test/models/onnx/provenance_multiple_outputs_op.prototxt @@ -0,0 +1,77 @@ +ir_version: 4 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + input: "k" + output: "values" + output: "indices" + op_type: "TopK" + name: "TOPK" + } + name: "test_graph" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "k" + type { + tensor_type { + elem_type: 7 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "values" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "indices" + type { + tensor_type { + elem_type: 7 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 10 +} diff --git a/test/models/onnx/provenance_node_name_and_outputs.prototxt b/test/models/onnx/provenance_node_name_and_outputs.prototxt new file mode 100644 index 00000000000..784a17a4129 --- /dev/null +++ b/test/models/onnx/provenance_node_name_and_outputs.prototxt @@ -0,0 +1,51 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "input_A" + input: "input_B" + output: "output_of_add" + op_type: "Add" + name: "Add_node" + } + name: "test_graph" + input { + name: "input_A" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + input { + name: "input_B" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + output { + name: "output_of_add" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/test/models/onnx/provenance_only_outputs.prototxt b/test/models/onnx/provenance_only_outputs.prototxt new file mode 100644 index 00000000000..b8dc775c67c --- /dev/null +++ b/test/models/onnx/provenance_only_outputs.prototxt @@ -0,0 +1,50 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "input_A" + input: "input_B" + output: "output_of_add" + op_type: "Add" + } + name: "test_graph" + input { + name: 
"input_A" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + input { + name: "input_B" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } + output { + name: "output_of_add" + type { + tensor_type { + elem_type: 1 + shape { + dim { + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/test/onnx/onnx_import.in.cpp b/test/onnx/onnx_import.in.cpp index 1f41f0c6102..e92cd1719dd 100644 --- a/test/onnx/onnx_import.in.cpp +++ b/test/onnx/onnx_import.in.cpp @@ -356,21 +356,6 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, model_initializer_wo_input) EXPECT_TRUE(test::all_close_f(expected_output, output.front())); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tag_text) -{ - auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/provenance_tag_add.prototxt")); - - auto ng_nodes = function->get_ordered_ops(); - for (auto ng_node : ng_nodes) - { - for (auto tag : ng_node->get_provenance_tags()) - { - EXPECT_HAS_SUBSTRING(tag, "ONNX"); - } - } -} - // ############################################################################ OPERATOR TESTS NGRAPH_TEST(onnx_${BACKEND_NAME}, model_addmul_abc) { diff --git a/test/onnx/onnx_import_provenance.in.cpp b/test/onnx/onnx_import_provenance.in.cpp new file mode 100644 index 00000000000..718b425d632 --- /dev/null +++ b/test/onnx/onnx_import_provenance.in.cpp @@ -0,0 +1,95 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** + +#include "gtest/gtest.h" +#include "ngraph/file_util.hpp" +#include "ngraph/frontend/onnx_import/default_opset.hpp" +#include "ngraph/frontend/onnx_import/onnx.hpp" +#include "util/test_control.hpp" +#include "util/type_prop.hpp" + +using namespace ngraph; +using namespace ngraph::onnx_import; + +static std::string s_manifest = "${MANIFEST}"; + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tag_text) +{ + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/provenance_tag_add.prototxt")); + + const auto ng_nodes = function->get_ordered_ops(); + for (const auto ng_node : ng_nodes) + { + for (const auto tag : ng_node->get_provenance_tags()) + { + EXPECT_HAS_SUBSTRING(tag, "ONNX"); + } + } +} + +// the NodeToCheck parameter of this template is used to find a node in the whole subgraph +// that a particular unit test is supposed to check against the expected provenance tag +template +void test_provenance_tags(const std::string& model_path, const std::string& expected_provenance_tag) +{ + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, model_path)); + + for (const auto ng_node : function->get_ordered_ops()) + { + if (as_type_ptr(ng_node)) + { + const auto tags = ng_node->get_provenance_tags(); + ASSERT_EQ(tags.size(), 1) << "There should be exactly one provenance tag set for " + << ng_node; + + EXPECT_EQ(*(tags.cbegin()), expected_provenance_tag); + } + } +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_only_output) +{ + // the Add node in the model does not have a name, + // only its output name should be found in the provenance tags + test_provenance_tags("onnx/provenance_only_outputs.prototxt", + " output_of_add)>"); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_node_name_and_outputs) +{ + test_provenance_tags("onnx/provenance_node_name_and_outputs.prototxt", + " output_of_add)>"); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_multiple_outputs_op) +{ + test_provenance_tags("onnx/provenance_multiple_outputs_op.prototxt", + " values, indices)>"); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tagging_constants) +{ + test_provenance_tags("onnx/provenance_input_tags.prototxt", + ""); +} + +NGRAPH_TEST(onnx_${BACKEND_NAME}, provenance_tagging_parameters) +{ + test_provenance_tags("onnx/provenance_input_tags.prototxt", + ""); +} From cd21bddcbfba2fe539fd1b9cc7011c7dc3df6cdc Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Tue, 14 Jan 2020 15:59:00 +0100 Subject: [PATCH 11/12] [ONNX] Remove unnecesary Mean in opset 8 (#4053) * [ONNX] Remove unnecesary Mean in opset 8 * Use opset1 instead of v1 * Fix merge artifact * Use opset1 Divide instead of opset0 Co-authored-by: Robert Kimball --- src/ngraph/frontend/onnx_import/op/mean.cpp | 27 ++----------------- src/ngraph/frontend/onnx_import/op/mean.hpp | 6 ----- .../frontend/onnx_import/ops_bridge.cpp | 1 - 3 files changed, 2 insertions(+), 32 deletions(-) diff --git a/src/ngraph/frontend/onnx_import/op/mean.cpp b/src/ngraph/frontend/onnx_import/op/mean.cpp index 45e91b185e9..0f6925e830c 100644 --- a/src/ngraph/frontend/onnx_import/op/mean.cpp +++ b/src/ngraph/frontend/onnx_import/op/mean.cpp @@ -16,10 +16,6 @@ #include "mean.hpp" #include "default_opset.hpp" -#include "ngraph/op/add.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/op/divide.hpp" -#include "ngraph/opsets/opset0.hpp" #include "utils/variadic.hpp" namespace ngraph @@ -29,25 +25,6 @@ namespace 
ngraph namespace op { namespace set_1 - { - NodeVector mean(const Node& node) - { - auto sum = variadic::make_ng_variadic_op(node).front(); - auto shape = sum->get_shape(); - - // Create a Constant representing the number of inputs with the same shape as - // sum - auto count = default_opset::Constant::create( - sum->get_element_type(), - shape, - std::vector(shape_size(shape), node.get_ng_inputs().size())); - - return {sum / count}; - } - - } // namespace set_1 - - namespace set_8 { NodeVector mean(const Node& node) { @@ -61,10 +38,10 @@ namespace ngraph shape, std::vector(shape_size(shape), node.get_ng_inputs().size())); - return {sum / count}; + return {std::make_shared(sum, count)}; } - } // namespace set_8 + } // namespace set_1 } // namespace op diff --git a/src/ngraph/frontend/onnx_import/op/mean.hpp b/src/ngraph/frontend/onnx_import/op/mean.hpp index 300b31f9048..955324e4922 100644 --- a/src/ngraph/frontend/onnx_import/op/mean.hpp +++ b/src/ngraph/frontend/onnx_import/op/mean.hpp @@ -31,12 +31,6 @@ namespace ngraph } // namespace set_1 - namespace set_8 - { - NodeVector mean(const Node& node); - - } // namespace set_1 - } // namespace op } // namespace onnx_import diff --git a/src/ngraph/frontend/onnx_import/ops_bridge.cpp b/src/ngraph/frontend/onnx_import/ops_bridge.cpp index 457c558305b..ea8f8fa675c 100644 --- a/src/ngraph/frontend/onnx_import/ops_bridge.cpp +++ b/src/ngraph/frontend/onnx_import/ops_bridge.cpp @@ -301,7 +301,6 @@ namespace ngraph REGISTER_OPERATOR("Max", 1, max); REGISTER_OPERATOR("Max", 8, max); REGISTER_OPERATOR("Mean", 1, mean); - REGISTER_OPERATOR("Mean", 8, mean); REGISTER_OPERATOR("MeanVarianceNormalization", 1, mean_variance_normalization); REGISTER_OPERATOR("MeanVarianceNormalization", 9, mean_variance_normalization); REGISTER_OPERATOR("Min", 1, min); From c748bda46af4a812c880764c935324ca179299e5 Mon Sep 17 00:00:00 2001 From: Robert Kimball Date: Tue, 14 Jan 2020 10:49:16 -0800 Subject: [PATCH 12/12] Revert "Remove conditional compile (#4106)" (#4178) This reverts commit 1e58565af8d8650f9c9637a95d07052b2d1e2c8f. --- src/contrib/mlir/core/pass/ng_dialect_builder.cpp | 4 ++++ src/ngraph/builder/make_constant.hpp | 4 ++++ src/ngraph/distributed/mlsl.hpp | 4 ++++ src/ngraph/distributed/open_mpi.hpp | 8 ++++++++ src/ngraph/op/constant.cpp | 12 ++++++++++++ src/ngraph/op/constant.hpp | 4 ++++ src/ngraph/op/experimental/range.cpp | 4 ++++ src/ngraph/op/non_max_suppression.cpp | 10 +++++++--- src/ngraph/op/topk.cpp | 10 +++++++--- src/ngraph/pass/constant_folding_convert.cpp | 8 ++++++++ src/ngraph/pass/constant_folding_reverse.cpp | 4 ++++ src/ngraph/pass/dyn_elimination.cpp | 4 ++++ src/ngraph/runtime/cpu/builder/random_uniform.cpp | 4 ++++ src/ngraph/runtime/gcpu/gcpu_executable.cpp | 4 ++++ src/ngraph/runtime/interpreter/int_executable.hpp | 5 ++++- src/ngraph/serializer.cpp | 10 ++++++++++ src/tools/nbench/benchmark_utils.cpp | 4 ++++ test/backend/distributed.in.cpp | 4 ++++ 18 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/contrib/mlir/core/pass/ng_dialect_builder.cpp b/src/contrib/mlir/core/pass/ng_dialect_builder.cpp index 5656d4b4327..8f96935476e 100644 --- a/src/contrib/mlir/core/pass/ng_dialect_builder.cpp +++ b/src/contrib/mlir/core/pass/ng_dialect_builder.cpp @@ -232,9 +232,11 @@ mlir::Type NgDialectConversionPass::getMlirType(const descriptor::Tensor* tensor // Converts an nGraph element type into an MLIR type. 
mlir::Type NgDialectConversionPass::getMlirType(const element::Type& type) { +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (type) { @@ -259,7 +261,9 @@ mlir::Type NgDialectConversionPass::getMlirType(const element::Type& type) NGRAPH_CHECK(false, "Unreachable"); return mlir::Type(); +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif } mlir::Type NgDialectConversionPass::getMlirType(const ngraph::Node* node) diff --git a/src/ngraph/builder/make_constant.hpp b/src/ngraph/builder/make_constant.hpp index 2ec5e857954..15a8be8bf56 100644 --- a/src/ngraph/builder/make_constant.hpp +++ b/src/ngraph/builder/make_constant.hpp @@ -31,9 +31,11 @@ namespace ngraph { std::shared_ptr val = nullptr; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (type) { case element::Type_t::f32: @@ -97,7 +99,9 @@ namespace ngraph case element::Type_t::undefined: throw ngraph_error("make_constant: Unsupported element type 'undefined'"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif if (shape.size() > 0) { diff --git a/src/ngraph/distributed/mlsl.hpp b/src/ngraph/distributed/mlsl.hpp index 6de80e9fbaa..34d1becfbbc 100644 --- a/src/ngraph/distributed/mlsl.hpp +++ b/src/ngraph/distributed/mlsl.hpp @@ -87,9 +87,11 @@ namespace ngraph } decltype(MLSL::RT_SUM) mlsl_reduce_type; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (reduce_type) { case reduction::Type::SUM: mlsl_reduce_type = MLSL::RT_SUM; break; @@ -99,7 +101,9 @@ namespace ngraph case reduction::Type::MIN: mlsl_reduce_type = MLSL::RT_MIN; break; case reduction::Type::MAX: mlsl_reduce_type = MLSL::RT_MAX; break; } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif MLSL::Environment& env = MLSL::Environment::GetEnv(); MLSL::Distribution* distribution = env.CreateDistribution(env.GetProcessCount(), 1); diff --git a/src/ngraph/distributed/open_mpi.hpp b/src/ngraph/distributed/open_mpi.hpp index 4487283d946..c8aaf7fd28a 100644 --- a/src/ngraph/distributed/open_mpi.hpp +++ b/src/ngraph/distributed/open_mpi.hpp @@ -99,9 +99,11 @@ namespace ngraph } decltype(MPI_SUM) mpi_reduce_type; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (reduce_type) { case reduction::Type::SUM: mpi_reduce_type = MPI_SUM; break; @@ -109,7 +111,9 @@ namespace ngraph case reduction::Type::MIN: mpi_reduce_type = MPI_MIN; break; case reduction::Type::MAX: mpi_reduce_type = MPI_MAX; break; } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif MPI_Allreduce(in, out, count, data_type, mpi_reduce_type, MPI_COMM_WORLD); } @@ -172,9 +176,11 @@ namespace ngraph MPI_Datatype ngraph_type_to_mpi_type(element::Type_t& n_type) { MPI_Datatype m_type = MPI_FLOAT; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error 
"-Wswitch-enum" +#endif switch (n_type) { case element::Type_t::boolean: m_type = MPI_BYTE; break; @@ -194,7 +200,9 @@ namespace ngraph case element::Type_t::undefined: case element::Type_t::dynamic: throw std::runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return m_type; } diff --git a/src/ngraph/op/constant.cpp b/src/ngraph/op/constant.cpp index c5dfc37ac78..665aa5eea5a 100644 --- a/src/ngraph/op/constant.cpp +++ b/src/ngraph/op/constant.cpp @@ -120,9 +120,11 @@ op::Constant::~Constant() string op::Constant::convert_value_to_string(size_t index) const { string rc; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (get_element_type()) { case element::Type_t::boolean: rc = to_string(get_vector()[index]); break; @@ -148,7 +150,9 @@ string op::Constant::convert_value_to_string(size_t index) const case element::Type_t::undefined: throw runtime_error("unsupported type"); case element::Type_t::dynamic: throw runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return rc; } @@ -156,9 +160,11 @@ vector op::Constant::get_value_strings() const { vector rc; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (get_element_type()) { case element::Type_t::boolean: @@ -243,7 +249,9 @@ vector op::Constant::get_value_strings() const case element::Type_t::undefined: throw runtime_error("unsupported type"); case element::Type_t::dynamic: throw runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return rc; } @@ -349,9 +357,11 @@ static bool test_bitwise_identical(const op::Constant* constant) bool op::Constant::are_all_data_elements_bitwise_identical() const { bool rc = false; +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (get_element_type()) { case element::Type_t::boolean: @@ -387,7 +397,9 @@ bool op::Constant::are_all_data_elements_bitwise_identical() const case element::Type_t::undefined: case element::Type_t::dynamic: break; } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif return rc; } diff --git a/src/ngraph/op/constant.hpp b/src/ngraph/op/constant.hpp index ea2057460bf..028907a1c0e 100644 --- a/src/ngraph/op/constant.hpp +++ b/src/ngraph/op/constant.hpp @@ -318,9 +318,11 @@ namespace ngraph { throw std::runtime_error("Constant initializer does not match shape"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wswitch" #pragma GCC diagnostic error "-Wswitch-enum" +#endif switch (target_type) { case element::Type_t::boolean: @@ -366,7 +368,9 @@ namespace ngraph case element::Type_t::undefined: throw std::runtime_error("unsupported type"); case element::Type_t::dynamic: throw std::runtime_error("unsupported type"); } +#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) #pragma GCC diagnostic pop +#endif } static constexpr size_t host_alignment() { return 64; } diff --git 
diff --git a/src/ngraph/op/experimental/range.cpp b/src/ngraph/op/experimental/range.cpp
index e431dd2d914..6e9a7d0d4c2 100644
--- a/src/ngraph/op/experimental/range.cpp
+++ b/src/ngraph/op/experimental/range.cpp
@@ -210,9 +210,11 @@ void op::Range::validate_and_infer_types()

     PartialShape result_shape;

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (result_et)
     {
     case element::Type_t::bf16: result_shape = infer_output_shape(this, result_et); break;
@@ -235,7 +237,9 @@ void op::Range::validate_and_infer_types()
             this, false, "Internal nGraph error: unsupported element type: ", result_et);
         break;
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     set_output_type(0, result_et, result_shape);
 }
diff --git a/src/ngraph/op/non_max_suppression.cpp b/src/ngraph/op/non_max_suppression.cpp
index 156d248147f..3d6ae79304e 100644
--- a/src/ngraph/op/non_max_suppression.cpp
+++ b/src/ngraph/op/non_max_suppression.cpp
@@ -154,8 +154,10 @@ int64_t op::v1::NonMaxSuppression::max_boxes_output_from_input() const
     const auto max_output_boxes_input =
         as_type_ptr<op::Constant>(input_value(2).get_node_shared_ptr());

-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wswitch-enum"
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wswitch-enum"
+#endif
     switch (static_cast<element::Type_t>(max_output_boxes_input->get_element_type()))
     {
     case element::Type_t::i8:
@@ -180,7 +182,9 @@ int64_t op::v1::NonMaxSuppression::max_boxes_output_from_input() const
     }
     default: break;
     }
-#pragma GCC diagnostic pop
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif

     return max_output_boxes;
 }
diff --git a/src/ngraph/op/topk.cpp b/src/ngraph/op/topk.cpp
index 50889e16678..19d8f858e64 100644
--- a/src/ngraph/op/topk.cpp
+++ b/src/ngraph/op/topk.cpp
@@ -313,8 +313,10 @@ size_t op::v1::TopK::read_k_from_constant_node(const shared_ptr<Node>& node,
     size_t k = 0;

-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wswitch-enum"
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wswitch-enum"
+#endif
     switch (static_cast<element::Type_t>(k_element_type))
     {
     case element::Type_t::i8: k = validate_and_get_k<int8_t>(k_constant); break;
@@ -322,7 +324,9 @@ size_t op::v1::TopK::read_k_from_constant_node(const shared_ptr<Node>& node,
     case element::Type_t::i32: k = validate_and_get_k<int32_t>(k_constant); break;
     case element::Type_t::i64: k = validate_and_get_k<int64_t>(k_constant); break;
     default: break;
     }
-#pragma GCC diagnostic pop
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif

     return k;
 }
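The NonMaxSuppression and TopK hunks above restore a different flavor of guard than the rest of this revert: their switches are deliberately non-exhaustive (a default: case handles every remaining element type), so -Wswitch-enum is suppressed rather than promoted to an error, and only under clang, which warns about unlisted enumerators even when a default is present. A self-contained illustration of that pattern follows; the enum and function are invented for the example, not taken from the patch.

#include <cstdint>

enum class ElementType : std::uint8_t { i8, i32, i64, f32, f64 };

// Only the integer types are meaningful here; everything else falls through
// to the default. Clang's -Wswitch-enum would still flag the unlisted
// enumerators, so the warning is silenced locally, for clang only.
std::int64_t element_width(ElementType t)
{
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wswitch-enum"
#endif
    switch (t)
    {
    case ElementType::i8: return 8;
    case ElementType::i32: return 32;
    case ElementType::i64: return 64;
    default: return 0; // deliberately not exhaustive
    }
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
}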
diff --git a/src/ngraph/pass/constant_folding_convert.cpp b/src/ngraph/pass/constant_folding_convert.cpp
index 1e9aaac10db..ee1597c272f 100644
--- a/src/ngraph/pass/constant_folding_convert.cpp
+++ b/src/ngraph/pass/constant_folding_convert.cpp
@@ -45,9 +45,11 @@ template
 shared_ptr<op::Constant> fold_constant_convert_helper0(shared_ptr<op::Constant> constant,
                                                        const element::Type& output_element_type)
 {
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (output_element_type)
     {
     case element::Type_t::undefined:
@@ -88,7 +90,9 @@ shared_ptr<op::Constant> fold_constant_convert_helper0(shared_ptr<op::Constant> constant,
     }

     NGRAPH_UNREACHABLE("Unexpected switch case");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 static shared_ptr<op::Constant> fold_constant_convert(shared_ptr<op::Constant> constant,
@@ -101,9 +105,11 @@ static shared_ptr<op::Constant> fold_constant_convert(shared_ptr<op::Constant> c
         return constant;
     }

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (input_element_type)
     {
     case element::Type_t::undefined:
@@ -144,7 +150,9 @@ static shared_ptr<op::Constant> fold_constant_convert(shared_ptr<op::Constant> c

     NGRAPH_UNREACHABLE("Unexpected switch case");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 void pass::ConstantFolding::construct_constant_convert()
diff --git a/src/ngraph/pass/constant_folding_reverse.cpp b/src/ngraph/pass/constant_folding_reverse.cpp
index 827a8ea83de..4fa2d4b0011 100644
--- a/src/ngraph/pass/constant_folding_reverse.cpp
+++ b/src/ngraph/pass/constant_folding_reverse.cpp
@@ -40,9 +40,11 @@ static shared_ptr<op::Constant> fold_constant_reverse(shared_ptr<op::Constant> c
 {
     auto& input_element_type = constant->get_output_element_type(0);

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (input_element_type)
     {
     case element::Type_t::undefined:
@@ -80,7 +82,9 @@ static shared_ptr<op::Constant> fold_constant_reverse(shared_ptr<op::Constant> c

     NGRAPH_UNREACHABLE("Unexpected switch case");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 void pass::ConstantFolding::construct_constant_reverse()
diff --git a/src/ngraph/pass/dyn_elimination.cpp b/src/ngraph/pass/dyn_elimination.cpp
index 74c48e94338..9e58cc40660 100644
--- a/src/ngraph/pass/dyn_elimination.cpp
+++ b/src/ngraph/pass/dyn_elimination.cpp
@@ -385,9 +385,11 @@ void pass::DynElimination::construct_range()

         std::shared_ptr<Node> replacement;

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
         switch (et)
         {
         case element::Type_t::bf16:
@@ -433,7 +435,9 @@ void pass::DynElimination::construct_range()
             NGRAPH_CHECK(false, "Internal nGraph error: unsupported element type: ", et);
             break;
         }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

         replace_node(range_node, replacement);
         return true;
diff --git a/src/ngraph/runtime/cpu/builder/random_uniform.cpp b/src/ngraph/runtime/cpu/builder/random_uniform.cpp
index df84ad18f11..6fc15b3776a 100644
--- a/src/ngraph/runtime/cpu/builder/random_uniform.cpp
+++ b/src/ngraph/runtime/cpu/builder/random_uniform.cpp
@@ -97,9 +97,11 @@ namespace ngraph
                     throw ngraph_error("Unsupported index 2 element type");
                 }
                 auto element_type = args[0].get_element_type();
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
                 switch (element_type)
                 {
                 case element::Type_t::undefined:
@@ -155,7 +157,9 @@ namespace ngraph
                     NGRAPH_UNREACHABLE("Unexpected switch case");
                 }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

                 functors.emplace_back(functor);
             }
diff --git a/src/ngraph/runtime/gcpu/gcpu_executable.cpp b/src/ngraph/runtime/gcpu/gcpu_executable.cpp
index 191a46b795c..2d3c855a32c 100644
--- a/src/ngraph/runtime/gcpu/gcpu_executable.cpp
+++ b/src/ngraph/runtime/gcpu/gcpu_executable.cpp
@@ -127,8 +127,10 @@ bool runtime::gcpu::GCPUExecutable::call(const vector
                 get_output_element_type(0); break;
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     if (m_performance_counters_enabled)
     {
diff --git a/src/ngraph/runtime/interpreter/int_executable.hpp b/src/ngraph/runtime/interpreter/int_executable.hpp
index 6cf3fb01e91..56e5f11b31a 100644
--- a/src/ngraph/runtime/interpreter/int_executable.hpp
+++ b/src/ngraph/runtime/interpreter/int_executable.hpp
@@ -204,10 +204,11 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable
         // We want to check that every OP_TYPEID enumeration is included in the list.
         // These GCC flags enable compile-time checking so that if an enumeration
         // is not in the list an error is generated.
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
-        // #pragma GCC diagnostic error "-Wcovered-switch-default"
+#endif
         switch (get_typeid(node))
         {
         case OP_TYPEID::Abs:
@@ -1889,7 +1890,9 @@ class ngraph::runtime::interpreter::INTExecutable : public Executable
         case OP_TYPEID::TensorIterator:
         case OP_TYPEID::UnknownOp:
             throw unsupported_op("Unsupported op '" + node.description() + "'");
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
         }
     }
 };
diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp
index 078dc68c410..15fed5f9f47 100644
--- a/src/ngraph/serializer.cpp
+++ b/src/ngraph/serializer.cpp
@@ -780,9 +780,12 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
     vector<json> node_outputs = get_value<vector<json>>(node_js, "outputs");
     OutputVectorHelper args(deserialize_output_vector(node_js["inputs"]));
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+// #pragma GCC diagnostic error "-Wimplicit-fallthrough"
+#endif
     switch (get_typeid(type_info))
     {
@@ -3012,7 +3015,9 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
             throw runtime_error(ss.str());
         }
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     for (auto& control_dep : control_deps_inputs)
     {
@@ -3152,9 +3157,12 @@ json JSONSerializer::serialize_node(const Node& n)
         node["provenance_tags"] = provenance_tags;
     }

+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+// #pragma GCC diagnostic error "-Wimplicit-fallthrough"
+#endif
     switch (get_typeid(type_info))
     {
     case OP_TYPEID::Abs: { break;
@@ -4661,6 +4669,8 @@ json JSONSerializer::serialize_node(const Node& n)
     case OP_TYPEID::UnknownOp: { break;
     }
     }
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
 #pragma GCC diagnostic pop
+#endif
     return node;
 }
diff --git a/src/tools/nbench/benchmark_utils.cpp b/src/tools/nbench/benchmark_utils.cpp
index eabaf5e3899..24d085d4921 100644
--- a/src/tools/nbench/benchmark_utils.cpp
+++ b/src/tools/nbench/benchmark_utils.cpp
@@ -80,9 +80,11 @@ void set_denormals_flush_to_zero()

 void random_init(shared_ptr<runtime::Tensor> tensor)
 {
     element::Type et = tensor->get_element_type();
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (et)
     {
     case element::Type_t::boolean: init_int_tensor(tensor, 0, 1); break;
@@ -103,7 +105,9 @@ void random_init(shared_ptr<runtime::Tensor> tensor)
     case element::Type_t::f16:
     default: throw runtime_error("unsupported type");
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif
 }

 default_random_engine& get_random_engine()
diff --git a/test/backend/distributed.in.cpp b/test/backend/distributed.in.cpp
index 747b41607a5..49a7a7ae06c 100644
--- a/test/backend/distributed.in.cpp
+++ b/test/backend/distributed.in.cpp
@@ -48,9 +48,11 @@ static void test_allreduce_common(reduction::Type reduce_type)
     auto a = backend->create_tensor(element::f32, shape);
     auto result = backend->create_tensor(element::f32, shape);

+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic error "-Wswitch"
 #pragma GCC diagnostic error "-Wswitch-enum"
+#endif
     switch (reduce_type)
     {
     case reduction::Type::SUM:
@@ -81,7 +83,9 @@ static void test_allreduce_common(reduction::Type reduce_type)
             v[i] = i + 2;
         }
     }
+#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
 #pragma GCC diagnostic pop
+#endif

     auto handle = backend->compile(f);
     handle->call_with_validate({result}, {a});
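Taken together, the hunks in this revert restore one idiom: every switch over an nGraph enumeration is bracketed by pragmas that promote -Wswitch and -Wswitch-enum to hard errors, so adding an enumerator without updating each switch fails the build, and the #if guards skip the pragmas on GCC 4.8, which the guard condition singles out as unable to cope with them. Distilled to a standalone example (illustrative types, not taken from the patch):

enum class Color { Red, Green, Blue };

const char* color_name(Color c)
{
// Promote unhandled-enumerator warnings to errors so the switch below is
// checked for exhaustiveness at compile time; skip the pragmas on GCC 4.8,
// mirroring the guard condition used throughout the revert above.
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
    switch (c)
    {
    case Color::Red: return "red";
    case Color::Green: return "green";
    case Color::Blue: return "blue";
    // No default: a newly added Color enumerator now fails to compile
    // instead of silently falling through.
    }
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
    return "unknown"; // unreachable while the switch stays exhaustive
}

The error-promoting form shown here is the compile-time safety net; the clang "ignored" variant illustrated earlier is its complementary escape hatch for switches that are partial by design.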