From 39cf8ff6529b658d325da6049b28109ff72fe7b4 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 9 Nov 2021 19:37:41 +0300 Subject: [PATCH] Add model compression to FP16 weights (#7588) * Add model compression to FP16 weights * Fix build * Fix build * Fix build * Add wrapper over ConvertPrecision * Add documentation to attributes * Fix MO IR Reader * Fix build * Return DisableDecompressionConvertConstantFolding call in CommonOptimizations * Temporarily disable old_api map * Fix TI Convert issue * Apply review feedback * Fix build * Fix build * Fix build --- .../offline_transformations_api.pyx | 4 + .../offline_transformations_api_impl.cpp | 11 +- .../offline_transformations_api_impl.hpp | 2 + .../offline_transformations_api_impl_defs.pxd | 2 + .../cldnn_transformations_pipeline.cpp | 3 + .../compress_float_constants.hpp | 51 +++++++ .../convert_compression_only_to_legacy.hpp | 50 +++++++ .../mark_precision_sensitive_subgraphs.hpp | 29 ++++ ...decompression_convert_constant_folding.hpp | 26 ++++ .../transformations/rt_info/attributes.hpp | 6 +- .../transformations/rt_info/decompression.hpp | 42 ++++++ .../rt_info/disable_fp16_compression.hpp | 34 +++++ .../include/transformations/utils/utils.hpp | 13 ++ .../common_optimizations.cpp | 6 + .../compress_float_constants.cpp | 125 ++++++++++++++++ .../common_optimizations/conv_bias_fusion.cpp | 4 +- .../convert_compression_only_to_legacy.cpp | 52 +++++++ .../mark_precision_sensitive_subgraphs.cpp | 73 +++++++++ ...decompression_convert_constant_folding.cpp | 27 ++++ .../transformations/rt_info/attributes.cpp | 2 + .../transformations/rt_info/decompression.cpp | 20 +++ .../rt_info/disable_fp16_compression.cpp | 20 +++ .../rt_info_serialization.cpp | 6 + .../compress_float_constants_test.cpp | 138 ++++++++++++++++++ ...onvert_compression_only_to_legacy_test.cpp | 106 ++++++++++++++ .../mo/back/offline_transformations.py | 12 +- model-optimizer/mo/main.py | 15 +- model-optimizer/mo/moc_frontend/serialize.py | 4 + 
model-optimizer/mo/pipeline/common.py | 13 +- .../mo/utils/ir_reader/restore_graph.py | 2 +- ngraph/core/include/openvino/core/variant.hpp | 8 + .../op/util/precision_sensitive_attribute.hpp | 35 +++++ ngraph/core/src/op/batch_to_space.cpp | 4 + ngraph/core/src/op/convolution.cpp | 2 + ngraph/core/src/op/group_conv.cpp | 2 + ngraph/core/src/op/interpolate.cpp | 4 + ngraph/core/src/op/one_hot.cpp | 2 + ngraph/core/src/op/pad.cpp | 5 + ngraph/core/src/op/reshape.cpp | 2 + ngraph/core/src/op/space_to_batch.cpp | 4 + ngraph/core/src/op/strided_slice.cpp | 4 + ngraph/core/src/op/tile.cpp | 2 + ngraph/core/src/op/topk.cpp | 3 + ngraph/core/src/op/util/broadcast_base.cpp | 9 +- .../op/util/precision_sensitive_attribute.cpp | 20 +++ 45 files changed, 986 insertions(+), 18 deletions(-) create mode 100644 inference-engine/src/transformations/include/transformations/common_optimizations/compress_float_constants.hpp create mode 100644 inference-engine/src/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp create mode 100644 inference-engine/src/transformations/include/transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp create mode 100644 inference-engine/src/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp create mode 100644 inference-engine/src/transformations/include/transformations/rt_info/decompression.hpp create mode 100644 inference-engine/src/transformations/include/transformations/rt_info/disable_fp16_compression.hpp create mode 100644 inference-engine/src/transformations/src/transformations/common_optimizations/compress_float_constants.cpp create mode 100644 inference-engine/src/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp create mode 100644 inference-engine/src/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp create mode 100644 
inference-engine/src/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp create mode 100644 inference-engine/src/transformations/src/transformations/rt_info/decompression.cpp create mode 100644 inference-engine/src/transformations/src/transformations/rt_info/disable_fp16_compression.cpp create mode 100644 inference-engine/tests/functional/inference_engine/transformations/compress_float_constants_test.cpp create mode 100644 inference-engine/tests/functional/inference_engine/transformations/convert_compression_only_to_legacy_test.cpp create mode 100644 ngraph/core/include/openvino/op/util/precision_sensitive_attribute.hpp create mode 100644 ngraph/core/src/op/util/precision_sensitive_attribute.cpp diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx index 2ca15561620a4d..4b42bee7805ef2 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx @@ -31,6 +31,10 @@ def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer C.ApplyLowLatencyTransformation(network.impl, use_const_initializer) +def CompressModelTransformation(IENetwork network): + C.CompressModelTransformation(network.impl) + + def ApplyPruningTransformation(IENetwork network): C.ApplyPruningTransformation(network.impl) diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp index bae64e826a7843..9a7d652866643b 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp +++ 
b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -56,6 +58,13 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork manager.run_passes(network.actual->getFunction()); } +void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) { + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(network.actual->getFunction()); +} + void InferenceEnginePython::Serialize(InferenceEnginePython::IENetwork network, std::string path_to_xml, std::string path_to_bin) { @@ -80,4 +89,4 @@ void InferenceEnginePython::CheckAPI() { auto reshape = f->get_result()->input_value(0).get_node_shared_ptr(); assert(std::dynamic_pointer_cast(reshape->input_value(0).get_node_shared_ptr())); assert(std::dynamic_pointer_cast(reshape->input_value(1).get_node_shared_ptr())); -} \ No newline at end of file +} diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp index c135919a91f638..5de87dc29991fe 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp @@ -25,6 +25,8 @@ void ApplyPruningTransformation(InferenceEnginePython::IENetwork network); void GenerateMappingFile(InferenceEnginePython::IENetwork network, std::string path, bool extract_names); +void CompressModelTransformation(InferenceEnginePython::IENetwork network); + void Serialize(InferenceEnginePython::IENetwork network, std::string path_to_xml, std::string path_to_bin); void CheckAPI(); diff --git 
a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd index 82f6133df887e9..41755a0b2e06af 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd @@ -17,6 +17,8 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi cdef void ApplyMakeStatefulTransformation(IENetwork network, map[string, string]& in_out_names) cdef void ApplyPruningTransformation(IENetwork network) + + cdef void CompressModelTransformation(IENetwork network) cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names) diff --git a/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.cpp b/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.cpp index c979d19f2626ab..d1c0eb38a01a16 100644 --- a/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.cpp @@ -34,6 +34,7 @@ #include #include #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" #include #include #include @@ -163,6 +164,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto pass_config = manager.get_pass_config(); + pass_config->enable(); + // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5 pass_config->set_callback( diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/compress_float_constants.hpp 
b/inference-engine/src/transformations/include/transformations/common_optimizations/compress_float_constants.hpp new file mode 100644 index 00000000000000..a0a4ef18580e7d --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/compress_float_constants.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "transformations_visibility.hpp" +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API CompressFloatConstantsImpl; +class TRANSFORMATIONS_API AddOldApiMapToParameters; +class TRANSFORMATIONS_API CompressFloatConstants; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief CompressFloatConstantsImpl transformation replaces FP32/FP64 Constants with FP16 ones. + */ +class ov::pass::CompressFloatConstantsImpl : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("CompressFloatConstantsImpl", "0"); + CompressFloatConstantsImpl(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief AddOldApiMapToParameters transformation adds OldApiMap to each float input to the model. + */ +class ov::pass::AddOldApiMapToParameters : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("AddOldApiMapToParameters", "0"); + AddOldApiMapToParameters(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief CompressFloatConstants transformation replaces FP32/FP64 Constants with FP16 ones. 
+ */ +class ov::pass::CompressFloatConstants : public ov::pass::GraphRewrite { +public: + OPENVINO_RTTI("CompressFloatConstants", "0"); + CompressFloatConstants() { + add_matcher(); + add_matcher(); + } +}; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp new file mode 100644 index 00000000000000..40a297f65913b8 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "transformations_visibility.hpp" +#include "openvino/pass/pass.hpp" +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertPrecisionCompressedOnly; +class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding; +class TRANSFORMATIONS_API ConvertCompressedOnlyToLegacy; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief ConvertPrecisionCompressedOnly transformation runs ConvertPrecision transformation for CompressedOnly format. + */ + +class ov::pass::ConvertPrecisionCompressedOnly : public ov::pass::FunctionPass { +public: + OPENVINO_RTTI("ConvertPrecisionCompressedOnly", "0"); + bool run_on_function(std::shared_ptr f) override; +}; + +/** + * @ingroup ie_transformation_common_api + * @brief Enables ConstantFolding for Convert operation in compressed function. 
+ */ +class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0"); + EnableDecompressionConvertConstantFolding(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief ConvertCompressedOnlyToLegacy transformation converts compression only FP16 format to legacy FP16 format. + */ +class ov::pass::ConvertCompressedOnlyToLegacy : public ov::pass::FunctionPass { +public: + OPENVINO_RTTI("ConvertCompressedOnlyToLegacy", "0"); + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp new file mode 100644 index 00000000000000..ad72e23e1df091 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "transformations_visibility.hpp" +#include "openvino/pass/pass.hpp" + + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API MarkPrecisionSensitiveSubgraphs; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief MarkPrecisionSensitiveSubgraphs transformation marks the constants + * inside the subgraph starting from precision-sensitive input and ending at + * the ShapeOf node as disabled for FP16 compression. 
+ */ +class ov::pass::MarkPrecisionSensitiveSubgraphs : public FunctionPass { +public: + OPENVINO_RTTI("MarkPrecisionSensitiveSubgraphs", "0"); + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp b/inference-engine/src/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp new file mode 100644 index 00000000000000..ea9935e10029d0 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "transformations_visibility.hpp" +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief Disables ConstantFolding for Convert operation in compressed function. 
+ */ +class ov::pass::DisableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("DisableDecompressionConvertConstantFolding", "0"); + DisableDecompressionConvertConstantFolding(); +}; diff --git a/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp b/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp index f8bd5493a6f88d..92a5cbe7da685a 100644 --- a/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp +++ b/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp @@ -12,11 +12,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -37,5 +39,5 @@ class TRANSFORMATIONS_API Attributes { ngraph::FactoryRegistry m_factory_registry; }; -} // namespace pass -} // namespace ov +} // namespace pass +} // namespace ov diff --git a/inference-engine/src/transformations/include/transformations/rt_info/decompression.hpp b/inference-engine/src/transformations/include/transformations/rt_info/decompression.hpp new file mode 100644 index 00000000000000..18965d1aee26b0 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/rt_info/decompression.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +#include "openvino/core/node.hpp" +#include "openvino/core/variant.hpp" +#include "transformations_visibility.hpp" + + +namespace ov { + +TRANSFORMATIONS_API void mark_as_decompression(const std::shared_ptr& node); + +TRANSFORMATIONS_API void unmark_as_decompression(const std::shared_ptr& node); + +TRANSFORMATIONS_API bool is_decompression(const std::shared_ptr& node); + +/** + * @ingroup ie_runtime_attr_api + * @brief Decompression class represents runtime info attribute that marks operation + * as used as 
decompression for Compressed Only format. + */ +class TRANSFORMATIONS_API Decompression : public VariantImpl { +public: + OPENVINO_RTTI("decompression", "0"); + + Decompression() = default; + + bool visit_attributes(AttributeVisitor& visitor) override { return true; } + + bool is_copyable() const override { return false; } +}; + +} // namespace ov diff --git a/inference-engine/src/transformations/include/transformations/rt_info/disable_fp16_compression.hpp b/inference-engine/src/transformations/include/transformations/rt_info/disable_fp16_compression.hpp new file mode 100644 index 00000000000000..00352928ba2f07 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/rt_info/disable_fp16_compression.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/core/variant.hpp" +#include "transformations_visibility.hpp" + + +namespace ov { + +TRANSFORMATIONS_API void disable_fp16_compression(const std::shared_ptr& node); + +TRANSFORMATIONS_API void enable_fp16_compression(const std::shared_ptr& node); + +TRANSFORMATIONS_API bool fp16_compression_is_disabled(const std::shared_ptr& node); + +/** + * @ingroup ie_runtime_attr_api + * @brief DisableFP16Compression class represents runtime info attribute that marks operation + * as prohibited to convert to FP16 as part of Compressed Only format. 
+ */ +class TRANSFORMATIONS_API DisableFP16Compression : public VariantImpl { +public: + OPENVINO_RTTI("disable_fp16_compression", "0"); + + DisableFP16Compression() = default; + + bool is_copyable() const override { return false; } +}; + +} // namespace ov diff --git a/inference-engine/src/transformations/include/transformations/utils/utils.hpp b/inference-engine/src/transformations/include/transformations/utils/utils.hpp index b4e4a8e6142505..d6342c28fe138f 100644 --- a/inference-engine/src/transformations/include/transformations/utils/utils.hpp +++ b/inference-engine/src/transformations/include/transformations/utils/utils.hpp @@ -15,10 +15,12 @@ #include #include #include +#include #include #include #include +#include namespace ngraph { namespace op { @@ -49,6 +51,17 @@ bool has_op_with_type(const std::shared_ptr &function) { } return false; } + +inline bool has_decompression_converts(const std::shared_ptr& function) { + for (const auto& op : function->get_ops()) { + if (std::dynamic_pointer_cast(op)) { + if (ov::is_decompression(op)) + return true; + } + } + return false; +} + inline std::string create_ie_output_name(const ngraph::Output& output) { const auto& prev_layer = output.get_node_shared_ptr(); std::string out_name = prev_layer->get_friendly_name(); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index d55ab3ca5ceeb9..7e22faf8b1fd49 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -49,6 +49,7 @@ #include "transformations/common_optimizations/strides_optimization.hpp" #include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" #include 
"transformations/common_optimizations/mul_conv_fusion.hpp" +#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" #include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" #include "transformations/op_conversions/convert_pad_to_group_conv.hpp" #include "transformations/op_conversions/convert_divide.hpp" @@ -78,6 +79,7 @@ #include "transformations/op_conversions/gather_normalize_negative_indices.hpp" #include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" #include "transformations/op_conversions/convert_maxpool_downgrade.hpp" +#include "transformations/disable_decompression_convert_constant_folding.hpp" #include #include @@ -95,10 +97,14 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr(); + // Disable low_precision_enabled as all plugins handle low-precision sub-graph manually // before CommonOptimization pipeline execution manager.register_pass(true, false); + manager.register_pass(); + // TODO: move to KMB manager.register_pass(); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/compress_float_constants.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/compress_float_constants.cpp new file mode 100644 index 00000000000000..8133c108a6f85c --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/compress_float_constants.cpp @@ -0,0 +1,125 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/compress_float_constants.hpp" + +#include "openvino/opsets/opset8.hpp" +#include "ngraph/rt_info.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/rt_info/decompression.hpp" +#include "transformations/rt_info/disable_fp16_compression.hpp" +#include "transformations/rt_info/old_api_map_attribute.hpp" +#include "itt.hpp" + + +namespace { 
+template +std::shared_ptr change_constant_precision_to_fp16(std::shared_ptr& constant) { + using src_type = typename ov::element_type_traits::value_type; + + const auto* src_data = constant->get_data_ptr(); + const auto size = ov::shape_size(constant->get_shape()); + + auto new_constant = std::make_shared(ov::element::f16, constant->get_shape()); + auto* dst_data = const_cast(reinterpret_cast(new_constant->get_data_ptr())); + if (dst_data == nullptr) + return nullptr; + + bool is_overflow = false; + for (size_t i = 0; i < size; ++i) { + if (src_data[i] > std::numeric_limits::max()) { + dst_data[i] = std::numeric_limits::max(); + is_overflow = true; + } else if (src_data[i] < std::numeric_limits::lowest()) { + dst_data[i] = std::numeric_limits::lowest(); + is_overflow = true; + } else { + dst_data[i] = static_cast(src_data[i]); + } + } + if (is_overflow) { + std::cerr << "Warning: One or more of the values of the Constant can't fit in the float16 data type." + " Those values were casted to the nearest limit value, the model can produce incorrect results." 
<< std::endl; + } + return new_constant; +} +} // namespace + +ov::pass::CompressFloatConstantsImpl::CompressFloatConstantsImpl() { + MATCHER_SCOPE(CompressFloatConstantsImpl); + auto const_pattern = pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + const auto& const_node_pattern = pattern_map.at(const_pattern); + + auto const_node = std::dynamic_pointer_cast( + const_node_pattern.get_node_shared_ptr()); + if (!const_node) + return false; + + if (ov::fp16_compression_is_disabled(const_node)) + return false; + + auto c_type = const_node->get_element_type(); + std::shared_ptr new_const; + if (c_type == ov::element::f32) { + new_const = change_constant_precision_to_fp16(const_node); + } else if (c_type == ov::element::f64) { + new_const = change_constant_precision_to_fp16(const_node); + } else { + return false; + } + auto convert = std::make_shared(new_const, const_node->get_element_type()); + + convert->set_friendly_name(const_node->get_friendly_name()); + ngraph::copy_runtime_info(const_node, convert); + ov::mark_as_decompression(convert); + + ov::replace_node(const_node, convert); + + return true; + }; + + auto m = std::make_shared(const_pattern, matcher_name); + this->register_matcher(m, callback); +} + +ov::pass::AddOldApiMapToParameters::AddOldApiMapToParameters() { + MATCHER_SCOPE(AddOldApiMapToParameters); + auto param_pattern = pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto node = pattern_map.at(param_pattern).get_node_shared_ptr(); + + auto param_node = std::dynamic_pointer_cast(node); + if (!param_node) + return false; + auto p_type = param_node->get_element_type(); + if (p_type == ov::element::f32 || p_type == ov::element::f64) { + std::vector order; + if (ov::has_old_api_map(node)) { + auto old_api = ov::get_old_api_map(node).get(); + order = 
old_api.get_order(); + } else { + auto p_rank = param_node->get_partial_shape().rank(); + if (p_rank.is_static()) { + auto r = p_rank.get_length(); + order.resize(r); + std::iota(order.begin(), order.end(), 0); + } else { + return false; + } + } + ov::set_old_api_map(node, ov::OldApiMap(ov::OldApiMapAttr(order, ov::element::Type_t::f16))); + } else { + return false; + } + return true; + }; + + auto m = std::make_shared(param_pattern, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp index 4dddec4adab9fb..2cec36d7bf5e1f 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/conv_bias_fusion.cpp @@ -99,7 +99,7 @@ bool conv_callback(ngraph::pattern::Matcher &m) { auto expected_shape = Shape(output_rank, 1); expected_shape[1] = channel_dim; - if (op::util::check_for_broadcast(expected_shape, const_shape)) { + if (ngraph::op::util::check_for_broadcast(expected_shape, const_shape)) { return false; } @@ -107,7 +107,7 @@ bool conv_callback(ngraph::pattern::Matcher &m) { // the number of weights dimensions. 
Output final_const = m_const; if (is_scalar_multiplier) { - final_const = op::util::broadcastTo(m_const, expected_shape); + final_const = ngraph::op::util::broadcastTo(m_const, expected_shape); } if (final_const.get_shape().size() > 1) { diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp new file mode 100644 index 00000000000000..4725e64e5ff81b --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp @@ -0,0 +1,52 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" + +#include "transformations/convert_precision.hpp" +#include "transformations/utils/utils.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/manager.hpp" +#include "itt.hpp" + +using namespace ov; + +bool ov::pass::ConvertPrecisionCompressedOnly::run_on_function(std::shared_ptr f) { + if (ngraph::op::util::has_decompression_converts(f)) { + const precisions_array convert_precision_list{ + {ov::element::f32, ov::element::f16} + }; + auto convert_precision = ngraph::pass::ConvertPrecision(convert_precision_list); + return convert_precision.run_on_function(f); + } + return false; +} + +ov::pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() { + MATCHER_SCOPE(EnableDecompressionConvertConstantFolding); + auto convert = pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& node = m.get_match_root(); + if (!ov::is_decompression(node)) + return false; + enable_constant_folding(node); + return true; + }; + + auto m = std::make_shared(convert, matcher_name); + this->register_matcher(m, callback); +} + 
+bool ov::pass::ConvertCompressedOnlyToLegacy::run_on_function(std::shared_ptr f) { + Manager manager(get_pass_config()); + + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + + return false; +} diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp new file mode 100644 index 00000000000000..c31181edfcab3d --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp @@ -0,0 +1,73 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp" + +#include +#include + +#include "transformations/utils/utils.hpp" +#include "transformations/rt_info/disable_fp16_compression.hpp" +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset3.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" + +using namespace std; + +namespace { +void visit_shape_path(const shared_ptr& node, unordered_set>& visited) { + if (!node) + return; + visited.insert(node); + deque> nodes{ node }; + while (!nodes.empty()) { + auto curr_node = nodes.front(); + nodes.pop_front(); + // Do not check if already visited + if (ov::is_type(curr_node) || ov::is_type(curr_node)) { + continue; + } + visited.insert(curr_node); + if (ov::is_type(curr_node)) { + ov::disable_fp16_compression(curr_node); + } else { + for (auto& input_value : curr_node->input_values()) { + // continue searching + const auto& input_node = input_value.get_node_shared_ptr(); + nodes.push_front(input_node); + } + } + } +} +} // namespace + +bool 
ov::pass::MarkPrecisionSensitiveSubgraphs::run_on_function(std::shared_ptr f) { + deque> nodes; + unordered_set> visited; + for (auto& r : f->get_results()) + nodes.push_back(r); + for (auto& r : f->get_sinks()) + nodes.emplace_back(r); + + while (!nodes.empty()) { + auto curr_node = nodes.front(); + nodes.pop_front(); + if (visited.count(curr_node)) + continue; + for (auto& input : curr_node->inputs()) { + if (ov::is_precision_sensitive(input)) + visit_shape_path(input.get_source_output().get_node_shared_ptr(), visited); + } + visited.insert(curr_node); + + for (auto& input_value : curr_node->input_values()) { + // continue searching + const auto& input_node = input_value.get_node_shared_ptr(); + nodes.push_front(input_node); + } + } + return true; +} diff --git a/inference-engine/src/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp b/inference-engine/src/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp new file mode 100644 index 00000000000000..3b6af212327bf6 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/disable_decompression_convert_constant_folding.hpp" + +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/rt_info/disable_constant_folding.hpp" +#include "transformations/rt_info/decompression.hpp" +#include "itt.hpp" + +ov::pass::DisableDecompressionConvertConstantFolding::DisableDecompressionConvertConstantFolding() { + MATCHER_SCOPE(DisableDecompressionConvertConstantFolding); + auto convert = pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& node = m.get_match_root(); + if (!ov::is_decompression(node)) + return false; + disable_constant_folding(node); + 
return true; + }; + + auto m = std::make_shared(convert, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/rt_info/attributes.cpp b/inference-engine/src/transformations/src/transformations/rt_info/attributes.cpp index 26c42ab0fbceb5..910788d6c43ef9 100644 --- a/inference-engine/src/transformations/src/transformations/rt_info/attributes.cpp +++ b/inference-engine/src/transformations/src/transformations/rt_info/attributes.cpp @@ -8,10 +8,12 @@ ov::pass::Attributes::Attributes() { register_factory>(); register_factory(); register_factory(); + register_factory(); register_factory(); register_factory(); register_factory(); register_factory(); + register_factory(); } ov::Variant* ov::pass::Attributes::create_by_type_info(const ov::DiscreteTypeInfo& type_info) { diff --git a/inference-engine/src/transformations/src/transformations/rt_info/decompression.cpp b/inference-engine/src/transformations/src/transformations/rt_info/decompression.cpp new file mode 100644 index 00000000000000..01b45f2bfaba67 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/rt_info/decompression.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/rt_info/decompression.hpp" + +void ov::mark_as_decompression(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info[Decompression::get_type_info_static()] = std::make_shared(); +} + +void ov::unmark_as_decompression(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info.erase(Decompression::get_type_info_static()); +} + +bool ov::is_decompression(const std::shared_ptr& node) { + const auto& rt_info = node->get_rt_info(); + return rt_info.count(Decompression::get_type_info_static()); +} diff --git a/inference-engine/src/transformations/src/transformations/rt_info/disable_fp16_compression.cpp 
b/inference-engine/src/transformations/src/transformations/rt_info/disable_fp16_compression.cpp new file mode 100644 index 00000000000000..79513ddf2b7158 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/rt_info/disable_fp16_compression.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/rt_info/disable_fp16_compression.hpp" + +void ov::disable_fp16_compression(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info[DisableFP16Compression::get_type_info_static()] = std::make_shared(); +} + +void ov::enable_fp16_compression(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info.erase(DisableFP16Compression::get_type_info_static()); +} + +bool ov::fp16_compression_is_disabled(const std::shared_ptr& node) { + const auto& rt_info = node->get_rt_info(); + return rt_info.count(DisableFP16Compression::get_type_info_static()); +} diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp index a851b27026f2e0..3f030aa1b79ad1 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp @@ -57,6 +57,7 @@ TEST_F(RTInfoSerializationTest, all_attributes_latest) { std::make_shared("priority"); info[ov::OldApiMap::get_type_info_static()] = std::make_shared( ov::OldApiMapAttr(std::vector{0, 2, 3, 1}, ngraph::element::Type_t::f32)); + info[ov::Decompression::get_type_info_static()] = std::make_shared(); }; std::shared_ptr function; @@ -100,6 +101,11 @@ TEST_F(RTInfoSerializationTest, all_attributes_latest) { auto old_api_map_attr_val = old_api_map_attr->get(); ASSERT_EQ(old_api_map_attr_val.get_order(), std::vector({0, 2, 3, 1})); 
ASSERT_EQ(old_api_map_attr_val.get_type(), ngraph::element::Type_t::f32); + + const std::string& dkey = ov::Decompression::get_type_info_static(); + ASSERT_TRUE(info.count(dkey)); + auto decompression_attr = std::dynamic_pointer_cast(info.at(dkey)); + ASSERT_TRUE(decompression_attr); }; auto add = f->get_results()[0]->get_input_node_ptr(0); diff --git a/inference-engine/tests/functional/inference_engine/transformations/compress_float_constants_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/compress_float_constants_test.cpp new file mode 100644 index 00000000000000..1c3b6bfe4396cd --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/compress_float_constants_test.cpp @@ -0,0 +1,138 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include "openvino/core/function.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/common_optimizations/compress_float_constants.hpp" +#include "transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp" +#include "transformations/init_node_info.hpp" +#include "transformations/utils/utils.hpp" + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; + +TEST(TransformationTests, CompressConstants_f32) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f32, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto conv = std::make_shared(input, + const_weights, + ov::Strides{1, 1}, + ov::CoordinateDiff{0, 0}, + ov::CoordinateDiff{0, 0}, + ov::Strides{1, 1}); + auto const_scales = ov::opset8::Constant::create(ov::element::f32, ov::Shape{ 1 }, { 1.4 }); + + auto shape = std::make_shared(conv); + 
auto convert1 = std::make_shared(shape, ov::element::f32); + auto mul = std::make_shared(convert1, const_scales); + auto convert2 = std::make_shared(mul, ov::element::i32); + + auto default_scales_node = ov::opset8::Constant::create(ov::element::f32, ov::Shape{ 4 }, { 1., 1., 1.4, 1.4 }); + auto axes_node = ov::opset8::Constant::create(ov::element::i64, ov::Shape{ 4 }, { 0, 1, 2, 3 }); + + auto interpolate4_attr = ov::opset8::Interpolate::InterpolateAttrs(ov::opset8::Interpolate::InterpolateMode::NEAREST, + ov::opset8::Interpolate::ShapeCalcMode::SIZES, std::vector{0, 0, 0, 0}, std::vector{0, 0, 0, 0}, + ov::opset8::Interpolate::CoordinateTransformMode::ASYMMETRIC, ov::opset8::Interpolate::NearestMode::SIMPLE, + false, -0.75); + + auto resize = std::make_shared(conv, convert2, default_scales_node, axes_node, interpolate4_attr); + + f = std::make_shared(ov::NodeVector{ resize }, ov::ParameterVector{ input }); + + ov::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f16, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto convert_ins1 = std::make_shared(const_weights, ov::element::f32); + auto conv = std::make_shared(input, + convert_ins1, + ov::Strides{ 1, 1 }, + ov::CoordinateDiff{ 0, 0 }, + ov::CoordinateDiff{ 0, 0 }, + ov::Strides{ 1, 1 }); + auto const_scales = ov::opset8::Constant::create(ov::element::f32, ov::Shape{ 1 }, { 1.4 }); + + auto shape = std::make_shared(conv); + auto convert1 = std::make_shared(shape, ov::element::f32); + auto mul = std::make_shared(convert1, const_scales); + auto convert2 = std::make_shared(mul, ov::element::i32); + + auto default_scales_node = ov::opset8::Constant::create(ov::element::f32, 
ov::Shape{ 4 }, { 1., 1., 1.4, 1.4 }); + auto axes_node = ov::opset8::Constant::create(ov::element::i64, ov::Shape{ 4 }, { 0, 1, 2, 3 }); + + auto interpolate4_attr = ov::opset8::Interpolate::InterpolateAttrs(ov::opset8::Interpolate::InterpolateMode::NEAREST, + ov::opset8::Interpolate::ShapeCalcMode::SIZES, std::vector{0, 0, 0, 0}, std::vector{0, 0, 0, 0}, + ov::opset8::Interpolate::CoordinateTransformMode::ASYMMETRIC, ov::opset8::Interpolate::NearestMode::SIMPLE, + false, -0.75); + + auto resize = std::make_shared(conv, convert2, default_scales_node, axes_node, interpolate4_attr); + + f_ref = std::make_shared(ov::NodeVector{ resize }, ov::ParameterVector{ input }); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, CompressConstants_f64) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ov::element::f64, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f64, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto conv = std::make_shared(input, + const_weights, + ov::Strides{ 1, 1 }, + ov::CoordinateDiff{ 0, 0 }, + ov::CoordinateDiff{ 0, 0 }, + ov::Strides{ 1, 1 }); + f = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + + ov::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ov::element::f64, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f16, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto convert_ins1 = std::make_shared(const_weights, ov::element::f64); + auto conv = std::make_shared(input, + convert_ins1, + ov::Strides{ 1, 1 }, + ov::CoordinateDiff{ 
0, 0 }, + ov::CoordinateDiff{ 0, 0 }, + ov::Strides{ 1, 1 }); + f_ref = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_compression_only_to_legacy_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_compression_only_to_legacy_test.cpp new file mode 100644 index 00000000000000..1ce8582ff95a3c --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_compression_only_to_legacy_test.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include "openvino/core/function.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/rt_info/decompression.hpp" +#include "transformations/init_node_info.hpp" +#include "transformations/utils/utils.hpp" + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; + +TEST(TransformationTests, ConvertCompressionOnlyToLegacy) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f16, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto convert_ins1 = std::make_shared(const_weights, ov::element::f32); + ov::mark_as_decompression(convert_ins1); + auto conv = std::make_shared(input, + convert_ins1, + ov::Strides{ 1, 1 }, + ov::CoordinateDiff{ 0, 0 }, + ov::CoordinateDiff{ 0, 0 }, + ov::Strides{ 1, 1 }); + + f = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + + ov::pass::Manager 
manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ov::element::f16, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f16, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto conv = std::make_shared(input, + const_weights, + ov::Strides{ 1, 1 }, + ov::CoordinateDiff{ 0, 0 }, + ov::CoordinateDiff{ 0, 0 }, + ov::Strides{ 1, 1 }); + + f_ref = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, ConvertCompressionOnlyToLegacyNoConvertion) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f32, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto conv = std::make_shared(input, + const_weights, + ov::Strides{ 1, 1 }, + ov::CoordinateDiff{ 0, 0 }, + ov::CoordinateDiff{ 0, 0 }, + ov::Strides{ 1, 1 }); + + f = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + + ov::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); + auto const_weights = ov::opset8::Constant::create(ov::element::f32, + ov::Shape{ 1, 3, 3, 3 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto conv = std::make_shared(input, + const_weights, + ov::Strides{ 1, 1 }, + ov::CoordinateDiff{ 0, 0 }, + ov::CoordinateDiff{ 0, 0 }, + ov::Strides{ 1, 1 }); + + f_ref = std::make_shared(ov::NodeVector{ 
conv }, ov::ParameterVector{ input }); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/model-optimizer/mo/back/offline_transformations.py b/model-optimizer/mo/back/offline_transformations.py index 17b997669523a2..79f9d847323fbe 100644 --- a/model-optimizer/mo/back/offline_transformations.py +++ b/model-optimizer/mo/back/offline_transformations.py @@ -33,8 +33,11 @@ def apply_moc_transformations(net: object): from openvino.offline_transformations import ApplyMOCTransformations # pylint: disable=import-error,no-name-in-module ApplyMOCTransformations(net, False) +def compress_model(net:object): + from openvino.offline_transformations import CompressModelTransformation # pylint: disable=import-error,no-name-in-module + CompressModelTransformation(net) -def apply_offline_transformations(input_model: str, framework: str, transforms: list): +def apply_offline_transformations(input_model: str, framework: str, transforms: list, compress_fp16=False): # This variable is only needed by GenerateMappingFile transformation # to produce correct mapping extract_names = framework in ['tf', 'mxnet', 'kaldi'] @@ -58,6 +61,10 @@ def read_network(path_to_xml): apply_user_transformations(net, transforms) apply_moc_transformations(net) + + if compress_fp16: + compress_model(net) + Serialize(net, str(input_model + ".xml").encode('utf-8'), (input_model + ".bin").encode('utf-8')) path_to_mapping = input_model + ".mapping" GenerateMappingFile(net, path_to_mapping.encode('utf-8'), extract_names) @@ -68,6 +75,7 @@ def read_network(path_to_xml): parser.add_argument("--input_model") parser.add_argument("--framework") parser.add_argument("--transform") + parser.add_argument("--compress_fp16", action='store_true') args = parser.parse_args() - apply_offline_transformations(args.input_model, args.framework, parse_transform(args.transform)) + apply_offline_transformations(args.input_model, args.framework, parse_transform(args.transform), 
args.compress_fp16) diff --git a/model-optimizer/mo/main.py b/model-optimizer/mo/main.py index c939d9523ed919..4e79ba84d289af 100644 --- a/model-optimizer/mo/main.py +++ b/model-optimizer/mo/main.py @@ -205,6 +205,12 @@ def raise_ie_not_found(): except Exception as e: raise_ie_not_found() + if 'data_type' in argv and argv.data_type in ['FP16', 'half']: + argv.data_type = 'FP32' + argv.compress_fp16 = True + else: + argv.compress_fp16 = False + # This is just to check that transform key is valid and transformations are available check_available_transforms(parse_transform(argv.transform)) @@ -355,10 +361,15 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): if not argv.legacy_ir_generation: path_to_offline_transformations = os.path.join(os.path.realpath(os.path.dirname(__file__)), 'back', 'offline_transformations.py') - status = subprocess.run([sys.executable, path_to_offline_transformations, + cmd = [sys.executable, path_to_offline_transformations, "--input_model", orig_model_name, "--framework", argv.framework, - "--transform", argv.transform], env=os.environ) + "--transform", argv.transform] + if argv.compress_fp16: + cmd += ["--compress_fp16"] + # restore data_type cmd parameter + argv.data_type = 'FP16' + status = subprocess.run(cmd, env=os.environ) return_code = status.returncode except Exception as e: return_code = "failed" diff --git a/model-optimizer/mo/moc_frontend/serialize.py b/model-optimizer/mo/moc_frontend/serialize.py index abe150742e1d62..2de7c800d0225d 100644 --- a/model-optimizer/mo/moc_frontend/serialize.py +++ b/model-optimizer/mo/moc_frontend/serialize.py @@ -19,6 +19,10 @@ def moc_emit_ir(ngraph_function: Function, argv: argparse.Namespace): apply_user_transformations(network, parse_transform(argv.transform)) apply_moc_transformations(network) + if argv.compress_fp16: + from mo.back.offline_transformations import compress_model + compress_model(network) + orig_model_name = os.path.normpath(os.path.join(output_dir, argv.model_name)) 
network.serialize(orig_model_name + ".xml", orig_model_name + ".bin") diff --git a/model-optimizer/mo/pipeline/common.py b/model-optimizer/mo/pipeline/common.py index 6dfafa840d3ab2..319b99e9d57032 100644 --- a/model-optimizer/mo/pipeline/common.py +++ b/model-optimizer/mo/pipeline/common.py @@ -173,18 +173,19 @@ def convert_inputs_of_specific_ops(graph: Graph): def prepare_emit_ir(graph: Graph, data_type: str, output_dir: str, output_model_name: str, mean_data: [list, None] = None, input_names: list = None, meta_info: dict = None, - use_temporary_path=False): + use_temporary_path=False, used_by_ir_reader=False): if input_names is None: input_names = [] if meta_info is None: meta_info = {} graph.strict_mode = False - # convert Parameter data types - convert_data_type.convert_parameters_data_type(graph, data_type) - # convert blobs (usually weights and biases) - for sub_graph in [graph] + collect_sub_graphs(graph): - convert_data_type.convert_blobs(sub_graph, data_type) + if not used_by_ir_reader: + # convert Parameter data types + convert_data_type.convert_parameters_data_type(graph, data_type) + # convert blobs (usually weights and biases) + for sub_graph in [graph] + collect_sub_graphs(graph): + convert_data_type.convert_blobs(sub_graph, data_type) # restore data type for specific inputs/outputs of specific ops to the data types required by nGraph for_graph_and_each_sub_graph_recursively(graph, convert_inputs_of_specific_ops) diff --git a/model-optimizer/mo/utils/ir_reader/restore_graph.py b/model-optimizer/mo/utils/ir_reader/restore_graph.py index d18aa77089815f..1a4e3cca6df2f1 100644 --- a/model-optimizer/mo/utils/ir_reader/restore_graph.py +++ b/model-optimizer/mo/utils/ir_reader/restore_graph.py @@ -88,4 +88,4 @@ def save_restored_graph(graph: Graph, path: str, meta_data, name=None): for_graph_and_each_sub_graph_recursively(graph, RemoveConstOps().find_and_replace_pattern) for_graph_and_each_sub_graph_recursively(graph, 
CreateConstNodesReplacement().find_and_replace_pattern) - prepare_emit_ir(graph, data_type, path, name, meta_info=meta_data) + prepare_emit_ir(graph, data_type, path, name, meta_info=meta_data, used_by_ir_reader=True) diff --git a/ngraph/core/include/openvino/core/variant.hpp b/ngraph/core/include/openvino/core/variant.hpp index 2337f0639901ab..5802fe14f9f649 100644 --- a/ngraph/core/include/openvino/core/variant.hpp +++ b/ngraph/core/include/openvino/core/variant.hpp @@ -58,6 +58,14 @@ class VariantImpl : public Variant { value_type m_value; }; +template <> +class VariantImpl : public Variant { +public: + using value_type = void; + + VariantImpl() = default; +}; + extern template class OPENVINO_API VariantImpl; extern template class OPENVINO_API VariantImpl; extern template class OPENVINO_API VariantImpl; diff --git a/ngraph/core/include/openvino/op/util/precision_sensitive_attribute.hpp b/ngraph/core/include/openvino/op/util/precision_sensitive_attribute.hpp new file mode 100644 index 00000000000000..eb0f7308d4b0d7 --- /dev/null +++ b/ngraph/core/include/openvino/op/util/precision_sensitive_attribute.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/core_visibility.hpp" +#include "openvino/core/node.hpp" +#include "openvino/core/variant.hpp" + +namespace ov { + +void OPENVINO_API mark_as_precision_sensitive(ov::Input node_input); + +void OPENVINO_API unmark_as_precision_sensitive(ov::Input node_input); + +bool OPENVINO_API is_precision_sensitive(const ov::Input& node_input); + +/** + * @brief PrecisionSensitive class represents runtime info attribute that marks + * input to an operation as a precision sensitive and disables compression to FP16 + * of the subgraph before this input. 
+ */ +class OPENVINO_API PrecisionSensitive : public VariantImpl { +public: + OPENVINO_RTTI("precision_sensitive", "0"); + + PrecisionSensitive() = default; + + bool is_copyable() const override { + return false; + } +}; + +} // namespace ov diff --git a/ngraph/core/src/op/batch_to_space.cpp b/ngraph/core/src/op/batch_to_space.cpp index 763a37db8cd5cb..3cee1ba4f42f59 100644 --- a/ngraph/core/src/op/batch_to_space.cpp +++ b/ngraph/core/src/op/batch_to_space.cpp @@ -19,6 +19,7 @@ #include "ngraph/runtime/reference/strided_slice.hpp" #include "ngraph/shape.hpp" #include "ngraph/slice_plan.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -30,6 +31,9 @@ ngraph::op::v1::BatchToSpace::BatchToSpace(const ngraph::Output& d const ngraph::Output& crops_begin, const ngraph::Output& crops_end) : Op({data, block_shape, crops_begin, crops_end}) { + ov::mark_as_precision_sensitive(input(1)); + ov::mark_as_precision_sensitive(input(2)); + ov::mark_as_precision_sensitive(input(3)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/convolution.cpp b/ngraph/core/src/op/convolution.cpp index d4fdc345ccf4d3..a66c88ee81fdff 100644 --- a/ngraph/core/src/op/convolution.cpp +++ b/ngraph/core/src/op/convolution.cpp @@ -12,6 +12,7 @@ #include "ngraph/op/reshape.hpp" #include "ngraph/util.hpp" #include "ngraph/validation_util.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -117,6 +118,7 @@ op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output& dat m_pads_end(pads_end), m_auto_pad(auto_pad), m_output_padding(output_padding) { + ov::mark_as_precision_sensitive(input(2)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/group_conv.cpp b/ngraph/core/src/op/group_conv.cpp index 850d8f0038d979..625110cabef0fa 100644 --- a/ngraph/core/src/op/group_conv.cpp +++ b/ngraph/core/src/op/group_conv.cpp @@ -14,6 
+14,7 @@ #include "ngraph/op/convolution.hpp" #include "ngraph/op/reshape.hpp" #include "ngraph/validation_util.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -277,6 +278,7 @@ op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData(const Output< m_pads_end(pads_end), m_auto_pad(auto_pad), m_output_padding(output_padding) { + ov::mark_as_precision_sensitive(input(2)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/interpolate.cpp b/ngraph/core/src/op/interpolate.cpp index 4fa8f579968e56..2aed5f92af7e9d 100644 --- a/ngraph/core/src/op/interpolate.cpp +++ b/ngraph/core/src/op/interpolate.cpp @@ -13,6 +13,7 @@ #include "itt.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/runtime/reference/interpolate.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -22,6 +23,7 @@ BWDCMP_RTTI_DEFINITION(op::v0::Interpolate); op::v0::Interpolate::Interpolate(const Output& image, const Output& output_shape, const Attributes& attrs) : Op({image, output_shape}), m_attrs(attrs) { + ov::mark_as_precision_sensitive(input(1)); constructor_validate_and_infer_types(); } @@ -99,6 +101,8 @@ op::v4::Interpolate::Interpolate(const Output& image, const op::v4::Interpolate::InterpolateAttrs& attrs) : Op({image, output_shape, scales, axes}), m_attrs(attrs) { + ov::mark_as_precision_sensitive(input(1)); + ov::mark_as_precision_sensitive(input(2)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/one_hot.cpp b/ngraph/core/src/op/one_hot.cpp index 64f42cfef6feb7..e256070dbddc83 100644 --- a/ngraph/core/src/op/one_hot.cpp +++ b/ngraph/core/src/op/one_hot.cpp @@ -9,6 +9,7 @@ #include "ngraph/op/util/op_types.hpp" #include "ngraph/runtime/reference/one_hot.hpp" #include "ngraph/validation_util.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; 
@@ -22,6 +23,7 @@ op::v1::OneHot::OneHot(const Output& indices, int64_t axis) : Op({indices, depth, on_value, off_value}), m_axis(axis) { + ov::mark_as_precision_sensitive(input(1)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/pad.cpp b/ngraph/core/src/op/pad.cpp index e06e83e17e2fdd..2812311b9dae2b 100644 --- a/ngraph/core/src/op/pad.cpp +++ b/ngraph/core/src/op/pad.cpp @@ -13,6 +13,7 @@ #include "ngraph/op/constant.hpp" #include "ngraph/op/util/op_types.hpp" #include "ngraph/runtime/reference/pad.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -26,6 +27,8 @@ op::v1::Pad::Pad(const Output& arg, PadMode pad_mode) : Op({arg, pads_begin, pads_end, arg_pad_value}), m_pad_mode{pad_mode} { + ov::mark_as_precision_sensitive(input(1)); + ov::mark_as_precision_sensitive(input(2)); constructor_validate_and_infer_types(); } @@ -35,6 +38,8 @@ op::v1::Pad::Pad(const Output& arg, PadMode pad_mode) : Op({arg, pads_begin, pads_end, op::v0::Constant::create(arg.get_element_type(), ov::Shape{}, {0})}), m_pad_mode{pad_mode} { + ov::mark_as_precision_sensitive(input(1)); + ov::mark_as_precision_sensitive(input(2)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/reshape.cpp b/ngraph/core/src/op/reshape.cpp index e13718a812ac5e..ed2c6e5363ecc4 100644 --- a/ngraph/core/src/op/reshape.cpp +++ b/ngraph/core/src/op/reshape.cpp @@ -11,6 +11,7 @@ #include "ngraph/op/constant.hpp" #include "ngraph/runtime/opt_kernel/reshape.hpp" #include "ngraph/runtime/reference/reshape.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -44,6 +45,7 @@ BWDCMP_RTTI_DEFINITION(op::v1::Reshape); op::v1::Reshape::Reshape(const Output& arg, const Output& shape_pattern, bool zero_flag) : Op({arg, shape_pattern}), m_special_zero(zero_flag) { + ov::mark_as_precision_sensitive(input(1)); constructor_validate_and_infer_types(); } 
diff --git a/ngraph/core/src/op/space_to_batch.cpp b/ngraph/core/src/op/space_to_batch.cpp index 3a3498d60c40fb..cd11e09bd73f45 100644 --- a/ngraph/core/src/op/space_to_batch.cpp +++ b/ngraph/core/src/op/space_to_batch.cpp @@ -17,6 +17,7 @@ #include "ngraph/runtime/opt_kernel/reshape.hpp" #include "ngraph/runtime/reference/pad.hpp" #include "ngraph/shape.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -28,6 +29,9 @@ ngraph::op::v1::SpaceToBatch::SpaceToBatch(const ngraph::Output& d const ngraph::Output& pads_begin, const ngraph::Output& pads_end) : Op({data, block_shape, pads_begin, pads_end}) { + ov::mark_as_precision_sensitive(input(1)); + ov::mark_as_precision_sensitive(input(2)); + ov::mark_as_precision_sensitive(input(3)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/strided_slice.cpp b/ngraph/core/src/op/strided_slice.cpp index 003f909d9e7bd8..1cb0e93ee20354 100644 --- a/ngraph/core/src/op/strided_slice.cpp +++ b/ngraph/core/src/op/strided_slice.cpp @@ -19,6 +19,7 @@ #include "ngraph/type/element_type_traits.hpp" #include "ngraph/util.hpp" #include "ngraph/validation_util.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -40,6 +41,9 @@ op::v1::StridedSlice::StridedSlice(const Output& data, m_new_axis_mask{new_axis_mask}, m_shrink_axis_mask{shrink_axis_mask}, m_ellipsis_mask{ellipsis_mask} { + ov::mark_as_precision_sensitive(input(1)); + ov::mark_as_precision_sensitive(input(2)); + ov::mark_as_precision_sensitive(input(3)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/tile.cpp b/ngraph/core/src/op/tile.cpp index a77ae182fc02db..1f6d95fd650bb3 100644 --- a/ngraph/core/src/op/tile.cpp +++ b/ngraph/core/src/op/tile.cpp @@ -9,6 +9,7 @@ #include "itt.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/runtime/reference/tile.hpp" +#include 
"openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -16,6 +17,7 @@ using namespace ngraph; BWDCMP_RTTI_DEFINITION(op::v0::Tile); op::v0::Tile::Tile(const Output& data, const Output& repeats) : Op({data, repeats}) { + ov::mark_as_precision_sensitive(input(1)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/topk.cpp b/ngraph/core/src/op/topk.cpp index 93bc14a17bee8f..ef7b46f7290aaf 100644 --- a/ngraph/core/src/op/topk.cpp +++ b/ngraph/core/src/op/topk.cpp @@ -15,6 +15,7 @@ #include "ngraph/runtime/reference/topk.hpp" #include "ngraph/shape.hpp" #include "ngraph/validation_util.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace ngraph; @@ -154,6 +155,7 @@ op::v1::TopK::TopK(const Output& data, m_mode{as_enum(mode)}, m_sort{as_enum(sort)}, m_index_element_type{index_element_type} { + ov::mark_as_precision_sensitive(input(1)); constructor_validate_and_infer_types(); } @@ -169,6 +171,7 @@ op::v1::TopK::TopK(const Output& data, m_mode{mode}, m_sort{sort}, m_index_element_type{index_element_type} { + ov::mark_as_precision_sensitive(input(1)); constructor_validate_and_infer_types(); } diff --git a/ngraph/core/src/op/util/broadcast_base.cpp b/ngraph/core/src/op/util/broadcast_base.cpp index 47d40d21c3c98e..8c094c739e0e87 100644 --- a/ngraph/core/src/op/util/broadcast_base.cpp +++ b/ngraph/core/src/op/util/broadcast_base.cpp @@ -14,6 +14,7 @@ #include "ngraph/op/util/op_types.hpp" #include "ngraph/partial_shape.hpp" #include "ngraph/runtime/reference/broadcast.hpp" +#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; @@ -24,13 +25,17 @@ ov::op::util::BroadcastBase::BroadcastBase(const Output& arg, const Output& axes_mapping, const BroadcastModeSpec& broadcast_mode) : Op({arg, target_shape, axes_mapping}), - m_mode{broadcast_mode} {} + m_mode{broadcast_mode} { + ov::mark_as_precision_sensitive(input(1)); +} 
ov::op::util::BroadcastBase::BroadcastBase(const Output& arg, const Output& target_shape, const BroadcastModeSpec& broadcast_mode) : Op({arg, target_shape}), - m_mode{broadcast_mode} {} + m_mode{broadcast_mode} { + ov::mark_as_precision_sensitive(input(1)); +} ov::PartialShape ov::op::util::BroadcastBase::get_result_shape_pdpd(const PartialShape& arg0_shape, const PartialShape& target_pshape, diff --git a/ngraph/core/src/op/util/precision_sensitive_attribute.cpp b/ngraph/core/src/op/util/precision_sensitive_attribute.cpp new file mode 100644 index 00000000000000..6591699a341702 --- /dev/null +++ b/ngraph/core/src/op/util/precision_sensitive_attribute.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/util/precision_sensitive_attribute.hpp" + +void ov::mark_as_precision_sensitive(ov::Input node_input) { + auto& rt_info = node_input.get_rt_info(); + rt_info[PrecisionSensitive::get_type_info_static()] = std::make_shared(); +} + +void ov::unmark_as_precision_sensitive(ov::Input node_input) { + auto& rt_info = node_input.get_rt_info(); + rt_info.erase(PrecisionSensitive::get_type_info_static()); +} + +bool ov::is_precision_sensitive(const ov::Input& node_input) { + const auto& rt_info = node_input.get_rt_info(); + return rt_info.count(PrecisionSensitive::get_type_info_static()); +}