Skip to content

Commit

Permalink
Add model compression to FP16 weights (openvinotoolkit#7588)
Browse files Browse the repository at this point in the history
* Add model compression to FP16 weights

* Fix build

* Fix build

* Fix build

* Add wrapper over ConvertPrecision

* Add documentation to attributes

* Fix MO IR Reader

* Fix build

* Return DisableDecompressionConvertConstantFolding call in CommonOptimizations

* Temporarily disable old_api map

* Fix TI Convert issue

* Apply review feedback

* Fix build

* Fix build

* Fix build
  • Loading branch information
mvafin authored and OpenVINO-dev-contest committed Nov 23, 2021
1 parent 713420f commit 39cf8ff
Show file tree
Hide file tree
Showing 45 changed files with 986 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer
C.ApplyLowLatencyTransformation(network.impl, use_const_initializer)


# Python-visible wrapper: passes `network.impl` to `C.CompressModelTransformation`.
# Nothing is returned.
def CompressModelTransformation(IENetwork network):
C.CompressModelTransformation(network.impl)


# Python-visible wrapper: passes `network.impl` to `C.ApplyPruningTransformation`.
# Nothing is returned.
def ApplyPruningTransformation(IENetwork network):
C.ApplyPruningTransformation(network.impl)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <openvino/pass/make_stateful.hpp>
#include <pot_transformations.hpp>
#include <pruning.hpp>
#include <transformations/common_optimizations/compress_float_constants.hpp>
#include <transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp>
#include <transformations/common_optimizations/moc_transformations.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/serialize.hpp>
Expand Down Expand Up @@ -56,6 +58,13 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork
manager.run_passes(network.actual->getFunction());
}

/**
 * Runs the model-compression passes on the function held by @p network.
 *
 * Two passes are registered on a local pass manager, in this order:
 * ov::pass::MarkPrecisionSensitiveSubgraphs, then ov::pass::CompressFloatConstants.
 * The manager is then executed on the result of network.actual->getFunction().
 *
 * @param network Network wrapper whose underlying function is processed.
 */
void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) {
    ngraph::pass::Manager pass_manager;

    // Registration order matters: marking has to precede the compression pass.
    pass_manager.register_pass<ov::pass::MarkPrecisionSensitiveSubgraphs>();
    pass_manager.register_pass<ov::pass::CompressFloatConstants>();

    auto target_function = network.actual->getFunction();
    pass_manager.run_passes(target_function);
}

void InferenceEnginePython::Serialize(InferenceEnginePython::IENetwork network,
std::string path_to_xml,
std::string path_to_bin) {
Expand All @@ -80,4 +89,4 @@ void InferenceEnginePython::CheckAPI() {
auto reshape = f->get_result()->input_value(0).get_node_shared_ptr();
assert(std::dynamic_pointer_cast<ngraph::opset6::Parameter>(reshape->input_value(0).get_node_shared_ptr()));
assert(std::dynamic_pointer_cast<ngraph::opset6::Constant>(reshape->input_value(1).get_node_shared_ptr()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);

void GenerateMappingFile(InferenceEnginePython::IENetwork network, std::string path, bool extract_names);

void CompressModelTransformation(InferenceEnginePython::IENetwork network);

void Serialize(InferenceEnginePython::IENetwork network, std::string path_to_xml, std::string path_to_bin);

void CheckAPI();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi
cdef void ApplyMakeStatefulTransformation(IENetwork network, map[string, string]& in_out_names)

cdef void ApplyPruningTransformation(IENetwork network)

cdef void CompressModelTransformation(IENetwork network)

cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
Expand Down Expand Up @@ -163,6 +164,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {

auto pass_config = manager.get_pass_config();

pass_config->enable<ov::pass::ConvertCompressedOnlyToLegacy>();

// SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
ngraph::pass::ConvertDepthToSpace>(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "transformations_visibility.hpp"
#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace pass {

class TRANSFORMATIONS_API CompressFloatConstantsImpl;
class TRANSFORMATIONS_API AddOldApiMapToParameters;
class TRANSFORMATIONS_API CompressFloatConstants;

} // namespace pass
} // namespace ov

/**
* @ingroup ie_transformation_common_api
* @brief CompressFloatConstantsImpl transformation replaces FP32/FP64 Constants with FP16 ones.
*
* Declared as a MatcherPass; in this header it is added as a matcher of the
* ov::pass::CompressFloatConstants GraphRewrite.
*/
class ov::pass::CompressFloatConstantsImpl : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("CompressFloatConstantsImpl", "0");
// Declared only; the constructor body is not part of this header.
CompressFloatConstantsImpl();
};

/**
* @ingroup ie_transformation_common_api
* @brief AddOldApiMapToParameters transformation adds OldApiMap to each float input to the model.
*
* Declared as a MatcherPass; in this header it is added as a matcher of the
* ov::pass::CompressFloatConstants GraphRewrite.
*/
class ov::pass::AddOldApiMapToParameters : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("AddOldApiMapToParameters", "0");
// Declared only; the constructor body is not part of this header.
AddOldApiMapToParameters();
};

/**
* @ingroup ie_transformation_common_api
* @brief CompressFloatConstants is a GraphRewrite that bundles the matchers used for
* compressing float constants: CompressFloatConstantsImpl (replaces FP32/FP64 Constants
* with FP16 ones) and AddOldApiMapToParameters (adds OldApiMap to each float input).
*/
class ov::pass::CompressFloatConstants : public ov::pass::GraphRewrite {
public:
OPENVINO_RTTI("CompressFloatConstants", "0");
// Registers both matchers; CompressFloatConstantsImpl is added first.
CompressFloatConstants() {
add_matcher<ov::pass::CompressFloatConstantsImpl>();
add_matcher<ov::pass::AddOldApiMapToParameters>();
}
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "transformations_visibility.hpp"
#include "openvino/pass/pass.hpp"
#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace pass {

class TRANSFORMATIONS_API ConvertPrecisionCompressedOnly;
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
class TRANSFORMATIONS_API ConvertCompressedOnlyToLegacy;

} // namespace pass
} // namespace ov

/**
* @ingroup ie_transformation_common_api
* @brief ConvertPrecisionCompressedOnly transformation runs ConvertPrecision transformation for CompressedOnly format.
*
* Implemented as a FunctionPass: the work happens in run_on_function(), whose
* definition is not part of this header.
*/

class ov::pass::ConvertPrecisionCompressedOnly : public ov::pass::FunctionPass {
public:
OPENVINO_RTTI("ConvertPrecisionCompressedOnly", "0");
// FunctionPass entry point, called with the function to process.
// NOTE(review): presumably returns true when the function was changed - confirm against the pass base class.
bool run_on_function(std::shared_ptr<Function> f) override;
};

/**
* @ingroup ie_transformation_common_api
* @brief Enables ConstantFolding for Convert operation in compressed function.
*
* Declared as a MatcherPass; the pattern and callback are set up in the
* constructor, which is only declared in this header.
*/
class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0");
// Declared only; the constructor body is not part of this header.
EnableDecompressionConvertConstantFolding();
};

/**
* @ingroup ie_transformation_common_api
* @brief ConvertCompressedOnlyToLegacy transformation converts compression only FP16 format to legacy FP16 format.
*
* Implemented as a FunctionPass: the work happens in run_on_function(), whose
* definition is not part of this header.
*/
class ov::pass::ConvertCompressedOnlyToLegacy : public ov::pass::FunctionPass {
public:
OPENVINO_RTTI("ConvertCompressedOnlyToLegacy", "0");
// FunctionPass entry point, called with the function to process.
// NOTE(review): presumably returns true when the function was changed - confirm against the pass base class.
bool run_on_function(std::shared_ptr<Function> f) override;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "transformations_visibility.hpp"
#include "openvino/pass/pass.hpp"


namespace ov {
namespace pass {

class TRANSFORMATIONS_API MarkPrecisionSensitiveSubgraphs;

} // namespace pass
} // namespace ov

/**
* @ingroup ie_transformation_common_api
* @brief MarkPrecisionSensitiveSubgraphs transformation marks the constants
* inside the subgraph starting from precision-sensitive input and ending at
* the ShapeOf node as disabled for FP16 compression.
*
* Implemented as a FunctionPass: the work happens in run_on_function(), whose
* definition is not part of this header.
*/
class ov::pass::MarkPrecisionSensitiveSubgraphs : public FunctionPass {
public:
OPENVINO_RTTI("MarkPrecisionSensitiveSubgraphs", "0");
// FunctionPass entry point, called with the function to process.
// NOTE(review): presumably returns true when the function was changed - confirm against the pass base class.
bool run_on_function(std::shared_ptr<ov::Function> f) override;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "transformations_visibility.hpp"
#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace pass {

class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding;

} // namespace pass
} // namespace ov

/**
* @ingroup ie_transformation_common_api
* @brief Disables ConstantFolding for Convert operation in compressed function.
*
* Declared as a MatcherPass; the pattern and callback are set up in the
* constructor, which is only declared in this header.
*/
class ov::pass::DisableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("DisableDecompressionConvertConstantFolding", "0");
// Declared only; the constructor body is not part of this header.
DisableDecompressionConvertConstantFolding();
};
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
#include <openvino/core/variant.hpp>
#include <set>
#include <transformations/rt_info/disable_constant_folding.hpp>
#include <transformations/rt_info/disable_fp16_compression.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <transformations/rt_info/nms_selected_indices.hpp>
#include <transformations/rt_info/old_api_map_attribute.hpp>
#include <transformations/rt_info/primitives_priority_attribute.hpp>
#include <transformations/rt_info/strides_property.hpp>
#include <transformations/rt_info/decompression.hpp>
#include <transformations_visibility.hpp>
#include <utility>

Expand All @@ -37,5 +39,5 @@ class TRANSFORMATIONS_API Attributes {

ngraph::FactoryRegistry<Variant> m_factory_registry;
};
} // namespace pass
} // namespace ov
} // namespace pass
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <assert.h>
#include <functional>
#include <memory>
#include <string>
#include <set>

#include "openvino/core/node.hpp"
#include "openvino/core/variant.hpp"
#include "transformations_visibility.hpp"


namespace ov {

TRANSFORMATIONS_API void mark_as_decompression(const std::shared_ptr<Node>& node);

TRANSFORMATIONS_API void unmark_as_decompression(const std::shared_ptr<Node>& node);

TRANSFORMATIONS_API bool is_decompression(const std::shared_ptr<Node>& node);

/**
* @ingroup ie_runtime_attr_api
* @brief Decompression class represents a runtime info attribute that marks an operation
* as being used for decompression in the Compressed Only format.
*
* The attribute is a VariantImpl<void>, i.e. it stores no value of its own.
* NOTE(review): presumably its presence alone is what is_decompression() checks - confirm in the implementation.
*/
class TRANSFORMATIONS_API Decompression : public VariantImpl<void> {
public:
OPENVINO_RTTI("decompression", "0");

Decompression() = default;

// Nothing is handed to the visitor; the call simply reports success.
bool visit_attributes(AttributeVisitor& visitor) override { return true; }

// Always reports the attribute as non-copyable.
bool is_copyable() const override { return false; }
};

} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/node.hpp"
#include "openvino/core/variant.hpp"
#include "transformations_visibility.hpp"


namespace ov {

TRANSFORMATIONS_API void disable_fp16_compression(const std::shared_ptr<Node>& node);

TRANSFORMATIONS_API void enable_fp16_compression(const std::shared_ptr<Node>& node);

TRANSFORMATIONS_API bool fp16_compression_is_disabled(const std::shared_ptr<Node>& node);

/**
* @ingroup ie_runtime_attr_api
* @brief DisableFP16Compression class represents a runtime info attribute that marks an operation
* as prohibited from being converted to FP16 as part of the Compressed Only format.
*
* The attribute is a VariantImpl<void>, i.e. it stores no value of its own.
* NOTE(review): unlike ov::Decompression, visit_attributes() is not overridden here - confirm this is intentional.
*/
class TRANSFORMATIONS_API DisableFP16Compression : public VariantImpl<void> {
public:
OPENVINO_RTTI("disable_fp16_compression", "0");

DisableFP16Compression() = default;

// Always reports the attribute as non-copyable.
bool is_copyable() const override { return false; }
};

} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#include <ngraph/op/constant.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset8.hpp>

#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations/rt_info/attributes.hpp>

namespace ngraph {
namespace op {
Expand Down Expand Up @@ -49,6 +51,17 @@ bool has_op_with_type(const std::shared_ptr<const ngraph::Function> &function) {
}
return false;
}

/**
 * Checks whether @p function contains at least one opset8 Convert operation
 * for which ov::is_decompression() returns true.
 *
 * @param function Function whose operations (as returned by get_ops()) are scanned.
 * @return true as soon as such a Convert is found, false if none is.
 */
inline bool has_decompression_converts(const std::shared_ptr<const ngraph::Function>& function) {
    for (const auto& node : function->get_ops()) {
        const bool is_convert = std::dynamic_pointer_cast<ngraph::opset8::Convert>(node) != nullptr;
        // Short-circuit keeps the decompression query limited to Convert nodes.
        if (is_convert && ov::is_decompression(node)) {
            return true;
        }
    }
    return false;
}

inline std::string create_ie_output_name(const ngraph::Output<const ngraph::Node>& output) {
const auto& prev_layer = output.get_node_shared_ptr();
std::string out_name = prev_layer->get_friendly_name();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "transformations/common_optimizations/strides_optimization.hpp"
#include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp"
#include "transformations/common_optimizations/mul_conv_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
Expand Down Expand Up @@ -78,6 +79,7 @@
#include "transformations/op_conversions/gather_normalize_negative_indices.hpp"
#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp"
#include "transformations/op_conversions/convert_maxpool_downgrade.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"

#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/constant_folding.hpp>
Expand All @@ -95,10 +97,14 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
RUN_ON_FUNCTION_SCOPE(CommonOptimizations);
ngraph::pass::Manager manager(get_pass_config());

manager.register_pass<ov::pass::DisableDecompressionConvertConstantFolding>();

// Disable low_precision_enabled as all plugins handle low-precision sub-graph manually
// before CommonOptimization pipeline execution
manager.register_pass<ngraph::pass::MOCTransformations>(true, false);

manager.register_pass<ov::pass::ConvertCompressedOnlyToLegacy, false>();

// TODO: move to KMB
manager.register_pass<ngraph::pass::WeightsDequantizeToFakeQuantize>();

Expand Down
Loading

0 comments on commit 39cf8ff

Please sign in to comment.