forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add model compression to FP16 weights (openvinotoolkit#7588)
* Add model compression to FP16 weights * Fix build * Fix build * Fix build * Add wrapper over ConvertPrecision * Add documentation to attributes * Fix MO IR Reader * Fix build * Return DisableDecompressionConvertConstantFolding call in CommonOptimizations * Temporarily disable old_api map * Fix TI Convert issue * Apply review feedback * Fix build * Fix build * Fix build
- Loading branch information
1 parent
713420f
commit 39cf8ff
Showing
45 changed files
with
986 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
...transformations/include/transformations/common_optimizations/compress_float_constants.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "transformations_visibility.hpp" | ||
#include "openvino/pass/graph_rewrite.hpp" | ||
|
||
namespace ov { | ||
namespace pass { | ||
|
||
// Forward declarations of the passes that are defined further down in this header. | ||
class TRANSFORMATIONS_API CompressFloatConstantsImpl; | ||
class TRANSFORMATIONS_API AddOldApiMapToParameters; | ||
class TRANSFORMATIONS_API CompressFloatConstants; | ||
|
||
} // namespace pass | ||
} // namespace ov | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief CompressFloatConstantsImpl transformation replaces FP32/FP64 Constants with FP16 ones. | ||
* | ||
* Declared as a MatcherPass. Only the constructor is declared here; its | ||
* definition is not part of this header. | ||
*/ | ||
class ov::pass::CompressFloatConstantsImpl : public ov::pass::MatcherPass { | ||
public: | ||
OPENVINO_RTTI("CompressFloatConstantsImpl", "0"); | ||
CompressFloatConstantsImpl(); | ||
}; | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief AddOldApiMapToParameters transformation adds OldApiMap to each float input to the model. | ||
* | ||
* Declared as a MatcherPass. Only the constructor is declared here; its | ||
* definition is not part of this header. | ||
*/ | ||
class ov::pass::AddOldApiMapToParameters : public ov::pass::MatcherPass { | ||
public: | ||
OPENVINO_RTTI("AddOldApiMapToParameters", "0"); | ||
AddOldApiMapToParameters(); | ||
}; | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief CompressFloatConstants transformation replaces FP32/FP64 Constants with FP16 ones. | ||
* | ||
* GraphRewrite wrapper: its constructor registers the CompressFloatConstantsImpl | ||
* matcher first and the AddOldApiMapToParameters matcher second. | ||
*/ | ||
class ov::pass::CompressFloatConstants : public ov::pass::GraphRewrite { | ||
public: | ||
OPENVINO_RTTI("CompressFloatConstants", "0"); | ||
CompressFloatConstants() { | ||
// Register both matcher passes with this GraphRewrite. | ||
add_matcher<ov::pass::CompressFloatConstantsImpl>(); | ||
add_matcher<ov::pass::AddOldApiMapToParameters>(); | ||
} | ||
}; |
50 changes: 50 additions & 0 deletions
50
...tions/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "transformations_visibility.hpp" | ||
#include "openvino/pass/pass.hpp" | ||
#include "openvino/pass/graph_rewrite.hpp" | ||
|
||
namespace ov { | ||
namespace pass { | ||
|
||
// Forward declarations of the passes that are defined further down in this header. | ||
class TRANSFORMATIONS_API ConvertPrecisionCompressedOnly; | ||
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding; | ||
class TRANSFORMATIONS_API ConvertCompressedOnlyToLegacy; | ||
|
||
} // namespace pass | ||
} // namespace ov | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief ConvertPrecisionCompressedOnly transformation runs ConvertPrecision transformation for CompressedOnly format. | ||
* | ||
* Declared as a FunctionPass. run_on_function is only declared here; its | ||
* definition is not part of this header. | ||
*/ | ||
|
||
class ov::pass::ConvertPrecisionCompressedOnly : public ov::pass::FunctionPass { | ||
public: | ||
OPENVINO_RTTI("ConvertPrecisionCompressedOnly", "0"); | ||
// NOTE(review): the meaning of the bool result is not shown in this header; presumably it reports whether `f` was modified - confirm. | ||
bool run_on_function(std::shared_ptr<Function> f) override; | ||
}; | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief Enables ConstantFolding for Convert operation in compressed function. | ||
* | ||
* Declared as a MatcherPass. Only the constructor is declared here; its | ||
* definition is not part of this header. | ||
*/ | ||
class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { | ||
public: | ||
OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0"); | ||
EnableDecompressionConvertConstantFolding(); | ||
}; | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief ConvertCompressedOnlyToLegacy transformation converts compression only FP16 format to legacy FP16 format. | ||
* | ||
* Declared as a FunctionPass. run_on_function is only declared here; its | ||
* definition is not part of this header. | ||
*/ | ||
class ov::pass::ConvertCompressedOnlyToLegacy : public ov::pass::FunctionPass { | ||
public: | ||
OPENVINO_RTTI("ConvertCompressedOnlyToLegacy", "0"); | ||
// NOTE(review): the meaning of the bool result is not shown in this header; presumably it reports whether `f` was modified - confirm. | ||
bool run_on_function(std::shared_ptr<Function> f) override; | ||
}; |
29 changes: 29 additions & 0 deletions
29
...tions/include/transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "transformations_visibility.hpp" | ||
#include "openvino/pass/pass.hpp" | ||
|
||
|
||
namespace ov { | ||
namespace pass { | ||
|
||
// Forward declaration of the pass that is defined further down in this header. | ||
class TRANSFORMATIONS_API MarkPrecisionSensitiveSubgraphs; | ||
|
||
} // namespace pass | ||
} // namespace ov | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief MarkPrecisionSensitiveSubgraphs transformation marks the constants | ||
* inside the subgraph starting from precision-sensitive input and ending at | ||
* the ShapeOf node as disabled for FP16 compression. | ||
* | ||
* run_on_function is only declared here; its definition is not part of this header. | ||
* | ||
* NOTE(review): the base class is written as unqualified `FunctionPass`, whereas | ||
* other pass headers spell it `ov::pass::FunctionPass`; consider qualifying it | ||
* for consistency. | ||
*/ | ||
class ov::pass::MarkPrecisionSensitiveSubgraphs : public FunctionPass { | ||
public: | ||
OPENVINO_RTTI("MarkPrecisionSensitiveSubgraphs", "0"); | ||
// NOTE(review): the meaning of the bool result is not shown in this header; presumably it reports whether `f` was modified - confirm. | ||
bool run_on_function(std::shared_ptr<ov::Function> f) override; | ||
}; |
26 changes: 26 additions & 0 deletions
26
...ransformations/include/transformations/disable_decompression_convert_constant_folding.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "transformations_visibility.hpp" | ||
#include "openvino/pass/graph_rewrite.hpp" | ||
|
||
namespace ov { | ||
namespace pass { | ||
|
||
// Forward declaration of the pass that is defined further down in this header. | ||
class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding; | ||
|
||
} // namespace pass | ||
} // namespace ov | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief Disables ConstantFolding for Convert operation in compressed function. | ||
* | ||
* Declared as a MatcherPass. Only the constructor is declared here; its | ||
* definition is not part of this header. | ||
*/ | ||
class ov::pass::DisableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { | ||
public: | ||
OPENVINO_RTTI("DisableDecompressionConvertConstantFolding", "0"); | ||
DisableDecompressionConvertConstantFolding(); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
inference-engine/src/transformations/include/transformations/rt_info/decompression.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <assert.h> | ||
#include <functional> | ||
#include <memory> | ||
#include <string> | ||
#include <set> | ||
|
||
#include "openvino/core/node.hpp" | ||
#include "openvino/core/variant.hpp" | ||
#include "transformations_visibility.hpp" | ||
|
||
|
||
namespace ov { | ||
|
||
// NOTE(review): only the declaration is visible here; judging by the name, this presumably marks `node` as a decompression operation - confirm against the definition. | ||
TRANSFORMATIONS_API void mark_as_decompression(const std::shared_ptr<Node>& node); | ||
|
||
// NOTE(review): presumably removes the decompression mark from `node` - confirm against the definition. | ||
TRANSFORMATIONS_API void unmark_as_decompression(const std::shared_ptr<Node>& node); | ||
|
||
// NOTE(review): presumably returns true when `node` carries the decompression mark - confirm against the definition. | ||
TRANSFORMATIONS_API bool is_decompression(const std::shared_ptr<Node>& node); | ||
|
||
/** | ||
* @ingroup ie_runtime_attr_api | ||
* @brief Decompression class represents a runtime info attribute that marks an operation | ||
* as being used for decompression in the Compressed Only format. | ||
*/ | ||
class TRANSFORMATIONS_API Decompression : public VariantImpl<void> { | ||
public: | ||
OPENVINO_RTTI("decompression", "0"); | ||
|
||
Decompression() = default; | ||
|
||
// Returns true without using `visitor`: no attribute fields are visited. | ||
bool visit_attributes(AttributeVisitor& visitor) override { return true; } | ||
|
||
// Always returns false. NOTE(review): presumably this keeps the attribute from being copied to other nodes - confirm. | ||
bool is_copyable() const override { return false; } | ||
}; | ||
|
||
} // namespace ov |
34 changes: 34 additions & 0 deletions
34
...e-engine/src/transformations/include/transformations/rt_info/disable_fp16_compression.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "openvino/core/node.hpp" | ||
#include "openvino/core/variant.hpp" | ||
#include "transformations_visibility.hpp" | ||
|
||
|
||
namespace ov { | ||
|
||
// NOTE(review): only the declaration is visible here; judging by the name, this presumably marks `node` as excluded from FP16 compression - confirm against the definition. | ||
TRANSFORMATIONS_API void disable_fp16_compression(const std::shared_ptr<Node>& node); | ||
|
||
// NOTE(review): presumably removes that exclusion mark from `node` - confirm against the definition. | ||
TRANSFORMATIONS_API void enable_fp16_compression(const std::shared_ptr<Node>& node); | ||
|
||
// NOTE(review): presumably returns true when `node` is marked as excluded from FP16 compression - confirm against the definition. | ||
TRANSFORMATIONS_API bool fp16_compression_is_disabled(const std::shared_ptr<Node>& node); | ||
|
||
/** | ||
* @ingroup ie_runtime_attr_api | ||
* @brief DisableFP16Compression class represents a runtime info attribute that marks an operation | ||
* as prohibited from being converted to FP16 as part of the Compressed Only format. | ||
*/ | ||
class TRANSFORMATIONS_API DisableFP16Compression : public VariantImpl<void> { | ||
public: | ||
OPENVINO_RTTI("disable_fp16_compression", "0"); | ||
|
||
DisableFP16Compression() = default; | ||
|
||
// Always returns false. NOTE(review): presumably this keeps the attribute from being copied to other nodes - confirm. | ||
bool is_copyable() const override { return false; } | ||
}; | ||
|
||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.