/
ptq.py
125 lines (111 loc) · 4.62 KB
/
ptq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from paddle.distributed import fleet
from paddle.nn import Layer
from .config import QuantConfig
from .quantize import Quantization
class PTQ(Quantization):
"""
Applying post training quantization to the model.
"""
def __init__(self, config: QuantConfig):
super().__init__(config)
def _is_parallel_training(self):
try:
if fleet.worker_num() > 2:
return True
else:
return False
except Exception: # fleet is not initialized
return False
def quantize(self, model: Layer, inplace=False):
r"""
Create a model for post-training quantization.
The quantization configuration will be propagated in the model.
And it will insert observers into the model to collect and compute
quantization parameters.
Args:
model(Layer) - The model to be quantized.
inplace(bool) - Whether to modify the model in-place.
Return: The prepared model for post-training quantization.
Examples:
.. code-block:: python
>>> from paddle.quantization import PTQ, QuantConfig
>>> from paddle.quantization.observers import AbsmaxObserver
>>> from paddle.vision.models import LeNet
>>> observer = AbsmaxObserver()
>>> q_config = QuantConfig(activation=observer, weight=observer)
>>> ptq = PTQ(q_config)
>>> model = LeNet()
>>> model.eval()
>>> quant_model = ptq.quantize(model)
>>> print(quant_model)
LeNet(
(features): Sequential(
(0): QuantedConv2D(
(weight_quanter): AbsmaxObserverLayer()
(activation_quanter): AbsmaxObserverLayer()
)
(1): ObserveWrapper(
(_observer): AbsmaxObserverLayer()
(_observed): ReLU()
)
(2): ObserveWrapper(
(_observer): AbsmaxObserverLayer()
(_observed): MaxPool2D(kernel_size=2, stride=2, padding=0)
)
(3): QuantedConv2D(
(weight_quanter): AbsmaxObserverLayer()
(activation_quanter): AbsmaxObserverLayer()
)
(4): ObserveWrapper(
(_observer): AbsmaxObserverLayer()
(_observed): ReLU()
)
(5): ObserveWrapper(
(_observer): AbsmaxObserverLayer()
(_observed): MaxPool2D(kernel_size=2, stride=2, padding=0)
)
)
(fc): Sequential(
(0): QuantedLinear(
(weight_quanter): AbsmaxObserverLayer()
(activation_quanter): AbsmaxObserverLayer()
)
(1): QuantedLinear(
(weight_quanter): AbsmaxObserverLayer()
(activation_quanter): AbsmaxObserverLayer()
)
(2): QuantedLinear(
(weight_quanter): AbsmaxObserverLayer()
(activation_quanter): AbsmaxObserverLayer()
)
)
)
"""
_model = model
if not inplace:
assert (
not self._is_parallel_training()
), "'inplace' is not compatible with parallel training."
_model = copy.deepcopy(model)
_model.eval()
assert (
not model.training
), "Post-Training Quantization shoud not work on training models. Please set evaluation mode by model.eval()."
self._config._specify(_model)
self._convert_to_quant_layers(_model, self._config)
self._insert_activation_observers(_model, self._config)
return _model