From 7ecc2bdc569a8750cb9f696a05424e3c72057fd5 Mon Sep 17 00:00:00 2001 From: yghstill <742925032@qq.com> Date: Thu, 8 Dec 2022 09:37:57 +0000 Subject: [PATCH 1/4] support conv1d quant & skip calibrate zero-size tensor --- .../post_training_quantization.py | 120 +++++++++++------- .../slim/quantization/quantization_pass.py | 119 ++++++++++++----- 2 files changed, 161 insertions(+), 78 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index fa57a9bd746ea..f1f84f0b9bda3 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -398,6 +398,8 @@ def __init__( self._best_calibration_loss = {} # The threshold for algo = abs_max, mse or avg self._quantized_threshold = {} + # If the tensor is zero-size during calibration, it will skip quantization + self._zero_size_var_names = set() self._same_scale_tensor_list = same_scale_tensor_list self._freeze_model = freeze_model self._scale_dict = scale_dict @@ -465,9 +467,15 @@ def quantize(self): if self._algo == 'avg': for var_name in self._quantized_act_var_name: + if ( + var_name in self._zero_size_var_names + and var_name not in self._quantized_var_avg + ): + continue self._quantized_threshold[var_name] = np.array( self._quantized_var_avg[var_name] ).mean() + if self._algo in ["KL", "hist"]: self._calculate_kl_hist_threshold() @@ -741,6 +749,9 @@ def _sample_mse(self): _logger.info("MSE searching stage ...") for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if not var_tensor.any(): + self._zero_size_var_names.add(var_name) + continue var_tensor = var_tensor.flatten() abs_max_value = float(np.max(np.abs(var_tensor))) abs_max_value = 1e-8 if abs_max_value == 0.0 else abs_max_value @@ -792,6 +803,9 @@ def _sample_emd(self): _logger.info("EMD searching stage ...") for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if not var_tensor.any(): + self._zero_size_var_names.add(var_name) + continue var_tensor = var_tensor.flatten() abs_max_value = float(np.max(np.abs(var_tensor))) abs_max_value = 1e-8 if abs_max_value == 0.0 else abs_max_value @@ -845,6 +859,9 @@ def _sample_avg(self): for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if not var_tensor.any(): + self._zero_size_var_names.add(var_name) + continue abs_max_value = float(np.max(np.abs(var_tensor))) if var_name not in self._quantized_var_avg: self._quantized_var_avg[var_name] = [] @@ -857,7 +874,6 @@ def _sample_avg(self): ) ) self._quantized_var_avg[var_name].append(abs_avg_value) - continue def _sample_abs_max(self): if self._quantized_threshold == {}: @@ -884,6 +900,9 @@ def _sample_abs_max(self): for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if not var_tensor.any(): + self._zero_size_var_names.add(var_name) + continue abs_max_value = float(np.max(np.abs(var_tensor))) if (var_name not in self._quantized_threshold) or ( abs_max_value > self._quantized_threshold[var_name] @@ -916,6 +935,9 @@ def _sample_min_max(self): for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if not var_tensor.any(): + self._zero_size_var_names.add(var_name) + continue min_value = 
float(np.min(var_tensor)) max_value = float(np.max(var_tensor)) if (var_name not in self._quantized_var_min) or ( @@ -930,6 +952,11 @@ def _sample_min_max(self): def _sample_histogram(self): for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if (not var_tensor.any()) or ( + var_name not in self._sampling_act_histogram + ): + self._zero_size_var_names.add(var_name) + continue var_tensor_abs = np.abs(var_tensor) bins = self._sampling_act_histogram[var_name][1] hist, _ = np.histogram(var_tensor_abs, bins=bins) @@ -964,6 +991,9 @@ def _sample_ptf(self): for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if not var_tensor.any(): + self._zero_size_var_names.add(var_name) + continue abs_max_value = float(np.max(np.abs(var_tensor))) q_max = 2 ** (self._activation_bits - 1) - 1 scale8 = abs_max_value / q_max @@ -1020,6 +1050,9 @@ def _collect_activation_abs_min_max(self): ''' for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) + if not var_tensor.any(): + self._zero_size_var_names.add(var_name) + continue var_tensor = np.abs(var_tensor) min_value = float(np.min(var_tensor)) max_value = float(np.max(var_tensor)) @@ -1039,6 +1072,10 @@ def _init_sampling_act_histogram(self): Based on the min/max value, init the sampling_act_histogram. ''' for var_name in self._quantized_act_var_name: + if (var_name in self._zero_size_var_names) and ( + var_name not in self._sampling_act_abs_min_max + ): + continue if var_name not in self._sampling_act_histogram: min_val = self._sampling_act_abs_min_max[var_name][0] max_val = self._sampling_act_abs_min_max[var_name][1] @@ -1077,6 +1114,10 @@ def _calculate_kl_hist_threshold(self): self._quantized_var_threshold[var_name] = weight_threshold for var_name in self._quantized_act_var_name: + if (var_name in self._zero_size_var_names) and ( + var_name not in self._sampling_act_histogram + ): + continue hist, hist_edeges = self._sampling_act_histogram[var_name] if self._algo == "KL": bin_width = hist_edeges[1] - hist_edeges[0] @@ -1162,7 +1203,6 @@ def _update_program(self): if self._same_scale_tensor_list is not None: for tensor_list in self._same_scale_tensor_list: max_scale = None - tmp_tensor_list = [] for tensor_name in tensor_list: if '#' in tensor_name: real_tensor_name, opera, scalar = tensor_name.split( @@ -1261,21 +1301,40 @@ def _save_output_threshold(self): self._calibration_scales = {} def save_info( - op_node, out_var_name, threshold_map, out_info_name, quantized_type + op_node, + out_var_name, + threshold_map, + out_info_name, + argname_index, + quantized_type, ): - assert ( - out_var_name in threshold_map - ), "The output ({}) of {} node does not have threshold.".format( - out_var_name, op_node.type - ) + if (out_var_name in self._zero_size_var_names) and ( + out_var_name not in threshold_map + ): + _logger.warning( + "{} is zero-size tensor and unable to calibrate, so skip quant it.".format( + out_var_name + ) + ) + return + else: + assert ( + out_var_name in threshold_map + ), "The output ({}) of {} node does not have threshold.".format( + out_var_name, op_node.type + ) if self._onnx_format: # For easy extension, every var_node set a dict to save parameters of quant. 
- self._calibration_scales[var_name] = {} - self._calibration_scales[var_name]['scale'] = threshold_map[ - var_name + self._calibration_scales[out_var_name] = {} + self._calibration_scales[out_var_name]['scale'] = threshold_map[ + out_var_name ] else: - op_node._set_attr(out_info_name, threshold_map[var_name]) + op_node._set_attr(out_info_name, threshold_map[out_var_name]) + op_node._set_attr( + argname_index[0] + str(argname_index[1]) + "_threshold", + threshold_map[out_var_name], + ) op_node._set_attr("with_quant_attr", True) if op_node.type in self._quantizable_op_type: op._set_attr("quantization_type", quantized_type) @@ -1285,52 +1344,23 @@ def analysis_and_save_info(op_node, out_var_name): assert argname_index is not None, ( out_var_name + " is not the output of the op" ) - if self._algo == "KL": - # For compatibility, we save output threshold by two methods. - save_info( - op_node, - out_var_name, - self._quantized_var_threshold, - "out_threshold", - "post_kl", - ) - save_info( - op_node, - out_var_name, - self._quantized_var_threshold, - argname_index[0] + str(argname_index[1]) + "_threshold", - "post_kl", - ) - elif self._algo == "hist": + if self._algo in ["KL", "hist"]: # For compatibility, we save output threshold by two methods. save_info( op_node, out_var_name, self._quantized_var_threshold, "out_threshold", - "post_hist", + argname_index, + "post_" + str(self._algo).lower(), ) - save_info( - op_node, - out_var_name, - self._quantized_var_threshold, - argname_index[0] + str(argname_index[1]) + "_threshold", - "post_hist", - ) - elif self._algo in ["avg", "abs_max", "mse", "emd", "ptf"]: save_info( op_node, out_var_name, self._quantized_threshold, "out_threshold", - "post_" + str(self._algo), - ) - save_info( - op_node, - out_var_name, - self._quantized_threshold, - argname_index[0] + str(argname_index[1]) + "_threshold", + argname_index, "post_" + str(self._algo), ) elif self._algo == "min_max": diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 55e1dcacdcb62..338a4d461cad2 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -2134,7 +2134,9 @@ def __init__( self._moving_rate = moving_rate self._scale_dict = scale_dict - def insert_quant_op(self, graph, var_node, var_name=None): + def insert_quant_op( + self, graph, var_node, var_name=None, scale_var_node=None + ): assert var_node.is_var(), '{} is not a var'.format(var_node.name()) var_name = var_node.name() if not var_name else var_name quant_var_node = graph.create_var_node( @@ -2143,40 +2145,43 @@ def insert_quant_op(self, graph, var_node, var_name=None): shape=var_node.shape(), var_dtype=var_node.dtype(), ) - data_type = ( - 'float64' - if var_node.dtype() == core.VarDesc.VarType.FP64 - else 'float32' - ) - scale_name = self._quantized_scale_name(var_name) - if self.channel_wise: - scale_var_shape = var_node.shape()[self.quant_axis] - scale_var_type = core.VarDesc.VarType.LOD_TENSOR - init_scale_value = ( - np.ones(scale_var_shape, dtype=data_type) * _SCALE_DEFAULT_VALUE + if not scale_var_node: + data_type = ( + 'float64' + if var_node.dtype() == core.VarDesc.VarType.FP64 + else 'float32' ) - else: - scale_var_shape = 1 - scale_var_type = var_node.type() - init_scale_value = np.array([_SCALE_DEFAULT_VALUE], dtype=data_type) + scale_name = self._quantized_scale_name(var_name) + if self.channel_wise: + scale_var_shape = 
var_node.shape()[self.quant_axis] + scale_var_type = core.VarDesc.VarType.LOD_TENSOR + init_scale_value = ( + np.ones(scale_var_shape, dtype=data_type) + * _SCALE_DEFAULT_VALUE + ) + else: + scale_var_shape = 1 + scale_var_type = var_node.type() + init_scale_value = np.array( + [_SCALE_DEFAULT_VALUE], dtype=data_type + ) - if ( - self._scale_dict is not None - and var_node.name() in self._scale_dict.keys() - ): - init_scale_value = np.array( - [self._scale_dict[var_node.name()]], dtype=data_type + if ( + self._scale_dict is not None + and var_node.name() in self._scale_dict.keys() + ): + init_scale_value = np.array( + [self._scale_dict[var_node.name()]], dtype=data_type + ) + scale_var_node = graph.create_persistable_node( + name=scale_name, + var_type=scale_var_type, + shape=[scale_var_shape], + var_dtype=var_node.dtype(), + ) + _init_var_node( + scale_var_node, init_scale_value, self._scope, self._place ) - - scale_var_node = graph.create_persistable_node( - name=scale_name, - var_type=scale_var_type, - shape=[scale_var_shape], - var_dtype=var_node.dtype(), - ) - _init_var_node( - scale_var_node, init_scale_value, self._scope, self._place - ) zero_point_node = None if zero_point_node is None: @@ -2510,6 +2515,7 @@ def _quant_preprocess(self, op_node): def _transform_forward(self, graph, op): op.op()._set_attr("quantization_type", "qat_with_weight") + weight_scale_node = None inputs = op.inputs for var_node in inputs: if var_node.name() not in op.input_arg_names(): @@ -2595,7 +2601,10 @@ def _transform_forward(self, graph, op): ) self.dequantized_vars[name] = dequant_var_node + if is_weight: + weight_scale_node = scale_var_node graph.update_input_link(var_node, dequant_var_node, op) + return weight_scale_node def _transform_backward(self, graph, op): for var_node in op.inputs: @@ -2610,11 +2619,49 @@ def _has_weight(self, op): for var_node in op.inputs: if var_node.name() not in op.input_arg_names(): continue - name = var_node.name() if var_node.name() in self.persistable_vars: has_weight = True return has_weight + def _quant_conv1d(self, graph, op): + conv_weight_var_name = op.input("Filter")[0] + scale_var_node = None + # quant unsqueeze2 + for _op in graph.all_op_nodes(): + var_names = utils._get_op_output_var_names(_op) + if conv_weight_var_name in var_names and self._has_weight(_op): + scale_var_node = self._transform_forward(graph, _op) + # insert qdq before conv2d + for var_node in op.inputs: + quant_bits = self._weight_bits + quant_type = self._weight_quantize_type + quant_axis = -1 + channel_wise = False + if quant_type == 'channel_wise_abs_max': # Weight quantization + channel_wise = True + quant_axis = ( + 1 if op.name() in utils._channelwise_quant_axis1_ops else 0 + ) + insert_quant_pass = InsertQuantizeLinear( + self._place, + self._scope, + quant_bits=quant_bits, + quant_axis=quant_axis, + channel_wise=channel_wise, + moving_rate=self._moving_rate, + is_test=self._is_test, + ) + (quant_var_node, _,) = insert_quant_pass.insert_quant_op( + graph, + var_node, + var_name=var_node.name(), + scale_var_node=scale_var_node, + ) + dequant_var_node = insert_quant_pass.insert_dequant_op( + graph, quant_var_node, scale_var_node + ) + graph.update_input_link(var_node, dequant_var_node, op) + def apply(self, graph): """ Quantize the graph for training process. 
According to weight and @@ -2664,6 +2711,12 @@ def apply(self, graph): op ): self._transform_forward(graph, op) + # support conv1d quantization + if ( + op.name() == "conv2d" + and "unsqueeze2" in op.input("Filter")[0] + ): + self._quant_conv1d(graph, op) t.update() # The loop for renaming the inputs of backward op. for op in ops: From 77f58bf2c89059808f29c2dda355777b655df6d1 Mon Sep 17 00:00:00 2001 From: yghstill <742925032@qq.com> Date: Thu, 8 Dec 2022 10:24:40 +0000 Subject: [PATCH 2/4] fix conv1d quant --- .../slim/quantization/quantization_pass.py | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 338a4d461cad2..9e27e952e29be 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -2625,16 +2625,24 @@ def _has_weight(self, op): def _quant_conv1d(self, graph, op): conv_weight_var_name = op.input("Filter")[0] - scale_var_node = None + weight_scale_node = None # quant unsqueeze2 for _op in graph.all_op_nodes(): var_names = utils._get_op_output_var_names(_op) if conv_weight_var_name in var_names and self._has_weight(_op): - scale_var_node = self._transform_forward(graph, _op) + weight_scale_node = self._transform_forward(graph, _op) # insert qdq before conv2d for var_node in op.inputs: - quant_bits = self._weight_bits - quant_type = self._weight_quantize_type + quant_bits = ( + self._weight_bits + if var_node.name() == conv_weight_var_name + else self._activation_bits + ) + quant_type = ( + self._weight_quantize_type + if var_node.name() == conv_weight_var_name + else self._activation_quantize_type + ) quant_axis = -1 channel_wise = False if quant_type == 'channel_wise_abs_max': # Weight quantization @@ -2651,7 +2659,15 @@ def _quant_conv1d(self, graph, op): moving_rate=self._moving_rate, is_test=self._is_test, ) - (quant_var_node, _,) = insert_quant_pass.insert_quant_op( + scale_var_node = ( + weight_scale_node + if var_node.name() == conv_weight_var_name + else None + ) + ( + quant_var_node, + scale_var_node, + ) = insert_quant_pass.insert_quant_op( graph, var_node, var_name=var_node.name(), From 1cfbf8e06e803bd1fc6606572cb399281e06a88f Mon Sep 17 00:00:00 2001 From: yghstill <742925032@qq.com> Date: Fri, 9 Dec 2022 08:04:30 +0000 Subject: [PATCH 3/4] fix condition --- .../quantization/post_training_quantization.py | 8 +++----- .../slim/quantization/quantization_pass.py | 15 +++++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index f1f84f0b9bda3..5375f2e33b6ef 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -398,7 +398,8 @@ def __init__( self._best_calibration_loss = {} # The threshold for algo = abs_max, mse or avg self._quantized_threshold = {} - # If the tensor is zero-size during calibration, it will skip quantization + # If the tensor is zero-size during any calibration step, + # it will be stored in self._zero_size_var_names self._zero_size_var_names = set() self._same_scale_tensor_list = same_scale_tensor_list self._freeze_model = freeze_model @@ -467,10 +468,7 @@ def quantize(self): if self._algo == 
'avg': for var_name in self._quantized_act_var_name: - if ( - var_name in self._zero_size_var_names - and var_name not in self._quantized_var_avg - ): + if var_name not in self._quantized_var_avg: continue self._quantized_threshold[var_name] = np.array( self._quantized_var_avg[var_name] diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 9e27e952e29be..43474a2f91243 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -2624,7 +2624,13 @@ def _has_weight(self, op): return has_weight def _quant_conv1d(self, graph, op): + # conv1d in inference is a combination of unsqueeze2 + conv2d + if ("conv2d" not in op.name()) or ( + "unsqueeze2" not in op.input("Filter")[0] + ): + return conv_weight_var_name = op.input("Filter")[0] + # unsqueeze2 and conv2d will share weight scale weight_scale_node = None # quant unsqueeze2 for _op in graph.all_op_nodes(): @@ -2645,7 +2651,7 @@ def _quant_conv1d(self, graph, op): ) quant_axis = -1 channel_wise = False - if quant_type == 'channel_wise_abs_max': # Weight quantization + if quant_type == 'channel_wise_abs_max': channel_wise = True quant_axis = ( 1 if op.name() in utils._channelwise_quant_axis1_ops else 0 @@ -2727,11 +2733,8 @@ def apply(self, graph): op ): self._transform_forward(graph, op) - # support conv1d quantization - if ( - op.name() == "conv2d" - and "unsqueeze2" in op.input("Filter")[0] - ): + else: # op is not persistable + # support conv1d quantization self._quant_conv1d(graph, op) t.update() # The loop for renaming the inputs of backward op. From b6264ef5129895e6300e9ff19f3af224757e7997 Mon Sep 17 00:00:00 2001 From: yghstill <742925032@qq.com> Date: Mon, 12 Dec 2022 05:49:23 +0000 Subject: [PATCH 4/4] fix unittest --- .../contrib/slim/quantization/post_training_quantization.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index 5375f2e33b6ef..5ed3be2622ae5 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -1367,6 +1367,7 @@ def analysis_and_save_info(op_node, out_var_name): out_var_name, self._quantized_var_min, "out_min", + argname_index, "post_min_max", ) save_info( @@ -1374,6 +1375,7 @@ def analysis_and_save_info(op_node, out_var_name): out_var_name, self._quantized_var_max, "out_max", + argname_index, "post_min_max", )
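
Supplementary sketches (illustrative only, not part of the patches above).

1. The zero-size skip. Every sampling pass touched by PATCH 1/4 (_sample_mse, _sample_emd, _sample_avg, _sample_abs_max, _sample_min_max, _sample_histogram, _sample_ptf, and _collect_activation_abs_min_max) now opens with the same guard: if the loaded tensor has no truthy element, the variable name is recorded in self._zero_size_var_names and the pass moves on, and the later histogram-initialization, KL/hist threshold, and save_info steps consult that set instead of asserting on a missing threshold. Below is a minimal standalone sketch of the guard; the dict-backed scope and the sample_abs_max_sketch driver are invented for illustration (the real code reads tensors through utils.load_variable_data), and note that "not tensor.any()" also holds for an all-zero tensor, which is therefore skipped the same way as a genuinely empty one.

import numpy as np

def sample_abs_max_sketch(scope, quantized_act_var_names,
                          zero_size_var_names, quantized_threshold):
    # Mirrors the guard added in PATCH 1/4: record tensors that cannot
    # be calibrated and skip them instead of failing later.
    for var_name in quantized_act_var_names:
        var_tensor = scope[var_name]  # stand-in for utils.load_variable_data
        if not var_tensor.any():      # zero-size (or all-zero) tensor
            zero_size_var_names.add(var_name)
            continue
        abs_max_value = float(np.max(np.abs(var_tensor)))
        if (var_name not in quantized_threshold
                or abs_max_value > quantized_threshold[var_name]):
            quantized_threshold[var_name] = abs_max_value

# Toy run: 'b' is zero-size, so it lands in the skip set and gets no
# threshold, matching how save_info later warns and returns for it.
scope = {'a': np.array([0.5, -2.0]), 'b': np.zeros((0, 4))}
skipped, thresholds = set(), {}
sample_abs_max_sketch(scope, ['a', 'b'], skipped, thresholds)
assert skipped == {'b'} and thresholds == {'a': 2.0}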
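
2. Conv1d detection and per-input config. Paddle exports conv1d to an inference graph as unsqueeze2 followed by conv2d, so the folded conv2d's Filter input name carries the producing unsqueeze2 op's name. PATCH 3/4 moves that test into _quant_conv1d itself and invokes it for the non-weighted ops the main loop would otherwise skip, while PATCH 2/4 makes the weight input take _weight_bits and _weight_quantize_type, gives every other input the activation settings, and reuses the weight scale node computed when quantizing the unsqueeze2 producer by passing it through insert_quant_op's new scale_var_node argument, so unsqueeze2 and conv2d share one scale. The sketch below shows only that dispatch logic on a simplified op record; the FakeOp class and its fields are invented stand-ins for IrGraph op nodes.

from dataclasses import dataclass

@dataclass
class FakeOp:
    name: str          # op type, e.g. "conv2d"
    filter_input: str  # first entry of op.input("Filter")

def is_folded_conv1d(op):
    # conv1d in inference is a combination of unsqueeze2 + conv2d, and
    # the folded weight's name contains the producing unsqueeze2 op.
    return "conv2d" in op.name and "unsqueeze2" in op.filter_input

def pick_quant_config(var_name, conv_weight_var_name,
                      weight_bits=8, activation_bits=8,
                      weight_quant='channel_wise_abs_max',
                      act_quant='moving_average_abs_max'):
    # PATCH 2/4: the conv weight gets the weight config, every other
    # conv input gets the activation config.
    if var_name == conv_weight_var_name:
        return weight_bits, weight_quant
    return activation_bits, act_quant

assert is_folded_conv1d(FakeOp("conv2d", "unsqueeze2_0.tmp_0"))
assert not is_folded_conv1d(FakeOp("conv2d", "conv1_weights"))
assert pick_quant_config("unsqueeze2_0.tmp_0",
                         "unsqueeze2_0.tmp_0")[1] == 'channel_wise_abs_max'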
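
3. Threshold attribute naming. The save_info refactor in PATCH 1/4 collapses the duplicated KL/hist/abs_max branches: one call now writes both the generic "out_threshold" attribute and the argument-indexed one, whose name is composed from argname_index, and PATCH 4/4 simply threads argname_index through the two min_max call sites that the refactor had left behind. A one-line sketch of the composed name, with illustrative sample values:

argname_index = ("Out", 0)                    # (output arg name, index)
attr_name = argname_index[0] + str(argname_index[1]) + "_threshold"
assert attr_name == "Out0_threshold"          # what op_node._set_attr receives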