From 5d29a5bf4530d4e51419836e10a905cbf8c3337a Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Thu, 22 Dec 2022 19:11:23 +0800
Subject: [PATCH] fix unittest in post training quantization (#49257)

---
 ..._post_training_quantization_mobilenetv1.py | 387 +++++++++++-------
 1 file changed, 232 insertions(+), 155 deletions(-)

diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index 00a4e2c2aa49e..471798dec28c5 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -77,13 +77,14 @@ def process_image(sample, mode, color_jitter, rotate):
     return img, sample[1]
 
 
-def _reader_creator(file_list,
-                    mode,
-                    shuffle=False,
-                    color_jitter=False,
-                    rotate=False,
-                    data_dir=DATA_DIR):
-
+def _reader_creator(
+    file_list,
+    mode,
+    shuffle=False,
+    color_jitter=False,
+    rotate=False,
+    data_dir=DATA_DIR,
+):
     def reader():
         with open(file_list) as flist:
             full_lines = [line.strip() for line in flist]
@@ -98,10 +99,9 @@ def reader():
                 continue
             yield img_path, int(label)
 
-    mapper = functools.partial(process_image,
-                               mode=mode,
-                               color_jitter=color_jitter,
-                               rotate=rotate)
+    mapper = functools.partial(
+        process_image, mode=mode, color_jitter=color_jitter, rotate=rotate
+    )
 
     return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
 
@@ -112,11 +112,11 @@ def val(data_dir=DATA_DIR):
 
 
 class TestPostTrainingQuantization(unittest.TestCase):
-
     def setUp(self):
         self.int8_download = 'int8/download'
-        self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
-                                               self.int8_download)
+        self.cache_folder = os.path.expanduser(
+            '~/.cache/paddle/dataset/' + self.int8_download
+        )
         self.data_cache_folder = ''
         data_urls = []
         data_md5s = []
@@ -129,31 +129,34 @@ def setUp(self):
                 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
             )
             data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
-            self.data_cache_folder = self.download_data(data_urls, data_md5s,
-                                                        "full_data", False)
+            self.data_cache_folder = self.download_data(
+                data_urls, data_md5s, "full_data", False
+            )
         else:
             data_urls.append(
                 'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
             )
             data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d')
-            self.data_cache_folder = self.download_data(data_urls, data_md5s,
-                                                        "small_data", False)
+            self.data_cache_folder = self.download_data(
+                data_urls, data_md5s, "small_data", False
+            )
 
         # reader/decorator.py requires the relative path to the data folder
         if not os.path.exists("./data/ILSVRC2012"):
-            cmd = 'rm -rf {0} && ln -s {1} {0}'.format("data",
-                                                       self.data_cache_folder)
+            cmd = 'rm -rf {0} && ln -s {1} {0}'.format(
+                "data", self.data_cache_folder
+            )
             os.system(cmd)
 
         self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50
-        self.sample_iterations = 50 if os.environ.get(
-            'DATASET') == 'full' else 2
-        self.infer_iterations = 50000 if os.environ.get(
-            'DATASET') == 'full' else 2
+        self.infer_iterations = (
+            50000 if os.environ.get('DATASET') == 'full' else 2
+        )
 
         self.root_path = tempfile.TemporaryDirectory()
-        self.int8_model = os.path.join(self.root_path.name,
-                                       "post_training_quantization")
+        self.int8_model = os.path.join(
+            self.root_path.name, "post_training_quantization"
+        )
 
     def tearDown(self):
         self.root_path.cleanup()
@@ -161,7 +164,8 @@ def tearDown(self):
     def cache_unzipping(self, target_folder, zip_path):
         if not os.path.exists(target_folder):
             cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(
-                target_folder, zip_path)
+                target_folder, zip_path
+            )
             os.system(cmd)
 
     def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
@@ -173,13 +177,15 @@ def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
                 download(data_urls[i], self.int8_download, data_md5s[i])
                 file_names.append(data_urls[i].split('/')[-1])
 
-            zip_path = os.path.join(self.cache_folder,
-                                    'full_imagenet_val.tar.gz')
+            zip_path = os.path.join(
+                self.cache_folder, 'full_imagenet_val.tar.gz'
+            )
 
             if not os.path.exists(zip_path):
                 cat_command = 'cat'
                 for file_name in file_names:
-                    cat_command += ' ' + os.path.join(self.cache_folder,
-                                                      file_name)
+                    cat_command += ' ' + os.path.join(
+                        self.cache_folder, file_name
+                    )
                 cat_command += ' > ' + zip_path
                 os.system(cat_command)
 
@@ -199,8 +205,16 @@ def run_program(self, model_path, batch_size, infer_iterations):
         image_shape = [3, 224, 224]
         place = fluid.CPUPlace()
         exe = fluid.Executor(place)
-        [infer_program, feed_dict, fetch_targets] = \
-            fluid.io.load_inference_model(model_path, exe)
+        [
+            infer_program,
+            feed_dict,
+            fetch_targets,
+        ] = fluid.io.load_inference_model(
+            model_path,
+            exe,
+            model_filename="inference.pdmodel",
+            params_filename="inference.pdiparams",
+        )
         val_reader = paddle.batch(val(), batch_size)
         iterations = infer_iterations
 
@@ -208,23 +222,28 @@ def run_program(self, model_path, batch_size, infer_iterations):
         cnt = 0
         periods = []
         for batch_id, data in enumerate(val_reader()):
-            image = np.array([x[0].reshape(image_shape)
-                              for x in data]).astype("float32")
+            image = np.array([x[0].reshape(image_shape) for x in data]).astype(
+                "float32"
+            )
             label = np.array([x[1] for x in data]).astype("int64")
             label = label.reshape([-1, 1])
 
             t1 = time.time()
-            _, acc1, _ = exe.run(infer_program,
-                                 feed={
-                                     feed_dict[0]: image,
-                                     feed_dict[1]: label
-                                 },
-                                 fetch_list=fetch_targets)
+            pred = exe.run(
+                infer_program,
+                feed={feed_dict[0]: image},
+                fetch_list=fetch_targets,
+            )
             t2 = time.time()
             period = t2 - t1
             periods.append(period)
 
-            test_info.append(np.mean(acc1) * len(data))
+            pred = np.array(pred[0])
+            sort_array = pred.argsort(axis=1)
+            top_1_pred = sort_array[:, -1:][:, ::-1]
+            top_1 = np.mean(label == top_1_pred)
+
+            test_info.append(np.mean(top_1) * len(data))
             cnt += len(data)
 
             if (batch_id + 1) % 100 == 0:
@@ -238,22 +257,25 @@ def run_program(self, model_path, batch_size, infer_iterations):
         acc1 = np.sum(test_info) / cnt
         return (throughput, latency, acc1)
 
-    def generate_quantized_model(self,
-                                 model_path,
-                                 quantizable_op_type,
-                                 batch_size,
-                                 algo="KL",
-                                 round_type="round",
-                                 is_full_quantize=False,
-                                 is_use_cache_file=False,
-                                 is_optimize_model=False,
-                                 batch_nums=10,
-                                 onnx_format=False):
+    def generate_quantized_model(
+        self,
+        model_path,
+        quantizable_op_type,
+        batch_size,
+        algo="KL",
+        round_type="round",
+        is_full_quantize=False,
+        is_use_cache_file=False,
+        is_optimize_model=False,
+        batch_nums=10,
+        onnx_format=False,
+    ):
        try:
            os.system("mkdir " + self.int8_model)
        except Exception as e:
-            print("Failed to create {} due to {}".format(
-                self.int8_model, str(e)))
+            print(
+                "Failed to create {} due to {}".format(self.int8_model, str(e))
+            )
             sys.exit(-1)
 
         place = fluid.CPUPlace()
@@ -261,70 +283,98 @@ def generate_quantized_model(self,
         scope = fluid.global_scope()
         val_reader = val()
 
-        ptq = PostTrainingQuantization(executor=exe,
-                                       sample_generator=val_reader,
-                                       model_dir=model_path,
-                                       batch_size=batch_size,
-                                       batch_nums=batch_nums,
-                                       algo=algo,
-                                       quantizable_op_type=quantizable_op_type,
-                                       round_type=round_type,
-                                       is_full_quantize=is_full_quantize,
-                                       optimize_model=is_optimize_model,
-                                       onnx_format=onnx_format,
-                                       is_use_cache_file=is_use_cache_file)
+        ptq = PostTrainingQuantization(
+            executor=exe,
+            sample_generator=val_reader,
+            model_dir=model_path,
+            model_filename="inference.pdmodel",
+            params_filename="inference.pdiparams",
+            batch_size=batch_size,
+            batch_nums=batch_nums,
+            algo=algo,
+            quantizable_op_type=quantizable_op_type,
+            round_type=round_type,
+            is_full_quantize=is_full_quantize,
+            optimize_model=is_optimize_model,
+            onnx_format=onnx_format,
+            is_use_cache_file=is_use_cache_file,
+        )
         ptq.quantize()
-        ptq.save_quantized_model(self.int8_model)
-
-    def run_test(self,
-                 model,
-                 algo,
-                 round_type,
-                 data_urls,
-                 data_md5s,
-                 quantizable_op_type,
-                 is_full_quantize,
-                 is_use_cache_file,
-                 is_optimize_model,
-                 diff_threshold,
-                 onnx_format=False,
-                 batch_nums=10):
+        ptq.save_quantized_model(
+            self.int8_model,
+            model_filename="inference.pdmodel",
+            params_filename="inference.pdiparams",
+        )
+
+    def run_test(
+        self,
+        model,
+        algo,
+        round_type,
+        data_urls,
+        data_md5s,
+        quantizable_op_type,
+        is_full_quantize,
+        is_use_cache_file,
+        is_optimize_model,
+        diff_threshold,
+        onnx_format=False,
+        batch_nums=10,
+    ):
         infer_iterations = self.infer_iterations
         batch_size = self.batch_size
-        sample_iterations = self.sample_iterations
 
         model_cache_folder = self.download_data(data_urls, data_md5s, model)
 
-        print("Start FP32 inference for {0} on {1} images ...".format(
-            model, infer_iterations * batch_size))
+        print(
+            "Start FP32 inference for {0} on {1} images ...".format(
+                model, infer_iterations * batch_size
+            )
+        )
         (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program(
-            os.path.join(model_cache_folder, "model"), batch_size,
-            infer_iterations)
-
-        print("Start INT8 post training quantization for {0} on {1} images ...".
-              format(model, sample_iterations * batch_size))
-        self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
-                                      quantizable_op_type, batch_size,
-                                      sample_iterations, algo, round_type,
-                                      is_full_quantize, is_use_cache_file,
-                                      is_optimize_model, batch_nums,
-                                      onnx_format)
-
-        print("Start INT8 inference for {0} on {1} images ...".format(
-            model, infer_iterations * batch_size))
-        (int8_throughput, int8_latency,
-         int8_acc1) = self.run_program(self.int8_model, batch_size,
-                                       infer_iterations)
+            os.path.join(model_cache_folder, "MobileNetV1_infer"),
+            batch_size,
+            infer_iterations,
+        )
+
+        print(
+            "Start INT8 post training quantization for {0} on {1} images ...".format(
+                model, batch_nums * batch_size
+            )
+        )
+        self.generate_quantized_model(
+            os.path.join(model_cache_folder, "MobileNetV1_infer"),
+            quantizable_op_type,
+            batch_size,
+            algo,
+            round_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            batch_nums,
+            onnx_format,
+        )
+
+        print(
+            "Start INT8 inference for {0} on {1} images ...".format(
+                model, infer_iterations * batch_size
+            )
+        )
+        (int8_throughput, int8_latency, int8_acc1) = self.run_program(
+            self.int8_model, batch_size, infer_iterations
+        )
 
         print("---Post training quantization of {} method---".format(algo))
         print(
-            "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}."
-            .format(model, batch_size, fp32_throughput, fp32_latency,
-                    fp32_acc1))
+            "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.".format(
+                model, batch_size, fp32_throughput, fp32_latency, fp32_acc1
+            )
+        )
         print(
-            "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n"
-            .format(model, batch_size, int8_throughput, int8_latency,
-                    int8_acc1))
+            "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n".format(
+                model, batch_size, int8_throughput, int8_latency, int8_acc1
+            )
+        )
         sys.stdout.flush()
 
         delta_value = fp32_acc1 - int8_acc1
@@ -332,15 +382,14 @@
 
 
 class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_kl_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "KL"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -351,21 +400,30 @@ def test_post_training_kl_mobilenetv1(self):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, round_type, data_urls, data_md5s,
-                      quantizable_op_type, is_full_quantize, is_use_cache_file,
-                      is_optimize_model, diff_threshold)
+        batch_nums = 3
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+        )
 
 
 class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_avg_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "avg"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -375,21 +433,29 @@ def test_post_training_avg_mobilenetv1(self):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, round_type, data_urls, data_md5s,
-                      quantizable_op_type, is_full_quantize, is_use_cache_file,
-                      is_optimize_model, diff_threshold)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+        )
 
 
 class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_hist_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "hist"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
        ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -400,29 +466,30 @@ def test_post_training_hist_mobilenetv1(self):
         is_optimize_model = True
         diff_threshold = 0.03
         batch_nums = 3
-        self.run_test(model,
-                      algo,
-                      round_type,
-                      data_urls,
-                      data_md5s,
-                      quantizable_op_type,
-                      is_full_quantize,
-                      is_use_cache_file,
-                      is_optimize_model,
-                      diff_threshold,
-                      batch_nums=batch_nums)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+            batch_nums=batch_nums,
+        )
 
 
 class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_abs_max_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "abs_max"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "mul",
@@ -432,21 +499,29 @@ def test_post_training_abs_max_mobilenetv1(self):
         is_optimize_model = False
         # The accuracy diff of post-training quantization (abs_max) maybe bigger
         diff_threshold = 0.05
-        self.run_test(model, algo, round_type, data_urls, data_md5s,
-                      quantizable_op_type, is_full_quantize, is_use_cache_file,
-                      is_optimize_model, diff_threshold)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+        )
 
 
 class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_onnx_format_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "emd"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -458,18 +533,20 @@ def test_post_training_onnx_format_mobilenetv1(self):
         onnx_format = True
         diff_threshold = 0.05
         batch_nums = 3
-        self.run_test(model,
-                      algo,
-                      round_type,
-                      data_urls,
-                      data_md5s,
-                      quantizable_op_type,
-                      is_full_quantize,
-                      is_use_cache_file,
-                      is_optimize_model,
-                      diff_threshold,
-                      onnx_format=onnx_format,
-                      batch_nums=batch_nums)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+            onnx_format=onnx_format,
+            batch_nums=batch_nums,
+        )
 
 
 if __name__ == '__main__':
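
The functional core of this patch is in run_program: the new MobileNetV1_infer export apparently returns raw class scores rather than an in-graph accuracy op, so the test now feeds only the image and computes top-1 accuracy from the fetched predictions itself. A minimal standalone sketch of that check, NumPy only; the 4x10 synthetic batch and the seed are illustrative assumptions, not values from the test (the real runs feed ImageNet batches with 1000 classes):

    import numpy as np

    # Stand-ins for run_program's values: "pred" mimics the fetched network
    # output for one batch, "label" mimics the reader's labels. The shapes
    # (4 samples, 10 classes) are assumptions for this sketch only.
    rng = np.random.default_rng(0)
    pred = rng.random((4, 10)).astype("float32")
    label = rng.integers(0, 10, size=(4, 1)).astype("int64")

    # Same logic as the patched run_program: ascending argsort per row,
    # keep the last column (highest score) as the top-1 prediction.
    sort_array = pred.argsort(axis=1)
    top_1_pred = sort_array[:, -1:][:, ::-1]

    # (4, 1) vs (4, 1) elementwise comparison; the mean of the boolean
    # mask is the batch top-1 accuracy.
    top_1 = np.mean(label == top_1_pred)
    print("top-1 accuracy:", top_1)

Keeping the argsort form (rather than a plain np.argmax) means a top-k variant only needs a wider slice, e.g. sort_array[:, -5:][:, ::-1] for top-5 in descending score order.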