From 5d29a5bf4530d4e51419836e10a905cbf8c3337a Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Thu, 22 Dec 2022 19:11:23 +0800
Subject: [PATCH] fix unittest in post training quantization (#49257)

---
 ..._post_training_quantization_mobilenetv1.py | 387 +++++++++++-------
 1 file changed, 232 insertions(+), 155 deletions(-)

diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index 00a4e2c2aa49e..471798dec28c5 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -77,13 +77,14 @@ def process_image(sample, mode, color_jitter, rotate):
     return img, sample[1]
 
 
-def _reader_creator(file_list,
-                    mode,
-                    shuffle=False,
-                    color_jitter=False,
-                    rotate=False,
-                    data_dir=DATA_DIR):
-
+def _reader_creator(
+    file_list,
+    mode,
+    shuffle=False,
+    color_jitter=False,
+    rotate=False,
+    data_dir=DATA_DIR,
+):
     def reader():
         with open(file_list) as flist:
             full_lines = [line.strip() for line in flist]
@@ -98,10 +99,9 @@ def reader():
                 continue
             yield img_path, int(label)
 
-    mapper = functools.partial(process_image,
-                               mode=mode,
-                               color_jitter=color_jitter,
-                               rotate=rotate)
+    mapper = functools.partial(
+        process_image, mode=mode, color_jitter=color_jitter, rotate=rotate
+    )
 
     return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
 
@@ -112,11 +112,11 @@ def val(data_dir=DATA_DIR):
 
 
 class TestPostTrainingQuantization(unittest.TestCase):
-
     def setUp(self):
         self.int8_download = 'int8/download'
-        self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
-                                               self.int8_download)
+        self.cache_folder = os.path.expanduser(
+            '~/.cache/paddle/dataset/' + self.int8_download
+        )
         self.data_cache_folder = ''
         data_urls = []
         data_md5s = []
@@ -129,31 +129,34 @@ def setUp(self):
                 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
             )
             data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
-            self.data_cache_folder = self.download_data(data_urls, data_md5s,
-                                                        "full_data", False)
+            self.data_cache_folder = self.download_data(
+                data_urls, data_md5s, "full_data", False
+            )
         else:
             data_urls.append(
                 'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
             )
             data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d')
-            self.data_cache_folder = self.download_data(data_urls, data_md5s,
-                                                        "small_data", False)
+            self.data_cache_folder = self.download_data(
+                data_urls, data_md5s, "small_data", False
+            )
 
         # reader/decorator.py requires the relative path to the data folder
         if not os.path.exists("./data/ILSVRC2012"):
-            cmd = 'rm -rf {0} && ln -s {1} {0}'.format("data",
-                                                       self.data_cache_folder)
+            cmd = 'rm -rf {0} && ln -s {1} {0}'.format(
+                "data", self.data_cache_folder
+            )
             os.system(cmd)
 
         self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50
-        self.sample_iterations = 50 if os.environ.get(
-            'DATASET') == 'full' else 2
-        self.infer_iterations = 50000 if os.environ.get(
-            'DATASET') == 'full' else 2
+        self.infer_iterations = (
+            50000 if os.environ.get('DATASET') == 'full' else 2
+        )
 
         self.root_path = tempfile.TemporaryDirectory()
-        self.int8_model = os.path.join(self.root_path.name,
-                                       "post_training_quantization")
+        self.int8_model = os.path.join(
+            self.root_path.name, "post_training_quantization"
+        )
 
     def tearDown(self):
         self.root_path.cleanup()
@@ -161,7 +164,8 @@ def tearDown(self):
     def cache_unzipping(self, target_folder, zip_path):
         if not os.path.exists(target_folder):
             cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(
-                target_folder, zip_path)
+                target_folder, zip_path
+            )
             os.system(cmd)
 
     def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
@@ -173,13 +177,15 @@ def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
                 download(data_urls[i], self.int8_download, data_md5s[i])
                 file_names.append(data_urls[i].split('/')[-1])
 
-            zip_path = os.path.join(self.cache_folder,
-                                    'full_imagenet_val.tar.gz')
+            zip_path = os.path.join(
+                self.cache_folder, 'full_imagenet_val.tar.gz'
+            )
 
             if not os.path.exists(zip_path):
                 cat_command = 'cat'
                 for file_name in file_names:
-                    cat_command += ' ' + os.path.join(self.cache_folder,
-                                                      file_name)
+                    cat_command += ' ' + os.path.join(
+                        self.cache_folder, file_name
+                    )
                 cat_command += ' > ' + zip_path
                 os.system(cat_command)
 
@@ -199,8 +205,16 @@ def run_program(self, model_path, batch_size, infer_iterations):
         image_shape = [3, 224, 224]
         place = fluid.CPUPlace()
         exe = fluid.Executor(place)
-        [infer_program, feed_dict, fetch_targets] = \
-            fluid.io.load_inference_model(model_path, exe)
+        [
+            infer_program,
+            feed_dict,
+            fetch_targets,
+        ] = fluid.io.load_inference_model(
+            model_path,
+            exe,
+            model_filename="inference.pdmodel",
+            params_filename="inference.pdiparams",
+        )
         val_reader = paddle.batch(val(), batch_size)
         iterations = infer_iterations
 
@@ -208,23 +222,28 @@ def run_program(self, model_path, batch_size, infer_iterations):
         cnt = 0
         periods = []
         for batch_id, data in enumerate(val_reader()):
-            image = np.array([x[0].reshape(image_shape)
-                              for x in data]).astype("float32")
+            image = np.array([x[0].reshape(image_shape) for x in data]).astype(
+                "float32"
+            )
             label = np.array([x[1] for x in data]).astype("int64")
             label = label.reshape([-1, 1])
 
             t1 = time.time()
-            _, acc1, _ = exe.run(infer_program,
-                                 feed={
-                                     feed_dict[0]: image,
-                                     feed_dict[1]: label
-                                 },
-                                 fetch_list=fetch_targets)
+            pred = exe.run(
+                infer_program,
+                feed={feed_dict[0]: image},
+                fetch_list=fetch_targets,
+            )
             t2 = time.time()
             period = t2 - t1
             periods.append(period)
 
-            test_info.append(np.mean(acc1) * len(data))
+            pred = np.array(pred[0])
+            sort_array = pred.argsort(axis=1)
+            top_1_pred = sort_array[:, -1:][:, ::-1]
+            top_1 = np.mean(label == top_1_pred)
+
+            test_info.append(np.mean(top_1) * len(data))
             cnt += len(data)
 
             if (batch_id + 1) % 100 == 0:
@@ -238,22 +257,25 @@ def run_program(self, model_path, batch_size, infer_iterations):
         acc1 = np.sum(test_info) / cnt
         return (throughput, latency, acc1)
 
-    def generate_quantized_model(self,
-                                 model_path,
-                                 quantizable_op_type,
-                                 batch_size,
-                                 algo="KL",
-                                 round_type="round",
-                                 is_full_quantize=False,
-                                 is_use_cache_file=False,
-                                 is_optimize_model=False,
-                                 batch_nums=10,
-                                 onnx_format=False):
+    def generate_quantized_model(
+        self,
+        model_path,
+        quantizable_op_type,
+        batch_size,
+        algo="KL",
+        round_type="round",
+        is_full_quantize=False,
+        is_use_cache_file=False,
+        is_optimize_model=False,
+        batch_nums=10,
+        onnx_format=False,
+    ):
        try:
            os.system("mkdir " + self.int8_model)
        except Exception as e:
-            print("Failed to create {} due to {}".format(
-                self.int8_model, str(e)))
+            print(
+                "Failed to create {} due to {}".format(self.int8_model, str(e))
+            )
             sys.exit(-1)
 
         place = fluid.CPUPlace()
@@ -261,70 +283,98 @@ def generate_quantized_model(self,
         scope = fluid.global_scope()
         val_reader = val()
 
-        ptq = PostTrainingQuantization(executor=exe,
-                                       sample_generator=val_reader,
-                                       model_dir=model_path,
-                                       batch_size=batch_size,
-                                       batch_nums=batch_nums,
-                                       algo=algo,
-                                       quantizable_op_type=quantizable_op_type,
-                                       round_type=round_type,
-                                       is_full_quantize=is_full_quantize,
-                                       optimize_model=is_optimize_model,
-                                       onnx_format=onnx_format,
-                                       is_use_cache_file=is_use_cache_file)
+        ptq = PostTrainingQuantization(
+            executor=exe,
+            sample_generator=val_reader,
+            model_dir=model_path,
+            model_filename="inference.pdmodel",
+            params_filename="inference.pdiparams",
+            batch_size=batch_size,
+            batch_nums=batch_nums,
+            algo=algo,
+            quantizable_op_type=quantizable_op_type,
+            round_type=round_type,
+            is_full_quantize=is_full_quantize,
+            optimize_model=is_optimize_model,
+            onnx_format=onnx_format,
+            is_use_cache_file=is_use_cache_file,
+        )
         ptq.quantize()
-        ptq.save_quantized_model(self.int8_model)
-
-    def run_test(self,
-                 model,
-                 algo,
-                 round_type,
-                 data_urls,
-                 data_md5s,
-                 quantizable_op_type,
-                 is_full_quantize,
-                 is_use_cache_file,
-                 is_optimize_model,
-                 diff_threshold,
-                 onnx_format=False,
-                 batch_nums=10):
+        ptq.save_quantized_model(
+            self.int8_model,
+            model_filename="inference.pdmodel",
+            params_filename="inference.pdiparams",
+        )
+
+    def run_test(
+        self,
+        model,
+        algo,
+        round_type,
+        data_urls,
+        data_md5s,
+        quantizable_op_type,
+        is_full_quantize,
+        is_use_cache_file,
+        is_optimize_model,
+        diff_threshold,
+        onnx_format=False,
+        batch_nums=10,
+    ):
         infer_iterations = self.infer_iterations
         batch_size = self.batch_size
-        sample_iterations = self.sample_iterations
 
         model_cache_folder = self.download_data(data_urls, data_md5s, model)
 
-        print("Start FP32 inference for {0} on {1} images ...".format(
-            model, infer_iterations * batch_size))
+        print(
+            "Start FP32 inference for {0} on {1} images ...".format(
+                model, infer_iterations * batch_size
+            )
+        )
         (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program(
-            os.path.join(model_cache_folder, "model"), batch_size,
-            infer_iterations)
-
-        print("Start INT8 post training quantization for {0} on {1} images ...".
-              format(model, sample_iterations * batch_size))
-        self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
-                                      quantizable_op_type, batch_size,
-                                      sample_iterations, algo, round_type,
-                                      is_full_quantize, is_use_cache_file,
-                                      is_optimize_model, batch_nums,
-                                      onnx_format)
-
-        print("Start INT8 inference for {0} on {1} images ...".format(
-            model, infer_iterations * batch_size))
-        (int8_throughput, int8_latency,
-         int8_acc1) = self.run_program(self.int8_model, batch_size,
-                                       infer_iterations)
+            os.path.join(model_cache_folder, "MobileNetV1_infer"),
+            batch_size,
+            infer_iterations,
+        )
+
+        print(
+            "Start INT8 post training quantization for {0} on {1} images ...".format(
+                model, batch_nums * batch_size
+            )
+        )
+        self.generate_quantized_model(
+            os.path.join(model_cache_folder, "MobileNetV1_infer"),
+            quantizable_op_type,
+            batch_size,
+            algo,
+            round_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            batch_nums,
+            onnx_format,
+        )
+
+        print(
+            "Start INT8 inference for {0} on {1} images ...".format(
+                model, infer_iterations * batch_size
+            )
+        )
+        (int8_throughput, int8_latency, int8_acc1) = self.run_program(
+            self.int8_model, batch_size, infer_iterations
+        )
 
         print("---Post training quantization of {} method---".format(algo))
         print(
-            "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}."
-            .format(model, batch_size, fp32_throughput, fp32_latency,
-                    fp32_acc1))
+            "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.".format(
+                model, batch_size, fp32_throughput, fp32_latency, fp32_acc1
+            )
+        )
         print(
-            "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n"
-            .format(model, batch_size, int8_throughput, int8_latency,
-                    int8_acc1))
+            "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n".format(
+                model, batch_size, int8_throughput, int8_latency, int8_acc1
+            )
+        )
         sys.stdout.flush()
 
         delta_value = fp32_acc1 - int8_acc1
@@ -332,15 +382,14 @@
 
 
 class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_kl_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "KL"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -351,21 +400,30 @@ def test_post_training_kl_mobilenetv1(self):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, round_type, data_urls, data_md5s,
-                      quantizable_op_type, is_full_quantize, is_use_cache_file,
-                      is_optimize_model, diff_threshold)
+        batch_nums = 3
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+        )
 
 
 class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_avg_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "avg"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -375,21 +433,29 @@ def test_post_training_avg_mobilenetv1(self):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, round_type, data_urls, data_md5s,
-                      quantizable_op_type, is_full_quantize, is_use_cache_file,
-                      is_optimize_model, diff_threshold)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+        )
 
 
 class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_hist_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "hist"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
        ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -400,29 +466,30 @@ def test_post_training_hist_mobilenetv1(self):
         is_optimize_model = True
         diff_threshold = 0.03
         batch_nums = 3
-        self.run_test(model,
-                      algo,
-                      round_type,
-                      data_urls,
-                      data_md5s,
-                      quantizable_op_type,
-                      is_full_quantize,
-                      is_use_cache_file,
-                      is_optimize_model,
-                      diff_threshold,
-                      batch_nums=batch_nums)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+            batch_nums=batch_nums,
+        )
 
 
 class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_abs_max_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "abs_max"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "mul",
@@ -432,21 +499,29 @@ def test_post_training_abs_max_mobilenetv1(self):
         is_optimize_model = False
         # The accuracy diff of post-training quantization (abs_max) maybe bigger
         diff_threshold = 0.05
-        self.run_test(model, algo, round_type, data_urls, data_md5s,
-                      quantizable_op_type, is_full_quantize, is_use_cache_file,
-                      is_optimize_model, diff_threshold)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+        )
 
 
 class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
-
     def test_post_training_onnx_format_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "emd"
         round_type = "round"
         data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+            'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
         ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        data_md5s = ['5ee2b1775b11dc233079236cdc216c2e']
         quantizable_op_type = [
             "conv2d",
             "depthwise_conv2d",
@@ -458,18 +533,20 @@ def test_post_training_onnx_format_mobilenetv1(self):
         onnx_format = True
         diff_threshold = 0.05
         batch_nums = 3
-        self.run_test(model,
-                      algo,
-                      round_type,
-                      data_urls,
-                      data_md5s,
-                      quantizable_op_type,
-                      is_full_quantize,
-                      is_use_cache_file,
-                      is_optimize_model,
-                      diff_threshold,
-                      onnx_format=onnx_format,
-                      batch_nums=batch_nums)
+        self.run_test(
+            model,
+            algo,
+            round_type,
+            data_urls,
+            data_md5s,
+            quantizable_op_type,
+            is_full_quantize,
+            is_use_cache_file,
+            is_optimize_model,
+            diff_threshold,
+            onnx_format=onnx_format,
+            batch_nums=batch_nums,
+        )
 
 
 if __name__ == '__main__':
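
The functional core of this patch is in run_program: the new MobileNetV1_infer export apparently returns raw class scores rather than an in-graph accuracy op, so the test now feeds only the image and computes top-1 accuracy from the fetched predictions itself. A minimal standalone sketch of that check, NumPy only; the 4x10 synthetic batch and the seed are illustrative assumptions, not values from the test (the real runs feed ImageNet batches with 1000 classes):

    import numpy as np

    # Stand-ins for run_program's values: "pred" mimics the fetched network
    # output for one batch, "label" mimics the reader's labels. The shapes
    # (4 samples, 10 classes) are assumptions for this sketch only.
    rng = np.random.default_rng(0)
    pred = rng.random((4, 10)).astype("float32")
    label = rng.integers(0, 10, size=(4, 1)).astype("int64")

    # Same logic as the patched run_program: ascending argsort per row,
    # keep the last column (highest score) as the top-1 prediction.
    sort_array = pred.argsort(axis=1)
    top_1_pred = sort_array[:, -1:][:, ::-1]

    # (4, 1) vs (4, 1) elementwise comparison; the mean of the boolean
    # mask is the batch top-1 accuracy.
    top_1 = np.mean(label == top_1_pred)
    print("top-1 accuracy:", top_1)

Keeping the argsort form (rather than a plain np.argmax) means a top-k variant only needs a wider slice, e.g. sort_array[:, -5:][:, ::-1] for top-5 in descending score order.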