PaddlePaddle · lizexu123 · Dec 19, 2023 · Dec 29, 2023 · Jan 3, 2024 · Jan 8, 2024
diff --git a/example/auto_compression/detection/configs/picodet_reader.yml b/example/auto_compression/detection/configs/picodet_reader.yml
@@ -6,13 +6,13 @@ TrainDataset:
   !COCODataSet
     image_dir: train2017
     anno_path: annotations/instances_train2017.json
-    dataset_dir: dataset/coco/
+    dataset_dir: /work/GETR-Lite-paddle-new/inference/datasets/coco/
 
 EvalDataset:
   !COCODataSet
     image_dir: val2017
     anno_path: annotations/instances_val2017.json
-    dataset_dir: dataset/coco/
+    dataset_dir: /work/GETR-Lite-paddle-new/inference/datasets/coco/
 
 eval_height: &eval_height 416
 eval_width: &eval_width 416

diff --git a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
@@ -2,7 +2,7 @@
 Global:
   reader_config: configs/yolo_reader.yml
   arch: PPYOLOE
-  include_nms: True
+  include_nms: False
   Evaluation: True
   model_dir: ./ppyoloe_crn_l_300e_coco
   model_filename: model.pdmodel
@@ -30,5 +30,4 @@ TrainConfig:
   optimizer_builder:
     optimizer: 
       type: SGD
-    weight_decay: 4.0e-05
-
+    weight_decay: 4.0e-05
diff --git a/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml
@@ -8,27 +8,39 @@ Global:
   model_filename: model.pdmodel
   params_filename: model.pdiparams
 
-Distillation:
-  alpha: 1.0
-  loss: soft_label
+# Distillation:
+#   alpha: 1.0
+#   loss: soft_label
 
-QuantAware:
-  onnx_format: true
-  use_pact: true
-  activation_quantize_type: 'moving_average_abs_max'
-  quantize_op_types:
-  - conv2d
-  - depthwise_conv2d
+# QuantAware:
+#   onnx_format: true
+#   use_pact: true
+#   activation_quantize_type: 'moving_average_abs_max'
+#   quantize_op_types:
+#   - conv2d
+#   - depthwise_conv2d
 
-TrainConfig:
-  train_iter: 5000
-  eval_iter: 1000
-  learning_rate:  
-    type: CosineAnnealingDecay
-    learning_rate: 0.00003
-    T_max: 6000
-  optimizer_builder:
-    optimizer: 
-      type: SGD
-    weight_decay: 4.0e-05
+# TrainConfig:
+#   train_iter: 5000
+#   eval_iter: 1000
+#   learning_rate:  
+#     type: CosineAnnealingDecay
+#     learning_rate: 0.00003
+#     T_max: 6000
+#   optimizer_builder:
+#     optimizer: 
+#       type: SGD
+#     weight_decay: 4.0e-05
+QuantPost:
+    batch_size: 32
+    batch_nums: None
+    algo: 'hist'
+    hist_percent: 0.999
+    bias_correct: False
+    recon_level: None
+    regions: None
+    epochs: 20
+    lr: 0.1
+    simulate_activation_quant: False
+    skip_tensor_list: None
 
diff --git a/example/auto_compression/detection/configs/yolo_reader.yml b/example/auto_compression/detection/configs/yolo_reader.yml
@@ -6,13 +6,13 @@ TrainDataset:
   !COCODataSet
     image_dir: train2017
     anno_path: annotations/instances_train2017.json
-    dataset_dir: dataset/coco/
+    dataset_dir: /work/GETR-Lite-paddle-new/inference/datasets/coco/
 
 EvalDataset:
   !COCODataSet
     image_dir: val2017
     anno_path: annotations/instances_val2017.json
-    dataset_dir: dataset/coco/
+    dataset_dir: /work/GETR-Lite-paddle-new/inference/datasets/coco/
 
 worker_num: 0
 

diff --git a/example/auto_compression/detection/paddle_inference_eval.py b/example/auto_compression/detection/paddle_inference_eval.py
@@ -18,6 +18,7 @@
 import sys
 import cv2
 import numpy as np
+from tqdm import tqdm
 
 import paddle
 from paddle.inference import Config
@@ -82,9 +83,15 @@ def argsparser():
     parser.add_argument("--img_shape", type=int, default=640, help="input_size")
     parser.add_argument(
         '--include_nms',
-        type=bool,
-        default=True,
+        type=str,
+        default='True',
         help="Whether include nms or not.")
+    parser.add_argument(
+        "--trt_calib_mode",
+        type=bool,
+        default=False,
+        help="If the model is produced by TRT offline quantitative "
+        "calibration, trt_calib_mode need to set True.")
 
     return parser
 
@@ -208,8 +215,9 @@ def load_predictor(
         use_mkldnn=False,
         batch_size=1,
         device="CPU",
-        min_subgraph_size=3,
+        min_subgraph_size=4,
         use_dynamic_shape=False,
+        trt_calib_mode=False,
         trt_min_shape=1,
         trt_max_shape=1280,
         trt_opt_shape=640,
@@ -238,9 +246,11 @@ def load_predictor(
     config = Config(
         os.path.join(model_dir, "model.pdmodel"),
         os.path.join(model_dir, "model.pdiparams"))
+
+    config.enable_memory_optim()
     if device == "GPU":
         # initial GPU memory(M), device ID
-        config.enable_use_gpu(200, 0)
+        config.enable_use_gpu(1000, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
     else:
@@ -260,12 +270,12 @@ def load_predictor(
     }
     if precision in precision_map.keys() and use_trt:
         config.enable_tensorrt_engine(
-            workspace_size=(1 << 25) * batch_size,
+            workspace_size=(1 << 30) * batch_size,
             max_batch_size=batch_size,
             min_subgraph_size=min_subgraph_size,
             precision_mode=precision_map[precision],
             use_static=True,
-            use_calib_mode=False, )
+            use_calib_mode=False)
 
         if use_dynamic_shape:
             dynamic_shape_file = os.path.join(FLAGS.model_path,
@@ -297,6 +307,7 @@ def predict_image(predictor,
     img, scale_factor = image_preprocess(image_file, image_shape)
     inputs = {}
     inputs["image"] = img
+
     if FLAGS.include_nms:
         inputs['scale_factor'] = scale_factor
     input_names = predictor.get_input_names()
@@ -356,7 +367,8 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
     boxes_tensor = predictor.get_output_handle(output_names[0])
     if FLAGS.include_nms:
         boxes_num = predictor.get_output_handle(output_names[1])
-    for batch_id, data in enumerate(val_loader):
+    for batch_id, data in tqdm(
+            enumerate(val_loader), total=len(val_loader), desc='Evaluating'):
         data_all = {k: np.array(v) for k, v in data.items()}
         for i, _ in enumerate(input_names):
             input_tensor = predictor.get_input_handle(input_names[i])
@@ -382,7 +394,6 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
             res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
         metric.update(data_all, res)
         if batch_id % 100 == 0:
-            print("Eval iter:", batch_id)
             sys.stdout.flush()
     metric.accumulate()
     metric.log()
@@ -421,7 +432,6 @@ def main():
             repeats=repeats)
     else:
         reader_cfg = load_config(FLAGS.reader_config)
-
         dataset = reader_cfg["EvalDataset"]
         global val_loader
         val_loader = create("EvalReader")(
@@ -432,6 +442,7 @@ def main():
         anno_file = dataset.get_anno()
         metric = COCOMetric(
             anno_file=anno_file, clsid2catid=clsid2catid, IouType="bbox")
+
         eval(predictor, val_loader, metric, rerun_flag=rerun_flag)
 
     if rerun_flag:
@@ -444,6 +455,10 @@ def main():
     paddle.enable_static()
     parser = argsparser()
     FLAGS = parser.parse_args()
+    if FLAGS.include_nms == 'True':
+        FLAGS.include_nms = True
+    else:
+        FLAGS.include_nms = False
 
     # DataLoader need run on cpu
     paddle.set_device("cpu")

diff --git a/example/auto_compression/detection/post_process.py b/example/auto_compression/detection/post_process.py
@@ -41,8 +41,7 @@ def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
         rest_boxes = boxes[indexes, :]
         iou = iou_of(
             rest_boxes,
-            np.expand_dims(
-                current_box, axis=0), )
+            np.expand_dims(current_box, axis=0), )
         indexes = indexes[iou <= iou_threshold]
 
     return box_scores[picked, :]
@@ -122,7 +121,7 @@ def _non_max_suppression(self, prediction, scale_factor):
                 picked_labels.extend([class_index] * box_probs.shape[0])
 
             if len(picked_box_probs) == 0:
-                out_boxes_list.append(np.empty((0, 4)))
+                out_boxes_list.append(np.empty((0, 6)))
 
             else:
                 picked_box_probs = np.concatenate(picked_box_probs)
@@ -135,9 +134,8 @@ def _non_max_suppression(self, prediction, scale_factor):
                 # clas score box
                 out_box = np.concatenate(
                     [
-                        np.expand_dims(
-                            np.array(picked_labels), axis=-1), np.expand_dims(
-                                picked_box_probs[:, 4], axis=-1),
+                        np.expand_dims(np.array(picked_labels), axis=-1),
+                        np.expand_dims(picked_box_probs[:, 4], axis=-1),
                         picked_box_probs[:, :4]
                     ],
                     axis=1)
@@ -152,6 +150,6 @@ def _non_max_suppression(self, prediction, scale_factor):
         return out_boxes_list, box_num_list
 
     def __call__(self, outs, scale_factor):
-        out_boxes_list, box_num_list = self._non_max_suppression(outs,
-                                                                 scale_factor)
+        out_boxes_list, box_num_list = self._non_max_suppression(
+            outs, scale_factor)
         return {'bbox': out_boxes_list, 'bbox_num': box_num_list}
diff --git a/example/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml b/example/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml
@@ -6,11 +6,20 @@ Global:
   dataset: clue
   batch_size: 16
   max_seq_length: 128
-TransformerPrune:
-  pruned_ratio: 0.25
-HyperParameterOptimization:
+
+
+# Distillation
 Distillation:
-QuantPost:
+  teacher_model_dir: ./afqmc
+  teacher_model_filename: inference.pdmodel
+  teacher_params_filename: inference.pdiparams
+
+# Pruning parameters
+# Pruning parameters include the pruning algorithm and the pruning ratio
+Prune:
+  prune_algo: transformer_pruner
+  pruned_ratio: 0.25
+
 TrainConfig:
   epochs: 6
   eval_iter: 1070
@@ -20,3 +29,12 @@ TrainConfig:
       type: AdamW
     weight_decay: 0.01
   origin_metric: 0.7403
+
+
+# Post-training (offline) quantization
+QuantPost:
+  activation_bits: 8
+  quantize_op_types:
+  - conv2d
+  - depthwise_conv2d
+  weight_bits: 8
diff --git a/example/auto_compression/nlp/configs/uie/uie_base.yaml b/example/auto_compression/nlp/configs/uie/uie_base.yaml
@@ -2,21 +2,24 @@ Global:
   model_dir: ./UIE
   model_filename: inference.pdmodel
   params_filename: inference.pdiparams
-  batch_size: 1
-  max_seq_length: 512
-  train_data: ./data/train.txt
-  dev_data: ./data/dev.txt
-TrainConfig:
-  epochs: 200
-  eval_iter: 100
-  learning_rate: 1.0e-5
-  optimizer_builder:
-    optimizer:
-      type: AdamW
-    weight_decay: 0.01
+  task_name: afqmc
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 
-QuantAware:
-  onnx_format: True
-Distillation:
-  alpha: 1.0
-  loss: l2
+
+HyperParameterOptimization:
+  batch_num:
+  - 4
+  - 16
+  bias_correct:
+  - true
+  hist_percent:
+  - 0.999
+  - 0.99999
+  max_quant_count: 20
+  ptq_algo:
+  - KL
+  - hist
+  weight_quantize_type:
+  - channel_wise_abs_max
diff --git a/example/auto_compression/nlp/run.py b/example/auto_compression/nlp/run.py
@@ -17,6 +17,8 @@
 from paddlenlp.metrics import Mcc, PearsonAndSpearman
 from paddleslim.common import load_config
 from paddleslim.auto_compression.compressor import AutoCompression
+import sys
+sys.setrecursionlimit(1500)  # Set a higher limit, e.g. 1500
 
 
 def argsparser():