From 9795a5287d4e2d8dab2ec434c0a8d4e9ea09901f Mon Sep 17 00:00:00 2001 From: Jack Zhou Date: Thu, 29 Dec 2022 15:50:45 +0800 Subject: [PATCH 01/20] [Backend] Update paddle inference version (#990) 2.4-dev3 -> 2.4-dev4 --- cmake/paddle_inference.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake index 3ab45454a51..3822f9ac3ab 100644 --- a/cmake/paddle_inference.cmake +++ b/cmake/paddle_inference.cmake @@ -62,7 +62,7 @@ if(PADDLEINFERENCE_DIRECTORY) execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${PADDLEINFERENCE_DIRECTORY} ${THIRD_PARTY_PATH}/install/paddle_inference) else() set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") - set(PADDLEINFERENCE_VERSION "2.4-dev3") + set(PADDLEINFERENCE_VERSION "2.4-dev4") if(WIN32) if (WITH_GPU) set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.zip") From 3017ec487c73d31a71fca591c2ccb9c5598dac4c Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Fri, 4 Nov 2022 02:46:39 +0000 Subject: [PATCH 02/20] Add uie benchmark --- benchmark/benchmark_uie.py | 192 +++++++++++++++++++++++++++++++++ benchmark/run_benchmark_uie.sh | 24 +++++ 2 files changed, 216 insertions(+) create mode 100644 benchmark/benchmark_uie.py create mode 100644 benchmark/run_benchmark_uie.sh diff --git a/benchmark/benchmark_uie.py b/benchmark/benchmark_uie.py new file mode 100644 index 00000000000..d7f74a048e7 --- /dev/null +++ b/benchmark/benchmark_uie.py @@ -0,0 +1,192 @@ +import numpy as np +import os +import time +import distutils.util +import sys +import json + +from paddlenlp.utils.log import logger +import fastdeploy as fd +from fastdeploy.text import UIEModel, SchemaLanguage +import pynvml +import psutil +import GPUtil +import multiprocessing + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="The directory of model and tokenizer.") + parser.add_argument( + "--data_path", required=True, help="The path of uie data.") + parser.add_argument( + "--device", + type=str, + default='cpu', + choices=['gpu', 'cpu'], + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--backend", + type=str, + default='pp', + choices=['ort', 'pp', 'trt', 'pp-trt', 'openvino'], + help="The inference runtime backend.") + parser.add_argument( + "--device_id", type=int, default=0, help="device(gpu) id") + parser.add_argument( + "--batch_size", type=int, default=1, help="The batch size of data.") + parser.add_argument( + "--max_length", + type=int, + default=128, + help="The max length of sequence.") + parser.add_argument( + "--log_interval", + type=int, + default=10, + help="The interval of logging.") + parser.add_argument( + "--cpu_num_threads", + type=int, + default=1, + help="The number of threads when inferring on cpu.") + parser.add_argument( + "--use_fp16", + type=distutils.util.strtobool, + default=False, + help="Use FP16 mode") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device == 'cpu': + option.use_cpu() + option.set_cpu_thread_num(args.cpu_num_threads) + else: + option.use_gpu(args.device_id) + if args.backend == 'pp': + option.use_paddle_backend() + elif args.backend == 'ort': + option.use_ort_backend() + elif args.backend == 'openvino': + option.use_openvino_backend() + else: + option.use_trt_backend() + if args.backend == 
'pp-trt': + option.enable_paddle_to_trt() + option.enable_paddle_trt_collect_shape() + trt_file = os.path.join(args.model_dir, "infer.trt") + option.set_trt_input_shape( + 'input_ids', + min_shape=[1, args.max_length], + opt_shape=[args.batch_size, args.max_length], + max_shape=[args.batch_size, args.max_length]) + option.set_trt_input_shape( + 'token_type_ids', + min_shape=[1, args.max_length], + opt_shape=[args.batch_size, args.max_length], + max_shape=[args.batch_size, args.max_length]) + option.set_trt_input_shape( + 'pos_ids', + min_shape=[1, args.max_length], + opt_shape=[args.batch_size, args.max_length], + max_shape=[args.batch_size, args.max_length]) + option.set_trt_input_shape( + 'att_mask', + min_shape=[1, args.max_length], + opt_shape=[args.batch_size, args.max_length], + max_shape=[args.batch_size, args.max_length]) + if args.use_fp16: + option.enable_trt_fp16() + trt_file = trt_file + ".fp16" + option.set_trt_cache_file(trt_file) + return option + + +def get_current_memory_mb(gpu_id=None): + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + cpu_mem = info.uss / 1024. / 1024. + gpu_mem = 0 + if gpu_id is not None: + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id) + meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) + gpu_mem = meminfo.used / 1024. / 1024. + return cpu_mem, gpu_mem + + +def get_current_gputil(gpu_id): + GPUs = GPUtil.getGPUs() + gpu_load = GPUs[gpu_id].load + return gpu_load + + +def sample_gpuutil(gpu_id, gpu_utilization=[]): + while True: + gpu_utilization.append(get_current_gputil(gpu_id)) + time.sleep(0.01) + + +def get_dataset(data_path, max_seq_len=512): + json_lines = [] + with open(data_path, 'r', encoding='utf-8') as f: + for line in f: + json_line = json.loads(line) + content = json_line['content'].strip() + prompt = json_line['prompt'] + # Model Input is aslike: [CLS] Prompt [SEP] Content [SEP] + # It include three summary tokens. 
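The three special tokens mentioned in the comment above are `[CLS]` and the two `[SEP]`s of the packed input `[CLS] prompt [SEP] content [SEP]`; they account for the `+ 3` in the length check that follows. A minimal standalone sketch of the same budget calculation (the helper name is hypothetical, not part of the patch):

```python
def content_budget(prompt: str, max_seq_len: int = 512) -> int:
    # Space left for content once the prompt and the three special tokens
    # ([CLS] prompt [SEP] content [SEP]) are accounted for.
    budget = max_seq_len - len(prompt) - 3
    if budget <= 0:
        raise ValueError("max_seq_len is too small for this prompt")
    return budget
```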
+ if max_seq_len <= len(prompt) + 3: + raise ValueError( + "The value of max_seq_len is too small, please set a larger value" + ) + json_lines.append(json_line) + + return json_lines + + +def run_inference(ds, uie): + for i, sample in enumerate(ds): + uie.set_schema([sample['prompt']]) + result = uie.predict([sample['content']]) + if (i + 1) % args.log_interval == 0: + runtime_statis = uie.print_statis_info_of_runtime() + print(f"Step {i + 1}:") + print(runtime_statis) + print() + + runtime_statis = uie.print_statis_info_of_runtime() + print(f"Final:") + print(runtime_statis) + print() + + +if __name__ == '__main__': + args = parse_arguments() + runtime_option = build_option(args) + model_path = os.path.join(args.model_dir, "inference.pdmodel") + param_path = os.path.join(args.model_dir, "inference.pdiparams") + vocab_path = os.path.join(args.model_dir, "vocab.txt") + + ds = get_dataset(args.data_path) + schema = ["时间"] + uie = UIEModel( + model_path, + param_path, + vocab_path, + position_prob=0.5, + max_length=args.max_length, + schema=schema, + runtime_option=runtime_option, + schema_language=SchemaLanguage.ZH) + + uie.enable_record_time_of_runtime() + run_inference(ds, uie) diff --git a/benchmark/run_benchmark_uie.sh b/benchmark/run_benchmark_uie.sh new file mode 100644 index 00000000000..8f0c03ee3e4 --- /dev/null +++ b/benchmark/run_benchmark_uie.sh @@ -0,0 +1,24 @@ +# wget https://bj.bcebos.com/fastdeploy/benchmark/uie/reimbursement_form_data.txt +# wget https://bj.bcebos.com/fastdeploy/models/uie/uie-base.tgz + +# GPU +## FP32 Model +python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device gpu +python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device gpu +python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device gpu +python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device gpu + +## INT8 Model +python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend pp-trt --device gpu +python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend trt --device gpu + +# CPU +## FP32 Model +python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device cpu +python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device cpu +python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend openvino --device cpu + +## INT8 Model + +python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend pp --device cpu +python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend ort --device cpu From 2ac94e91beca5ed974c9a41189dffb619267a7ba Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Fri, 4 Nov 2022 03:47:47 +0000 Subject: [PATCH 03/20] fix trt dy shape --- benchmark/benchmark_uie.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmark/benchmark_uie.py b/benchmark/benchmark_uie.py index d7f74a048e7..a97bb026fcb 100644 --- a/benchmark/benchmark_uie.py +++ b/benchmark/benchmark_uie.py @@ -84,23 +84,23 @@ def build_option(args): trt_file = os.path.join(args.model_dir, "infer.trt") 
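The hunk below is the substance of this patch: each named input's TensorRT shape profile widens from a fixed `[batch, max_length]` to a true dynamic range, so one engine accepts any sequence length up to `max_length` and is tuned for the `opt` shape. A minimal sketch of the pattern using the same `RuntimeOption` calls as this file (device id and shape values are illustrative):

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_gpu(0)
option.use_trt_backend()
# One [min, opt, max] profile per named input: the engine is valid anywhere
# in the range and optimized for the opt shape.
for name in ("input_ids", "token_type_ids", "pos_ids", "att_mask"):
    option.set_trt_input_shape(
        name,
        min_shape=[1, 1],     # batch 1, single token
        opt_shape=[1, 64],    # typical sequence length
        max_shape=[1, 128])   # hard upper bound for this engine
```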
option.set_trt_input_shape( 'input_ids', - min_shape=[1, args.max_length], - opt_shape=[args.batch_size, args.max_length], + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], max_shape=[args.batch_size, args.max_length]) option.set_trt_input_shape( 'token_type_ids', - min_shape=[1, args.max_length], - opt_shape=[args.batch_size, args.max_length], + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], max_shape=[args.batch_size, args.max_length]) option.set_trt_input_shape( 'pos_ids', - min_shape=[1, args.max_length], - opt_shape=[args.batch_size, args.max_length], + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], max_shape=[args.batch_size, args.max_length]) option.set_trt_input_shape( 'att_mask', - min_shape=[1, args.max_length], - opt_shape=[args.batch_size, args.max_length], + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], max_shape=[args.batch_size, args.max_length]) if args.use_fp16: option.enable_trt_fp16() From 4f7233c11fb9ccca7d1544fcf209b5e3cd500c3a Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Tue, 27 Dec 2022 04:55:46 +0000 Subject: [PATCH 04/20] update uie benchmark --- benchmark/benchmark_uie.py | 31 +++++++++++++++++++---------- benchmark/run_benchmark_uie.sh | 36 ++++++++++++++++------------------ 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/benchmark/benchmark_uie.py b/benchmark/benchmark_uie.py index a97bb026fcb..8e73c34f1ee 100644 --- a/benchmark/benchmark_uie.py +++ b/benchmark/benchmark_uie.py @@ -5,7 +5,6 @@ import sys import json -from paddlenlp.utils.log import logger import fastdeploy as fd from fastdeploy.text import UIEModel, SchemaLanguage import pynvml @@ -60,6 +59,8 @@ def parse_arguments(): type=distutils.util.strtobool, default=False, help="Use FP16 mode") + parser.add_argument( + "--epoch", type=int, default=1, help="The epoch of test") return parser.parse_args() @@ -153,19 +154,29 @@ def get_dataset(data_path, max_seq_len=512): return json_lines -def run_inference(ds, uie): - for i, sample in enumerate(ds): +def run_inference(ds, uie, epoch=1, warmup_steps=10): + for j, sample in enumerate(ds): + if j > warmup_steps: + break uie.set_schema([sample['prompt']]) result = uie.predict([sample['content']]) - if (i + 1) % args.log_interval == 0: - runtime_statis = uie.print_statis_info_of_runtime() - print(f"Step {i + 1}:") - print(runtime_statis) - print() - + print(f"Run {warmup_steps} steps to warm up") + start = time.time() + for ep in range(epoch): + curr_start = time.time() + for i, sample in enumerate(ds): + uie.set_schema([sample['prompt']]) + result = uie.predict([sample['content']]) + print( + f"Epoch {ep} average time = {(time.time() - curr_start) * 1000.0 / (len(ds)):.4f} ms" + ) + end = time.time() runtime_statis = uie.print_statis_info_of_runtime() print(f"Final:") print(runtime_statis) + print( + f"Total average time = {(end - start) * 1000.0 / (len(ds) * epoch):.4f} ms" + ) print() @@ -189,4 +200,4 @@ def run_inference(ds, uie): schema_language=SchemaLanguage.ZH) uie.enable_record_time_of_runtime() - run_inference(ds, uie) + run_inference(ds, uie, args.epoch) diff --git a/benchmark/run_benchmark_uie.sh b/benchmark/run_benchmark_uie.sh index 8f0c03ee3e4..58030d5f6c1 100644 --- a/benchmark/run_benchmark_uie.sh +++ b/benchmark/run_benchmark_uie.sh @@ -1,24 +1,22 @@ # wget https://bj.bcebos.com/fastdeploy/benchmark/uie/reimbursement_form_data.txt # wget https://bj.bcebos.com/fastdeploy/models/uie/uie-base.tgz - +# tar xvfz uie-base.tgz # GPU -## 
FP32 Model -python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device gpu -python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device gpu -python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device gpu -python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device gpu - -## INT8 Model -python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend pp-trt --device gpu -python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend trt --device gpu +echo "-------------------------------GPU Benchmark---------------------------------------" +python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device gpu +python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device gpu +python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device gpu --use_fp16 False +python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device gpu --use_fp16 False +python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device gpu --use_fp16 True +python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device gpu --use_fp16 True +echo "-----------------------------------------------------------------------------------" # CPU -## FP32 Model -python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device cpu -python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device cpu -python benchmark_uie.py --model_dir uie-base --data_path reimbursement_form_data.txt --backend openvino --device cpu - -## INT8 Model - -python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend pp --device cpu -python benchmark_uie.py --model_dir uie_bs1_lr1e-5_qat_final_format_4inputs --data_path reimbursement_form_data.txt --backend ort --device cpu +echo "-------------------------------CPU Benchmark---------------------------------------" +for cpu_num_threads in 1 2 4 8 16; +do + python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device cpu --cpu_num_threads ${cpu_num_threads} + python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device cpu --cpu_num_threads ${cpu_num_threads} + python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend openvino --device cpu --cpu_num_threads ${cpu_num_threads} +done +echo "-----------------------------------------------------------------------------------" From 51e346ea0945978d6367ea178cd7794d9c47163d Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 29 Dec 2022 10:29:32 +0000 Subject: [PATCH 05/20] Update uie benchmark output --- benchmark/benchmark_uie.py | 240 
++++++++++++++++++++++++--------- benchmark/run_benchmark_uie.sh | 23 ++-- 2 files changed, 193 insertions(+), 70 deletions(-) diff --git a/benchmark/benchmark_uie.py b/benchmark/benchmark_uie.py index 8e73c34f1ee..40c3ac4c48c 100644 --- a/benchmark/benchmark_uie.py +++ b/benchmark/benchmark_uie.py @@ -7,10 +7,6 @@ import fastdeploy as fd from fastdeploy.text import UIEModel, SchemaLanguage -import pynvml -import psutil -import GPUtil -import multiprocessing def parse_arguments(): @@ -44,23 +40,23 @@ def parse_arguments(): type=int, default=128, help="The max length of sequence.") - parser.add_argument( - "--log_interval", - type=int, - default=10, - help="The interval of logging.") parser.add_argument( "--cpu_num_threads", type=int, - default=1, + default=8, help="The number of threads when inferring on cpu.") parser.add_argument( - "--use_fp16", + "--enable_trt_fp16", type=distutils.util.strtobool, default=False, - help="Use FP16 mode") + help="whether enable fp16 in trt backend") parser.add_argument( "--epoch", type=int, default=1, help="The epoch of test") + parser.add_argument( + "--enable_collect_memory_info", + type=ast.literal_eval, + default=False, + help="whether enable collect memory info") return parser.parse_args() @@ -103,37 +99,116 @@ def build_option(args): min_shape=[1, 1], opt_shape=[args.batch_size, args.max_length // 2], max_shape=[args.batch_size, args.max_length]) - if args.use_fp16: + if args.enable_trt_fp16: option.enable_trt_fp16() trt_file = trt_file + ".fp16" option.set_trt_cache_file(trt_file) return option -def get_current_memory_mb(gpu_id=None): - pid = os.getpid() - p = psutil.Process(pid) - info = p.memory_full_info() - cpu_mem = info.uss / 1024. / 1024. - gpu_mem = 0 - if gpu_id is not None: - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id) - meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) - gpu_mem = meminfo.used / 1024. / 1024. 
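This patch drops the `pynvml`/`GPUtil` helpers (deleted lines below) in favor of a `Monitor` class that samples `nvidia-smi` from a subprocess. A one-shot sketch of the query the new code builds on, with parsing simplified; it assumes `nvidia-smi` is on `PATH`:

```python
import subprocess

def query_gpu_used_mb(gpu_id: int = 0) -> int:
    # Same query key and csv,noheader,nounits format as StatBase below.
    out = subprocess.check_output([
        "nvidia-smi", f"--id={gpu_id}",
        "--query-gpu=memory.used",
        "--format=csv,noheader,nounits",
    ])
    return int(out.decode().strip().splitlines()[0])
```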
- return cpu_mem, gpu_mem +class StatBase(object): + """StatBase""" + nvidia_smi_path = "nvidia-smi" + gpu_keys = ('index', 'uuid', 'name', 'timestamp', 'memory.total', + 'memory.free', 'memory.used', 'utilization.gpu', + 'utilization.memory') + nu_opt = ',nounits' + cpu_keys = ('cpu.util', 'memory.util', 'memory.used') + +class Monitor(StatBase): + """Monitor""" -def get_current_gputil(gpu_id): - GPUs = GPUtil.getGPUs() - gpu_load = GPUs[gpu_id].load - return gpu_load + def __init__(self, use_gpu=False, gpu_id=0, interval=0.1): + self.result = {} + self.gpu_id = gpu_id + self.use_gpu = use_gpu + self.interval = interval + self.cpu_stat_q = multiprocessing.Queue() + def start(self): + cmd = '%s --id=%s --query-gpu=%s --format=csv,noheader%s -lms 50' % ( + StatBase.nvidia_smi_path, self.gpu_id, ','.join(StatBase.gpu_keys), + StatBase.nu_opt) + if self.use_gpu: + self.gpu_stat_worker = subprocess.Popen( + cmd, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + shell=True, + close_fds=True, + preexec_fn=os.setsid) + # cpu stat + pid = os.getpid() + self.cpu_stat_worker = multiprocessing.Process( + target=self.cpu_stat_func, + args=(self.cpu_stat_q, pid, self.interval)) + self.cpu_stat_worker.start() -def sample_gpuutil(gpu_id, gpu_utilization=[]): - while True: - gpu_utilization.append(get_current_gputil(gpu_id)) - time.sleep(0.01) + def stop(self): + try: + if self.use_gpu: + os.killpg(self.gpu_stat_worker.pid, signal.SIGUSR1) + # os.killpg(p.pid, signal.SIGTERM) + self.cpu_stat_worker.terminate() + self.cpu_stat_worker.join(timeout=0.01) + except Exception as e: + print(e) + return + + # gpu + if self.use_gpu: + lines = self.gpu_stat_worker.stdout.readlines() + lines = [ + line.strip().decode("utf-8") for line in lines + if line.strip() != '' + ] + gpu_info_list = [{ + k: v + for k, v in zip(StatBase.gpu_keys, line.split(', ')) + } for line in lines] + if len(gpu_info_list) == 0: + return + result = gpu_info_list[0] + for item in gpu_info_list: + for k in item.keys(): + if k not in ["name", "uuid", "timestamp"]: + result[k] = max(int(result[k]), int(item[k])) + else: + result[k] = max(result[k], item[k]) + self.result['gpu'] = result + + # cpu + cpu_result = {} + if self.cpu_stat_q.qsize() > 0: + cpu_result = { + k: v + for k, v in zip(StatBase.cpu_keys, self.cpu_stat_q.get()) + } + while not self.cpu_stat_q.empty(): + item = { + k: v + for k, v in zip(StatBase.cpu_keys, self.cpu_stat_q.get()) + } + for k in StatBase.cpu_keys: + cpu_result[k] = max(cpu_result[k], item[k]) + cpu_result['name'] = cpuinfo.get_cpu_info()['brand_raw'] + self.result['cpu'] = cpu_result + + def output(self): + return self.result + + def cpu_stat_func(self, q, pid, interval=0.0): + """cpu stat function""" + stat_info = psutil.Process(pid) + while True: + # pid = os.getpid() + cpu_util, mem_util, mem_use = stat_info.cpu_percent( + ), stat_info.memory_percent(), round(stat_info.memory_info().rss / + 1024.0 / 1024.0, 4) + q.put([cpu_util, mem_util, mem_use]) + time.sleep(interval) + return def get_dataset(data_path, max_seq_len=512): @@ -154,32 +229,6 @@ def get_dataset(data_path, max_seq_len=512): return json_lines -def run_inference(ds, uie, epoch=1, warmup_steps=10): - for j, sample in enumerate(ds): - if j > warmup_steps: - break - uie.set_schema([sample['prompt']]) - result = uie.predict([sample['content']]) - print(f"Run {warmup_steps} steps to warm up") - start = time.time() - for ep in range(epoch): - curr_start = time.time() - for i, sample in enumerate(ds): - uie.set_schema([sample['prompt']]) - result 
= uie.predict([sample['content']]) - print( - f"Epoch {ep} average time = {(time.time() - curr_start) * 1000.0 / (len(ds)):.4f} ms" - ) - end = time.time() - runtime_statis = uie.print_statis_info_of_runtime() - print(f"Final:") - print(runtime_statis) - print( - f"Total average time = {(end - start) * 1000.0 / (len(ds) * epoch):.4f} ms" - ) - print() - - if __name__ == '__main__': args = parse_arguments() runtime_option = build_option(args) @@ -187,6 +236,25 @@ def run_inference(ds, uie, epoch=1, warmup_steps=10): param_path = os.path.join(args.model_dir, "inference.pdiparams") vocab_path = os.path.join(args.model_dir, "vocab.txt") + gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info + dump_result = dict() + end2end_statis = list() + cpu_mem = list() + gpu_mem = list() + gpu_util = list() + if args.device == "cpu": + file_path = args.model_dir + "_model_" + args.backend + "_" + \ + args.device + "_" + str(args.cpu_num_thread) + ".txt" + else: + if args.enable_trt_fp16: + file_path = args.model_dir + "_model_" + \ + args.backend + "_fp16_" + args.device + ".txt" + else: + file_path = args.model_dir + "_model_" + args.backend + "_" + args.device + ".txt" + f = open(file_path, "w") + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) + ds = get_dataset(args.data_path) schema = ["时间"] uie = UIEModel( @@ -195,9 +263,59 @@ def run_inference(ds, uie, epoch=1, warmup_steps=10): vocab_path, position_prob=0.5, max_length=args.max_length, + batch_size=args.batch_size, schema=schema, runtime_option=runtime_option, schema_language=SchemaLanguage.ZH) - uie.enable_record_time_of_runtime() - run_inference(ds, uie, args.epoch) + try: + if enable_collect_memory_info: + import multiprocessing + import subprocess + import psutil + import signal + import cpuinfo + enable_gpu = args.device == "gpu" + monitor = Monitor(enable_gpu, gpu_id) + monitor.start() + uie.enable_record_time_of_runtime() + + for ep in range(args.epoch): + for i, sample in enumerate(ds): + curr_start = time.time() + uie.set_schema([sample['prompt']]) + result = uie.predict([sample['content']]) + end2end_statis.append(time.time() - curr_start) + runtime_statis = uie.print_statis_info_of_runtime() + + warmup_iter = args.epoch * len(ds) // 5 + + end2end_statis_repeat = end2end_statis[warmup_iter:] + if enable_collect_memory_info: + monitor.stop() + mem_info = monitor.output() + dump_result["cpu_rss_mb"] = mem_info['cpu'][ + 'memory.used'] if 'cpu' in mem_info else 0 + dump_result["gpu_rss_mb"] = mem_info['gpu'][ + 'memory.used'] if 'gpu' in mem_info else 0 + dump_result["gpu_util"] = mem_info['gpu'][ + 'utilization.gpu'] if 'gpu' in mem_info else 0 + + dump_result["runtime"] = runtime_statis["avg_time"] * 1000 + dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 + + time_cost_str = f"Runtime(ms): {dump_result['runtime']}\n" \ + f"End2End(ms): {dump_result['end2end']}\n" + f.writelines(time_cost_str) + print(time_cost_str) + + if enable_collect_memory_info: + mem_info_str = f"cpu_rss_mb: {dump_result['cpu_rss_mb']}\n" \ + f"gpu_rss_mb: {dump_result['gpu_rss_mb']}\n" \ + f"gpu_util: {dump_result['gpu_util']}\n" + f.writelines(mem_info_str) + print(mem_info_str) + except: + f.writelines("!!!!!Infer Failed\n") + + f.close() diff --git a/benchmark/run_benchmark_uie.sh b/benchmark/run_benchmark_uie.sh index 58030d5f6c1..5ba9e88dbb0 100644 --- a/benchmark/run_benchmark_uie.sh +++ b/benchmark/run_benchmark_uie.sh @@ -1,22 +1,27 @@ # wget 
https://bj.bcebos.com/fastdeploy/benchmark/uie/reimbursement_form_data.txt # wget https://bj.bcebos.com/fastdeploy/models/uie/uie-base.tgz # tar xvfz uie-base.tgz + +DEVICE_ID=0 + +echo "[FastDeploy] Running UIE benchmark..." + # GPU echo "-------------------------------GPU Benchmark---------------------------------------" -python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device gpu -python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device gpu -python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device gpu --use_fp16 False -python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device gpu --use_fp16 False -python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device gpu --use_fp16 True -python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device gpu --use_fp16 True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device_id $DEVICE_ID --device gpu --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device_id $DEVICE_ID --device gpu --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 False --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 False --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 True --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 True --enable_collect_memory_info True echo "-----------------------------------------------------------------------------------" # CPU echo "-------------------------------CPU Benchmark---------------------------------------" for cpu_num_threads in 1 2 4 8 16; do - python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device cpu --cpu_num_threads ${cpu_num_threads} - python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device cpu --cpu_num_threads ${cpu_num_threads} - python benchmark_uie.py --log_interval 100 --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend openvino --device cpu --cpu_num_threads ${cpu_num_threads} + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device cpu --cpu_num_threads ${cpu_num_threads} 
--enable_collect_memory_info True + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend openvino --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True done echo "-----------------------------------------------------------------------------------" From 34aebb1de34c692df26c76617f066c4196fb2c03 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 29 Dec 2022 10:53:25 +0000 Subject: [PATCH 06/20] Fix cpu_num_thread->cpu_num_threads --- benchmark/benchmark_uie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/benchmark_uie.py b/benchmark/benchmark_uie.py index 40c3ac4c48c..18e7566fea5 100644 --- a/benchmark/benchmark_uie.py +++ b/benchmark/benchmark_uie.py @@ -245,7 +245,7 @@ def get_dataset(data_path, max_seq_len=512): gpu_util = list() if args.device == "cpu": file_path = args.model_dir + "_model_" + args.backend + "_" + \ - args.device + "_" + str(args.cpu_num_thread) + ".txt" + args.device + "_" + str(args.cpu_num_threads) + ".txt" else: if args.enable_trt_fp16: file_path = args.model_dir + "_model_" + \ From cefdadf5e2141c11247b089d04f544efbd039d06 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 29 Dec 2022 11:09:26 +0000 Subject: [PATCH 07/20] Update backend name --- benchmark/benchmark_uie.py | 10 +++++----- benchmark/run_benchmark_uie.sh | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/benchmark/benchmark_uie.py b/benchmark/benchmark_uie.py index 18e7566fea5..44c562d7e66 100644 --- a/benchmark/benchmark_uie.py +++ b/benchmark/benchmark_uie.py @@ -28,8 +28,8 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - default='pp', - choices=['ort', 'pp', 'trt', 'pp-trt', 'openvino'], + default='paddle', + choices=['ort', 'paddle', 'trt', 'paddle_trt', 'ov'], help="The inference runtime backend.") parser.add_argument( "--device_id", type=int, default=0, help="device(gpu) id") @@ -67,15 +67,15 @@ def build_option(args): option.set_cpu_thread_num(args.cpu_num_threads) else: option.use_gpu(args.device_id) - if args.backend == 'pp': + if args.backend == 'paddle': option.use_paddle_backend() elif args.backend == 'ort': option.use_ort_backend() - elif args.backend == 'openvino': + elif args.backend == 'ov': option.use_openvino_backend() else: option.use_trt_backend() - if args.backend == 'pp-trt': + if args.backend == 'paddle_trt': option.enable_paddle_to_trt() option.enable_paddle_trt_collect_shape() trt_file = os.path.join(args.model_dir, "infer.trt") diff --git a/benchmark/run_benchmark_uie.sh b/benchmark/run_benchmark_uie.sh index 5ba9e88dbb0..51eb5d97328 100644 --- a/benchmark/run_benchmark_uie.sh +++ b/benchmark/run_benchmark_uie.sh @@ -8,20 +8,20 @@ echo "[FastDeploy] Running UIE benchmark..." 
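This patch renames the `--backend` choices (`pp`→`paddle`, `pp-trt`→`paddle_trt`, `openvino`→`ov`) consistently across the Python script and this launcher. For reference, the renamed flags dispatch onto `RuntimeOption` the way `build_option()` does; a condensed sketch, not the full function:

```python
def select_backend(option, backend):
    if backend == "paddle":
        option.use_paddle_backend()
    elif backend == "ort":
        option.use_ort_backend()
    elif backend == "ov":
        option.use_openvino_backend()
    else:
        # Both 'trt' and 'paddle_trt' run TensorRT; 'paddle_trt' additionally
        # routes the engine through Paddle Inference and collects shapes.
        option.use_trt_backend()
        if backend == "paddle_trt":
            option.enable_paddle_to_trt()
            option.enable_paddle_trt_collect_shape()
```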
# GPU echo "-------------------------------GPU Benchmark---------------------------------------" -python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device_id $DEVICE_ID --device gpu --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle --device_id $DEVICE_ID --device gpu --enable_collect_memory_info True python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device_id $DEVICE_ID --device gpu --enable_collect_memory_info True -python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 False --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle_trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 False --enable_collect_memory_info True python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 False --enable_collect_memory_info True -python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp-trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 True --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle_trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 True --enable_collect_memory_info True python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 True --enable_collect_memory_info True echo "-----------------------------------------------------------------------------------" # CPU echo "-------------------------------CPU Benchmark---------------------------------------" -for cpu_num_threads in 1 2 4 8 16; +for cpu_num_threads in 1 8; do - python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend pp --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True - python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend openvino --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ov --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True done echo "-----------------------------------------------------------------------------------" From dd5759bd990834d2217ba634c9a96382b952e204 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Thu, 29 Dec 2022 21:14:39 +0800 Subject: [PATCH 08/20] [Model] Update PPSeg Preprocess (#1007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 更新PPSeg 
pybind and python * 更新PPSeg pybind and python --- .../paddleclas/rknpu2/cpp/README.md | 6 ++--- .../paddleclas/rknpu2/python/README.md | 6 ++--- .../paddleseg/rknpu2/cpp/infer.cc | 3 ++- .../paddleseg/rknpu2/python/infer.py | 3 ++- .../vision/segmentation/ppseg/ppseg_pybind.cc | 9 ++++--- .../vision/segmentation/ppseg/preprocessor.cc | 26 ++++++++++++------- .../vision/segmentation/ppseg/preprocessor.h | 14 ++++++---- .../vision/segmentation/ppseg/__init__.py | 13 +++++++--- 8 files changed, 51 insertions(+), 29 deletions(-) diff --git a/examples/vision/classification/paddleclas/rknpu2/cpp/README.md b/examples/vision/classification/paddleclas/rknpu2/cpp/README.md index 1e1883486d1..c21d1d77b37 100644 --- a/examples/vision/classification/paddleclas/rknpu2/cpp/README.md +++ b/examples/vision/classification/paddleclas/rknpu2/cpp/README.md @@ -64,8 +64,8 @@ cd ./build/install ## 运行结果展示 ClassifyResult( -label_ids: 153, -scores: 0.684570, +label_ids: 153, +scores: 0.684570, ) ## 注意事项 @@ -75,4 +75,4 @@ DisablePermute(C++)或`disable_permute(Python),在预处理阶段禁用数据 ## 其它文档 - [ResNet50_vd Python 部署](../python) - [模型预测结果说明](../../../../../../docs/api/vision_results/) -- [转换ResNet50_vd RKNN模型文档](../README.md) \ No newline at end of file +- [转换ResNet50_vd RKNN模型文档](../README.md) diff --git a/examples/vision/classification/paddleclas/rknpu2/python/README.md b/examples/vision/classification/paddleclas/rknpu2/python/README.md index b85bb81f70a..f1f0994d857 100644 --- a/examples/vision/classification/paddleclas/rknpu2/python/README.md +++ b/examples/vision/classification/paddleclas/rknpu2/python/README.md @@ -19,8 +19,8 @@ python3 infer.py --model_file ./ResNet50_vd_infer/ResNet50_vd_infer_rk3588.rknn # 运行完成后返回结果如下所示 ClassifyResult( -label_ids: 153, -scores: 0.684570, +label_ids: 153, +scores: 0.684570, ) ``` @@ -32,4 +32,4 @@ DisablePermute(C++)或`disable_permute(Python),在预处理阶段禁用数据 ## 其它文档 - [ResNet50_vd C++部署](../cpp) - [模型预测结果说明](../../../../../../docs/api/vision_results/) -- [转换ResNet50_vd RKNN模型文档](../README.md) \ No newline at end of file +- [转换ResNet50_vd RKNN模型文档](../README.md) diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc index 834b2ccb3fc..f80d3fc8f5e 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc @@ -62,7 +62,8 @@ void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) { std::cerr << "Failed to initialize." 
<< std::endl; return; } - model.GetPreprocessor().DisableNormalizeAndPermute(); + model.GetPreprocessor().DisablePermute(); + model.GetPreprocessor().DisableNormalize(); fastdeploy::TimeCounter tc; tc.Start(); diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py index 4168d591df8..193a6dfb9b3 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py +++ b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py @@ -49,7 +49,8 @@ def build_option(args): runtime_option=runtime_option, model_format=fd.ModelFormat.RKNN) -model.preprocessor.disable_normalize_and_permute() +model.preprocessor.disable_normalize() +model.preprocessor.disable_permute() # 预测图片分割结果 im = cv2.imread(args.image) diff --git a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc index e687d3cc413..78c7c9ccc3b 100644 --- a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc +++ b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc @@ -36,9 +36,12 @@ void BindPPSeg(pybind11::module& m) { } return make_pair(outputs, imgs_info);; }) - .def("disable_normalize_and_permute", - &vision::segmentation::PaddleSegPreprocessor::DisableNormalizeAndPermute) - + .def("disable_normalize", [](vision::segmentation::PaddleSegPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::segmentation::PaddleSegPreprocessor& self) { + self.DisablePermute(); + }) .def_property("is_vertical_screen", &vision::segmentation::PaddleSegPreprocessor::GetIsVerticalScreen, &vision::segmentation::PaddleSegPreprocessor::SetIsVerticalScreen); diff --git a/fastdeploy/vision/segmentation/ppseg/preprocessor.cc b/fastdeploy/vision/segmentation/ppseg/preprocessor.cc index 027309aad0f..92b0378955f 100644 --- a/fastdeploy/vision/segmentation/ppseg/preprocessor.cc +++ b/fastdeploy/vision/segmentation/ppseg/preprocessor.cc @@ -43,7 +43,7 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { FDASSERT(op.IsMap(), "Require the transform information in yaml be Map type."); if (op["type"].as() == "Normalize") { - if (!disable_normalize_and_permute_) { + if (!disable_normalize_) { std::vector mean = {0.5, 0.5, 0.5}; std::vector std = {0.5, 0.5, 0.5}; if (op["mean"]) { @@ -55,7 +55,7 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { processors_.push_back(std::make_shared(mean, std)); } } else if (op["type"].as() == "Resize") { - is_contain_resize_op = true; + is_contain_resize_op_ = true; const auto& target_size = op["target_size"]; int resize_width = target_size[0].as(); int resize_height = target_size[1].as(); @@ -73,13 +73,13 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { auto input_shape = cfg["Deploy"]["input_shape"]; int input_height = input_shape[2].as(); int input_width = input_shape[3].as(); - if (input_height != -1 && input_width != -1 && !is_contain_resize_op) { - is_contain_resize_op = true; + if (input_height != -1 && input_width != -1 && !is_contain_resize_op_) { + is_contain_resize_op_ = true; processors_.insert(processors_.begin(), std::make_shared(input_width, input_height)); } } - if (!disable_normalize_and_permute_) { + if (!disable_permute_) { processors_.push_back(std::make_shared()); } @@ -121,7 +121,7 @@ bool PaddleSegPreprocessor::Run(std::vector* images, std::vectorsize(); // Batch preprocess : resize all images to the largest image shape in batch - if (!is_contain_resize_op && img_num > 1) { + if 
(!is_contain_resize_op_ && img_num > 1) { int max_width = 0; int max_height = 0; for (size_t i = 0; i < img_num; ++i) { @@ -156,14 +156,20 @@ bool PaddleSegPreprocessor::Run(std::vector* images, std::vectordisable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; + } +} +void PaddleSegPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is loaded during preprocessing if (!BuildPreprocessPipelineFromConfig()) { FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; } } - } // namespace segmentation } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/segmentation/ppseg/preprocessor.h b/fastdeploy/vision/segmentation/ppseg/preprocessor.h index faa7fb8de59..6452e8e0e22 100644 --- a/fastdeploy/vision/segmentation/ppseg/preprocessor.h +++ b/fastdeploy/vision/segmentation/ppseg/preprocessor.h @@ -49,8 +49,10 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor { is_vertical_screen_ = value; } - // This function will disable normalize and hwc2chw in preprocessing step. - void DisableNormalizeAndPermute(); + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute(); private: virtual bool BuildPreprocessPipelineFromConfig(); @@ -61,10 +63,12 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor { */ bool is_vertical_screen_ = false; - // for recording the switch of normalize and hwc2chw - bool disable_normalize_and_permute_ = false; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; - bool is_contain_resize_op = false; + bool is_contain_resize_op_ = false; bool initialized_ = false; }; diff --git a/python/fastdeploy/vision/segmentation/ppseg/__init__.py b/python/fastdeploy/vision/segmentation/ppseg/__init__.py index 455785686bc..f0106a39a27 100644 --- a/python/fastdeploy/vision/segmentation/ppseg/__init__.py +++ b/python/fastdeploy/vision/segmentation/ppseg/__init__.py @@ -104,10 +104,17 @@ def run(self, input_ims): """ return self._preprocessor.run(input_ims) - def disable_normalize_and_permute(self): - """To disable normalize and hwc2chw in preprocessing step. + def disable_normalize(self): """ - return self._preprocessor.disable_normalize_and_permute() + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._preprocessor.disable_permute() @property def is_vertical_screen(self): From d275c3ba0219c314e6fc8e0db5233c1067a7d144 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Thu, 29 Dec 2022 21:15:23 +0800 Subject: [PATCH 09/20] [Model] Update PPDet Preprocess (#1006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 更新导航文档 * 更新导航文档 * 更新导航文档 * 更新导航文档 * 更新PPDet PreProcess * 更新PPDet PreProcess * 更新PPDet pybind and python * 更新 * 更新ppdet --- fastdeploy/vision/detection/ppdet/model.h | 1 + .../vision/detection/ppdet/ppdet_pybind.cc | 8 ++- .../vision/detection/ppdet/preprocessor.cc | 65 ++++++++++++------- .../vision/detection/ppdet/preprocessor.h | 13 +++- .../vision/detection/ppdet/__init__.py | 14 +++- 5 files changed, 74 insertions(+), 27 deletions(-) diff --git a/fastdeploy/vision/detection/ppdet/model.h b/fastdeploy/vision/detection/ppdet/model.h index be13b0b4d58..17502cf21ec 100755 --- a/fastdeploy/vision/detection/ppdet/model.h +++ b/fastdeploy/vision/detection/ppdet/model.h @@ -68,6 +68,7 @@ class FASTDEPLOY_DECL PPYOLOE : public PPDetBase { valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; valid_timvx_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; valid_ascend_backends = {Backend::LITE}; initialized = Initialize(); } diff --git a/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc b/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc index 09c89dfcef9..573164910bf 100644 --- a/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc +++ b/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc @@ -31,7 +31,13 @@ void BindPPDet(pybind11::module& m) { outputs[i].StopSharing(); } return outputs; - }); + }) + .def("disable_normalize", [](vision::detection::PaddleDetPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::detection::PaddleDetPreprocessor& self) { + self.DisablePermute(); + });; pybind11::class_( m, "PaddleDetPostprocessor") diff --git a/fastdeploy/vision/detection/ppdet/preprocessor.cc b/fastdeploy/vision/detection/ppdet/preprocessor.cc index bb38c67ec6a..a18d43b708c 100644 --- a/fastdeploy/vision/detection/ppdet/preprocessor.cc +++ b/fastdeploy/vision/detection/ppdet/preprocessor.cc @@ -22,19 +22,19 @@ namespace vision { namespace detection { PaddleDetPreprocessor::PaddleDetPreprocessor(const std::string& config_file) { - FDASSERT(BuildPreprocessPipelineFromConfig(config_file), + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), "Failed to create PaddleDetPreprocessor."); initialized_ = true; } -bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig( - const std::string& config_file) { +bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig() { processors_.clear(); YAML::Node cfg; try { - cfg = YAML::LoadFile(config_file); + cfg = YAML::LoadFile(config_file_); } catch (YAML::BadFile& e) { - FDERROR << "Failed to load yaml file " << config_file + FDERROR << "Failed to load yaml file " << config_file_ << ", maybe you should check this file." 
<< std::endl; return false; } @@ -45,21 +45,23 @@ bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig( for (const auto& op : cfg["Preprocess"]) { std::string op_name = op["type"].as(); if (op_name == "NormalizeImage") { - auto mean = op["mean"].as>(); - auto std = op["std"].as>(); - bool is_scale = true; - if (op["is_scale"]) { - is_scale = op["is_scale"].as(); - } - std::string norm_type = "mean_std"; - if (op["norm_type"]) { - norm_type = op["norm_type"].as(); - } - if (norm_type != "mean_std") { - std::fill(mean.begin(), mean.end(), 0.0); - std::fill(std.begin(), std.end(), 1.0); + if (!disable_normalize_) { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); } - processors_.push_back(std::make_shared(mean, std, is_scale)); } else if (op_name == "Resize") { bool keep_ratio = op["keep_ratio"].as(); auto target_size = op["target_size"].as>(); @@ -104,10 +106,12 @@ bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig( return false; } } - if (has_permute) { - // permute = cast + HWC2CHW - processors_.push_back(std::make_shared("float")); - processors_.push_back(std::make_shared()); + if (!disable_permute_) { + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } } // Fusion will improve performance @@ -202,7 +206,20 @@ bool PaddleDetPreprocessor::Run(std::vector* images, return true; } - +void PaddleDetPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; + } +} +void PaddleDetPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; + } +} } // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/ppdet/preprocessor.h b/fastdeploy/vision/detection/ppdet/preprocessor.h index 2733c450e8a..8371afb696f 100644 --- a/fastdeploy/vision/detection/ppdet/preprocessor.h +++ b/fastdeploy/vision/detection/ppdet/preprocessor.h @@ -39,10 +39,21 @@ class FASTDEPLOY_DECL PaddleDetPreprocessor { */ bool Run(std::vector* images, std::vector* outputs); + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + private: - bool BuildPreprocessPipelineFromConfig(const std::string& config_file); + bool BuildPreprocessPipelineFromConfig(); std::vector> processors_; bool initialized_ = false; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + // read config file + std::string config_file_; }; } // namespace detection diff --git a/python/fastdeploy/vision/detection/ppdet/__init__.py b/python/fastdeploy/vision/detection/ppdet/__init__.py index 45734eef09e..f9b162aca63 100644 --- a/python/fastdeploy/vision/detection/ppdet/__init__.py +++ b/python/fastdeploy/vision/detection/ppdet/__init__.py @@ -36,6 +36,18 @@ def run(self, input_ims): """ return self._preprocessor.run(input_ims) + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() + class PaddleDetPostprocessor: def __init__(self): @@ -500,4 +512,4 @@ def __init__(self, self._model = C.vision.detection.RTMDet( model_file, params_file, config_file, self._runtime_option, model_format) - assert self.initialized, "RTMDet model initialize failed." \ No newline at end of file + assert self.initialized, "RTMDet model initialize failed." From 0c09989839ce4f599e7430f714b819d4e095ad83 Mon Sep 17 00:00:00 2001 From: yeliang2258 <30516196+yeliang2258@users.noreply.github.com> Date: Fri, 30 Dec 2022 10:54:39 +0800 Subject: [PATCH 10/20] [Other]Update Paddle Lite for RV1126 (#1013) update lite link --- cmake/toolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/toolchain.cmake b/cmake/toolchain.cmake index 4b3485748b7..85bd057981b 100755 --- a/cmake/toolchain.cmake +++ b/cmake/toolchain.cmake @@ -10,7 +10,7 @@ if (DEFINED TARGET_ABI) set(OPENCV_URL "https://bj.bcebos.com/fastdeploy/third_libs/opencv-linux-armv7hf-4.6.0.tgz") set(OPENCV_FILENAME "opencv-linux-armv7hf-4.6.0") if(WITH_TIMVX) - set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-armhf-timvx-1130.tgz") + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-armhf-timvx-20221229.tgz") else() message(STATUS "PADDLELITE_URL will be configured if WITH_TIMVX=ON.") endif() From f23c6c03af5904d6a8f1f7239850b6b6ea2d51aa Mon Sep 17 00:00:00 2001 From: Wang Xinyu Date: Fri, 30 Dec 2022 11:58:56 +0800 Subject: [PATCH 11/20] [Other] Remove TRT static libs in package (#1011) * remove duplicated and useless libs * use os system to run ldd * remove filter libs by ldd --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 549d7b708d6..a68243dec8e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -387,9 +387,9 @@ if(ENABLE_TRT_BACKEND) find_package(Python COMPONENTS Interpreter Development REQUIRED) message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...") execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib) - file(GLOB_RECURSE TRT_STAIC_LIBS ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib/*.a) + file(GLOB_RECURSE TRT_STATIC_LIBS ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib/*.a) if(TRT_STATIC_LIBS) - file(REMOVE 
${TRT_STAIC_LIBS}) + file(REMOVE ${TRT_STATIC_LIBS}) endif() if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) execute_process(COMMAND sh -c "ls *.so*" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib From 01b67856a6b40ee842ef3c3690406e4cfaf3249f Mon Sep 17 00:00:00 2001 From: heliqi <1101791222@qq.com> Date: Fri, 30 Dec 2022 12:00:07 +0800 Subject: [PATCH 12/20] [Serving]update ocr model.py from np.object to np.object_ (#1017) * [Serving]update ocr model.py from np.object to np.object_ * Update model.py --- .../ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py | 2 +- .../ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py b/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py index faaca906737..9cfe2583e02 100644 --- a/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py +++ b/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py @@ -217,7 +217,7 @@ def execute(self, requests): out_tensor_0 = pb_utils.Tensor( self.output_names[0], np.array( - batch_rec_texts, dtype=np.object)) + batch_rec_texts, dtype=np.object_)) out_tensor_1 = pb_utils.Tensor(self.output_names[1], np.array(batch_rec_scores)) inference_response = pb_utils.InferenceResponse( diff --git a/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py b/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py index fe66e8c3f3c..c046cd929b7 100755 --- a/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py +++ b/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py @@ -96,7 +96,7 @@ def execute(self, requests): results = self.postprocessor.run([infer_outputs]) out_tensor_0 = pb_utils.Tensor( self.output_names[0], np.array( - results[0], dtype=np.object)) + results[0], dtype=np.object_)) out_tensor_1 = pb_utils.Tensor(self.output_names[1], np.array(results[1])) inference_response = pb_utils.InferenceResponse( From f038268c794eaec10690e317b40c504e5a7f7006 Mon Sep 17 00:00:00 2001 From: Jason Date: Fri, 30 Dec 2022 15:06:13 +0800 Subject: [PATCH 13/20] [Bug Fix] Fix build with Paddle Inference on Jetson (#1019) Fix build with Paddle Inference on Jetson --- CMakeLists.txt | 12 +++++------ FastDeploy.cmake.in | 7 +++---- cmake/paddle_inference.cmake | 39 +++++++++++++++++++++++++----------- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a68243dec8e..44ff6c7863a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -240,12 +240,12 @@ if(ENABLE_PADDLE_BACKEND) add_definitions(-DENABLE_PADDLE_BACKEND) list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS}) include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake) - if(NOT APPLE) - list(APPEND DEPEND_LIBS external_paddle_inference external_dnnl external_omp) - else() - # no third parties libs(mkldnn and omp) need to - # link into paddle_inference on MacOS OSX. 
- list(APPEND DEPEND_LIBS external_paddle_inference) + list(APPEND DEPEND_LIBS external_paddle_inference) + if(external_dnnl_FOUND) + list(APPEND DEPEND_LIBS external_dnnl external_omp) + endif() + if(external_ort_FOUND) + list(APPEND DEPEND_LIBS external_p2o external_ort) endif() endif() diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 456a4d3217f..83114e90138 100755 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -74,10 +74,9 @@ if(ENABLE_PADDLE_BACKEND) set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/libmkldnn.so.0") set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.so") endif() - if(NOT APPLE) - list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB} ${DNNL_LIB} ${IOMP_LIB}) - else() - list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB}) + list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB}) + if(EXISTS "${DNNL_LIB}") + list(APPEND FASTDEPLOY_LIBS ${DNNL_LIB} ${IOMP_LIB}) endif() endif() diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake index 3822f9ac3ab..e33a14eb6c5 100644 --- a/cmake/paddle_inference.cmake +++ b/cmake/paddle_inference.cmake @@ -40,16 +40,24 @@ if(WIN32) CACHE FILEPATH "paddle_inference compile library." FORCE) set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/mkldnn.lib") set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5md.lib") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/paddle2onnx.lib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/onnxruntime.lib") elseif(APPLE) set(PADDLEINFERENCE_COMPILE_LIB "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.dylib" CACHE FILEPATH "paddle_inference compile library." FORCE) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.dylib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.dylib") else() set(PADDLEINFERENCE_COMPILE_LIB "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so" CACHE FILEPATH "paddle_inference compile library." FORCE) set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so") endif(WIN32) @@ -116,16 +124,23 @@ set_property(TARGET external_paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLEINFERENCE_COMPILE_LIB}) add_dependencies(external_paddle_inference ${PADDLEINFERENCE_PROJECT}) -if (NOT APPLE) - # no third parties libs(mkldnn and omp) need to - # link into paddle_inference on MacOS OSX. 
- add_library(external_dnnl STATIC IMPORTED GLOBAL) - set_property(TARGET external_dnnl PROPERTY IMPORTED_LOCATION - ${DNNL_LIB}) - add_dependencies(external_dnnl ${PADDLEINFERENCE_PROJECT}) - add_library(external_omp STATIC IMPORTED GLOBAL) - set_property(TARGET external_omp PROPERTY IMPORTED_LOCATION - ${OMP_LIB}) - add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT}) -endif() +add_library(external_p2o STATIC IMPORTED GLOBAL) +set_property(TARGET external_p2o PROPERTY IMPORTED_LOCATION + ${P2O_LIB}) +add_dependencies(external_p2o ${PADDLEINFERENCE_PROJECT}) + +add_library(external_ort STATIC IMPORTED GLOBAL) +set_property(TARGET external_ort PROPERTY IMPORTED_LOCATION + ${ORT_LIB}) +add_dependencies(external_ort ${PADDLEINFERENCE_PROJECT}) + +add_library(external_dnnl STATIC IMPORTED GLOBAL) +set_property(TARGET external_dnnl PROPERTY IMPORTED_LOCATION + ${DNNL_LIB}) +add_dependencies(external_dnnl ${PADDLEINFERENCE_PROJECT}) + +add_library(external_omp STATIC IMPORTED GLOBAL) +set_property(TARGET external_omp PROPERTY IMPORTED_LOCATION + ${OMP_LIB}) +add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT}) From 665f2e549a54b47b0cb3f2a978143c3f14443758 Mon Sep 17 00:00:00 2001 From: leiqing <54695910+leiqing1@users.noreply.github.com> Date: Fri, 30 Dec 2022 16:10:31 +0800 Subject: [PATCH 14/20] Update README.md --- examples/vision/detection/nanodet_plus/python/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/vision/detection/nanodet_plus/python/README.md b/examples/vision/detection/nanodet_plus/python/README.md index b5085662ce2..a89e15d1b17 100644 --- a/examples/vision/detection/nanodet_plus/python/README.md +++ b/examples/vision/detection/nanodet_plus/python/README.md @@ -69,7 +69,7 @@ NanoDetPlus模型加载和初始化,其中model_file为导出的ONNX模型格 > > * **padding_value**(list[float]): 通过此参数可以修改图片在resize时候做填充(padding)的值, 包含三个浮点型元素, 分别表示三个通道的值, 默认值为[0, 0, 0] > > * **keep_ratio**(bool): 通过此参数指定resize时是否保持宽高比例不变,默认是fasle. > > * **reg_max**(int): GFL回归中的reg_max参数,默认是7. 
-> > * **downsample_strides**(list[int]): 通过此参数可以修改生成anchor的特征图的下采样倍数, 包含三个整型元素, 分别表示默认的生成anchor的下采样倍数, 默认值为[8, 16, 32, 64] +> > * **downsample_strides**(list[int]): 通过此参数可以修改生成anchor的特征图的下采样倍数, 包含四个整型元素, 分别表示默认的生成anchor的下采样倍数, 默认值为[8, 16, 32, 64] From 6310ddc8d63efe8e2893a38929d898e3696e23dc Mon Sep 17 00:00:00 2001 From: heliqi <1101791222@qq.com> Date: Fri, 30 Dec 2022 16:43:47 +0800 Subject: [PATCH 15/20] [Serving]update np.object to np.object_ (#1021) np.object to np.object_ --- examples/text/uie/serving/models/uie/1/model.py | 2 +- .../paddleclas/serving/models/postprocess/1/model.py | 2 +- .../paddledetection/serving/models/postprocess/1/model.py | 2 +- .../detection/yolov5/serving/models/postprocess/1/model.py | 2 +- .../detection/yolov5/serving/models/preprocess/1/model.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/text/uie/serving/models/uie/1/model.py b/examples/text/uie/serving/models/uie/1/model.py index 5bb1c816417..b839ae0657e 100644 --- a/examples/text/uie/serving/models/uie/1/model.py +++ b/examples/text/uie/serving/models/uie/1/model.py @@ -141,7 +141,7 @@ def execute(self, requests): self.uie_model_.set_schema(schema) results = self.uie_model_.predict(texts, return_dict=True) - results = np.array(results, dtype=np.object) + results = np.array(results, dtype=np.object_) out_tensor = pb_utils.Tensor(self.output_names[0], results) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor, ]) diff --git a/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py b/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py index 0ab7dcdc4ae..de000f6eee8 100755 --- a/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py +++ b/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py @@ -92,7 +92,7 @@ def execute(self, requests): results = self.postprocess_.run([infer_outputs, ]) r_str = fd.vision.utils.fd_result_to_json(results) - r_np = np.array(r_str, dtype=np.object) + r_np = np.array(r_str, dtype=np.object_) out_tensor = pb_utils.Tensor(self.output_names[0], r_np) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor, ]) diff --git a/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py b/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py index 4872b0dee2c..35054e51657 100644 --- a/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py +++ b/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py @@ -95,7 +95,7 @@ def execute(self, requests): results = self.postprocess_.run(infer_outputs) r_str = fd.vision.utils.fd_result_to_json(results) - r_np = np.array(r_str, dtype=np.object) + r_np = np.array(r_str, dtype=np.object_) out_tensor = pb_utils.Tensor(self.output_names[0], r_np) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor, ]) diff --git a/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py b/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py index 7c608db4372..1204446c438 100644 --- a/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py +++ b/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py @@ -96,7 +96,7 @@ def execute(self, requests): results = self.postprocessor_.run([infer_outputs], im_infos) r_str = fd.vision.utils.fd_result_to_json(results) - r_np = np.array(r_str, dtype=np.object) + r_np = 
np.array(r_str, dtype=np.object_) out_tensor = pb_utils.Tensor(self.output_names[0], r_np) inference_response = pb_utils.InferenceResponse( diff --git a/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py b/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py index cf4f7e8e8b3..d60de6541bc 100644 --- a/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py +++ b/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py @@ -95,7 +95,7 @@ def execute(self, requests): dlpack_tensor) output_tensor_1 = pb_utils.Tensor( self.output_names[1], np.array( - im_infos, dtype=np.object)) + im_infos, dtype=np.object_)) inference_response = pb_utils.InferenceResponse( output_tensors=[output_tensor_0, output_tensor_1]) responses.append(inference_response) From c41e71b13326ae2d1e0004c94efbd96787e8fa7d Mon Sep 17 00:00:00 2001 From: leiqing <54695910+leiqing1@users.noreply.github.com> Date: Tue, 3 Jan 2023 08:09:13 +0800 Subject: [PATCH 16/20] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e8f014f6ffd..b1dd06cd5cb 120000 --- a/README.md +++ b/README.md @@ -1 +1 @@ -README_CN.md +README_EN.md From 971cc051f4bb8c02395a97f10fb9cebf8546ba99 Mon Sep 17 00:00:00 2001 From: Zeref996 <53218160+Zeref996@users.noreply.github.com> Date: Tue, 3 Jan 2023 09:37:53 +0800 Subject: [PATCH 17/20] fresh doc version release/1.0.2 (#1023) fresh doc version --- .../download_prebuilt_libraries.md | 18 +++++++-------- .../download_prebuilt_libraries.md | 22 +++++++++---------- serving/README.md | 4 ++-- serving/README_CN.md | 4 ++-- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/cn/build_and_install/download_prebuilt_libraries.md b/docs/cn/build_and_install/download_prebuilt_libraries.md index c3c733467da..30ad094f977 100755 --- a/docs/cn/build_and_install/download_prebuilt_libraries.md +++ b/docs/cn/build_and_install/download_prebuilt_libraries.md @@ -22,7 +22,7 @@ FastDeploy提供各平台预编译库,供开发者直接下载安装使用。 ### Python安装 -Release版本(当前最新1.0.1)安装 +Release版本(当前最新1.0.2)安装 ```bash pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html ``` @@ -43,8 +43,8 @@ Release版本 | 平台 | 文件 | 说明 | | :--- | :--- | :---- | -| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.1.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2编译产出 | -| Windows x64 | [fastdeploy-win-x64-gpu-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.1.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2编译产出 | +| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.2.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2编译产出 | +| Windows x64 | [fastdeploy-win-x64-gpu-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.2.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2编译产出 | Develop版本(Nightly build) @@ -65,7 +65,7 @@ Develop版本(Nightly build) ### Python安装 -Release版本(当前最新1.0.1)安装 +Release版本(当前最新1.0.2)安装 ```bash pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html ``` @@ -81,11 +81,11 @@ Release版本 | 平台 | 文件 | 说明 | | :--- | :--- | :---- | -| Linux x64 | [fastdeploy-linux-x64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.1.tgz) | g++ 8.2编译产出 | -| Windows x64 | [fastdeploy-win-x64-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.1.zip) | 
Visual Studio 16 2019编译产出 | -| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.1.tgz) | clang++ 10.0.0编译产出| -| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.1.tgz) | clang++ 13.0.0编译产出 | -| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.1.tgz) | gcc 6.3编译产出 | +| Linux x64 | [fastdeploy-linux-x64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz) | g++ 8.2编译产出 | +| Windows x64 | [fastdeploy-win-x64-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.2.zip) | Visual Studio 16 2019编译产出 | +| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.2.tgz) | clang++ 10.0.0编译产出| +| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.2.tgz) | clang++ 13.0.0编译产出 | +| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.2.tgz) | gcc 6.3编译产出 | | Android armv7&v8 | [fastdeploy-android-1.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz) | NDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a | ## Java SDK安装 diff --git a/docs/en/build_and_install/download_prebuilt_libraries.md b/docs/en/build_and_install/download_prebuilt_libraries.md index 19da3ae1018..dc340009523 100644 --- a/docs/en/build_and_install/download_prebuilt_libraries.md +++ b/docs/en/build_and_install/download_prebuilt_libraries.md @@ -23,7 +23,7 @@ FastDeploy supports Computer Vision, Text and NLP model deployment on CPU and Nv ### Python SDK -Install the released version(the newest 1.0.1 for now) +Install the released version(the newest 1.0.2 for now) ``` pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html @@ -43,12 +43,12 @@ conda config --add channels conda-forge && conda install cudatoolkit=11.2 cudnn= ### C++ SDK -Install the released version(Latest 1.0.1) +Install the released version(Latest 1.0.2) | Platform | File | Description | |:----------- |:--------------------------------------------------------------------------------------------------------------------- |:--------------------------------------------------------- | -| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.1.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2 | -| Windows x64 | [fastdeploy-win-x64-gpu-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.1.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 | +| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.2.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2 | +| Windows x64 | [fastdeploy-win-x64-gpu-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.2.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 | Install the Develop version(Nightly build) @@ -70,7 +70,7 @@ FastDeploy supports computer vision, text and NLP model deployment on CPU with P ### Python SDK -Install the released version(Latest 1.0.1 for now) +Install the released version(Latest 1.0.2 for now) ``` pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html @@ -84,15 +84,15 @@ pip 
install fastdeploy-python==0.0.0 -f https://www.paddlepaddle.org.cn/whl/fast ### C++ SDK -Install the released version(Latest 1.0.1 for now, Android is 1.0.1) +Install the released version(Latest 1.0.2 for now, Android is 1.0.0) | Platform | File | Description | |:------------- |:--------------------------------------------------------------------------------------------------------------------- |:------------------------------ | -| Linux x64 | [fastdeploy-linux-x64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.1.tgz) | g++ 8.2 | -| Windows x64 | [fastdeploy-win-x64-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.1.zip) | Visual Studio 16 2019 | -| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.1.tgz) | clang++ 10.0.0| -| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.1.tgz) | clang++ 13.0.0 | -| Linux aarch64 | [fastdeploy-osx-arm64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.1.tgz) | gcc 6.3 | +| Linux x64 | [fastdeploy-linux-x64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz) | g++ 8.2 | +| Windows x64 | [fastdeploy-win-x64-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.2.zip) | Visual Studio 16 2019 | +| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.2.tgz) | clang++ 10.0.0| +| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.2.tgz) | clang++ 13.0.0 | +| Linux aarch64 | [fastdeploy-osx-arm64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.2.tgz) | gcc 6.3 | | Android armv7&v8 | [fastdeploy-android-1.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz)| NDK 25, clang++, support arm64-v8a and armeabi-v7a | ## Java SDK diff --git a/serving/README.md b/serving/README.md index 51b136045c1..4f7a74da956 100644 --- a/serving/README.md +++ b/serving/README.md @@ -20,7 +20,7 @@ FastDeploy builds an end-to-end serving deployment based on [Triton Inference Se CPU images only support Paddle/ONNX models for serving deployment on CPUs, and supported inference backends include OpenVINO, Paddle Inference, and ONNX Runtime ```shell -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-cpu-only-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-cpu-only-21.10 ``` #### GPU Image @@ -28,7 +28,7 @@ docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-cpu-only-21.10 GPU images support Paddle/ONNX models for serving deployment on GPU and CPU, and supported inference backends including OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime ``` -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-gpu-cuda11.4-trt8.4-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-gpu-cuda11.4-trt8.4-21.10 ``` Users can also compile the image by themselves according to their own needs, referring to the following documents: diff --git a/serving/README_CN.md b/serving/README_CN.md index 801be56ea7f..f3f436f641c 100644 --- a/serving/README_CN.md +++ b/serving/README_CN.md @@ -17,13 +17,13 @@ FastDeploy基于[Triton Inference Server](https://github.com/triton-inference-se #### CPU镜像 
CPU镜像仅支持Paddle/ONNX模型在CPU上进行服务化部署,支持的推理后端包括OpenVINO、Paddle Inference和ONNX Runtime ``` shell -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-cpu-only-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-cpu-only-21.10 ``` #### GPU镜像 GPU镜像支持Paddle/ONNX模型在GPU/CPU上进行服务化部署,支持的推理后端包括OpenVINO、TensorRT、Paddle Inference和ONNX Runtime ``` -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-gpu-cuda11.4-trt8.4-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-gpu-cuda11.4-trt8.4-21.10 ``` 用户也可根据自身需求,参考如下文档自行编译镜像 From 42f2e8d22b47e0237118cf25b622f3bc41cd4be7 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 3 Jan 2023 09:45:42 +0800 Subject: [PATCH 18/20] [Other] Add a interface to get all pretrained models available from hub model server (#1022) add get model list --- python/fastdeploy/__init__.py | 2 +- python/fastdeploy/download.py | 26 +++++++++++++++++++++ python/fastdeploy/utils/hub_model_server.py | 14 +++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/python/fastdeploy/__init__.py b/python/fastdeploy/__init__.py index 42db5c281cc..31735c6855d 100644 --- a/python/fastdeploy/__init__.py +++ b/python/fastdeploy/__init__.py @@ -36,5 +36,5 @@ from . import vision from . import pipeline from . import text -from .download import download, download_and_decompress, download_model +from .download import download, download_and_decompress, download_model, get_model_list from . import serving diff --git a/python/fastdeploy/download.py b/python/fastdeploy/download.py index 0b14ccf8e54..7af6042a8d2 100644 --- a/python/fastdeploy/download.py +++ b/python/fastdeploy/download.py @@ -213,6 +213,30 @@ def download_and_decompress(url, path='.', rename=None): return +def get_model_list(category: str=None): + ''' + Get all pre-trained models information supported by fd.download_model. + Args: + category(str): model category, if None, list all models in all categories. + Returns: + results(dict): a dictionary, key is category, value is a list which contains models information. + ''' + result = model_server.get_model_list() + if result['status'] != 0: + raise ValueError( + 'Failed to get pretrained models information from hub model server.' + ) + result = result['data'] + if category is None: + return result + elif category in result: + return {category: result[category]} + else: + raise ValueError( + 'No pretrained model in category {} can be downloaded now.'.format( + category)) + + def download_model(name: str, path: str=None, format: str=None, @@ -237,11 +261,13 @@ def download_model(name: str, if format == 'paddle': if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count( "zip") > 0: + archive_path = fullpath fullpath = decompress(fullpath) try: os.rename(fullpath, os.path.join(os.path.dirname(fullpath), name)) fullpath = os.path.join(os.path.dirname(fullpath), name) + os.remove(archive_path) except FileExistsError: pass print('Successfully download model at path: {}'.format(fullpath)) diff --git a/python/fastdeploy/utils/hub_model_server.py b/python/fastdeploy/utils/hub_model_server.py index 849763b9f61..3eb891e64c8 100644 --- a/python/fastdeploy/utils/hub_model_server.py +++ b/python/fastdeploy/utils/hub_model_server.py @@ -98,6 +98,20 @@ def request(self, path: str, params: dict) -> dict: except requests.exceptions.ConnectionError as e: raise ServerConnectionError(self._url) + def get_model_list(self): + ''' + Get all pre-trained models information in dataset. 
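+        The listing is fetched from the hub model server's
+        'fastdeploy_listmodels' endpoint, as the request below shows.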
+ Return: + result(dict): key is category name, value is a list which contains models \ + information such as name, format and version. + ''' + api = '{}/{}'.format(self._url, 'fastdeploy_listmodels') + try: + result = requests.get(api, timeout=self._timeout) + return result.json() + except requests.exceptions.ConnectionError as e: + raise ServerConnectionError(self._url) + def is_connected(self): return self.check(self._url) From f72846c7179d42760909f484715e33f8312b28b4 Mon Sep 17 00:00:00 2001 From: charl-u <115439700+charl-u@users.noreply.github.com> Date: Tue, 3 Jan 2023 10:01:10 +0800 Subject: [PATCH 19/20] [Doc]Revise one wording (#1028) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 第一次提交 * 补充一处漏翻译 * deleted: docs/en/quantize.md * Update one translation * Update en version * Update one translation in code * Standardize one writing * Standardize one writing * Update some en version * Fix a grammer problem * Update en version for api/vision result * Merge branch 'develop' of https://github.com/charl-u/FastDeploy into develop * Checkout the link in README in vision_results/ to the en documents * Modify a title * Add link to serving/docs/ * Finish translation of demo.md * Update english version of serving/docs/ * Update title of readme * Update some links * Modify a title * Update some links * Update en version of java android README * Modify some titles * Modify some titles * Modify some titles * modify article to document --- docs/en/build_and_install/download_prebuilt_libraries.md | 2 +- serving/docs/EN/compile-en.md | 2 +- serving/docs/EN/demo-en.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/build_and_install/download_prebuilt_libraries.md b/docs/en/build_and_install/download_prebuilt_libraries.md index dc340009523..b8e40035f37 100644 --- a/docs/en/build_and_install/download_prebuilt_libraries.md +++ b/docs/en/build_and_install/download_prebuilt_libraries.md @@ -4,7 +4,7 @@ English | [中文](../../cn/build_and_install/download_prebuilt_libraries.md) FastDeploy provides pre-built libraries for developers to download and install directly. Meanwhile, FastDeploy also offers easy access to compile so that developers can compile FastDeploy according to their own needs. -This article is divided into two parts: +This document is divided into two parts: - [1.GPU Deployment Environment](#1) - [2.CPU Deployment Environment](#2) diff --git a/serving/docs/EN/compile-en.md b/serving/docs/EN/compile-en.md index 298603e50ff..af59854f908 100644 --- a/serving/docs/EN/compile-en.md +++ b/serving/docs/EN/compile-en.md @@ -1,7 +1,7 @@ English | [中文](../zh_CN/compile.md) # FastDeploy Serving Deployment Image Compilation -This article is about how to create a FastDploy image. +This document is about how to create a FastDploy image. ## GPU Image diff --git a/serving/docs/EN/demo-en.md b/serving/docs/EN/demo-en.md index 0f47d8fa5ad..786af289715 100644 --- a/serving/docs/EN/demo-en.md +++ b/serving/docs/EN/demo-en.md @@ -1,6 +1,6 @@ English | [中文](../zh_CN/demo.md) # Service-oriented Deployment Demo -We take the YOLOv5 model as an simple example, and introduce how to execute a service-oriented deployment. For the detailed code, please refer to [Service-oriented Deployment of YOLOv5](../../../examples/vision/detection/yolov5/serving). It is recommend that you read the following documents before reading this article. +We take the YOLOv5 model as an simple example, and introduce how to execute a service-oriented deployment. 
For the detailed code, please refer to [Service-oriented Deployment of YOLOv5](../../../examples/vision/detection/yolov5/serving). It is recommend that you read the following documents before reading this document. - [Service-oriented Model Catalog Description](model_repository-en.md) (how to prepare the model catalog) - [Service-oriented Deployment Configuration Description](model_configuration-en.md) (the configuration option for runtime) From 87bcb5df21a406372afc208d96dc48c75fe09999 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 3 Jan 2023 10:47:08 +0800 Subject: [PATCH 20/20] [Model] add style transfer model (#922) * add style transfer model * add examples for generation model * add unit test * add speed comparison * add speed comparison * add variable for constant * add preprocessor and postprocessor * add preprocessor and postprocessor * fix * fix according to review Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- examples/vision/generation/anemigan/README.md | 36 +++++++ .../generation/anemigan/cpp/CMakeLists.txt | 13 +++ .../vision/generation/anemigan/cpp/README.md | 84 +++++++++++++++ .../vision/generation/anemigan/cpp/infer.cc | 69 ++++++++++++ .../generation/anemigan/python/README.md | 70 ++++++++++++ .../generation/anemigan/python/infer.py | 43 ++++++++ fastdeploy/vision.h | 1 + .../vision/generation/contrib/animegan.cc | 80 ++++++++++++++ .../vision/generation/contrib/animegan.h | 79 ++++++++++++++ .../generation/contrib/animegan_pybind.cc | 78 ++++++++++++++ .../generation/contrib/postprocessor.cc | 49 +++++++++ .../vision/generation/contrib/postprocessor.h | 43 ++++++++ .../vision/generation/contrib/preprocessor.cc | 63 +++++++++++ .../vision/generation/contrib/preprocessor.h | 42 ++++++++ .../vision/generation/generation_pybind.cc | 25 +++++ fastdeploy/vision/vision_pybind.cc | 2 + python/fastdeploy/vision/__init__.py | 1 + .../fastdeploy/vision/generation/__init__.py | 16 +++ .../vision/generation/contrib/__init__.py | 15 +++ .../vision/generation/contrib/anemigan.py | 102 ++++++++++++++++++ tests/models/test_animegan.py | 46 ++++++++ tests/models/test_basicvsr.py | 4 + tests/models/test_edvr.py | 6 +- 23 files changed, 966 insertions(+), 1 deletion(-) create mode 100644 examples/vision/generation/anemigan/README.md create mode 100755 examples/vision/generation/anemigan/cpp/CMakeLists.txt create mode 100755 examples/vision/generation/anemigan/cpp/README.md create mode 100644 examples/vision/generation/anemigan/cpp/infer.cc create mode 100644 examples/vision/generation/anemigan/python/README.md create mode 100644 examples/vision/generation/anemigan/python/infer.py create mode 100644 fastdeploy/vision/generation/contrib/animegan.cc create mode 100644 fastdeploy/vision/generation/contrib/animegan.h create mode 100644 fastdeploy/vision/generation/contrib/animegan_pybind.cc create mode 100644 fastdeploy/vision/generation/contrib/postprocessor.cc create mode 100644 fastdeploy/vision/generation/contrib/postprocessor.h create mode 100644 fastdeploy/vision/generation/contrib/preprocessor.cc create mode 100644 fastdeploy/vision/generation/contrib/preprocessor.h create mode 100644 fastdeploy/vision/generation/generation_pybind.cc create mode 100644 python/fastdeploy/vision/generation/__init__.py create mode 100644 python/fastdeploy/vision/generation/contrib/__init__.py create mode 100644 python/fastdeploy/vision/generation/contrib/anemigan.py create mode 100644 tests/models/test_animegan.py diff --git a/examples/vision/generation/anemigan/README.md 
b/examples/vision/generation/anemigan/README.md new file mode 100644 index 00000000000..721ed5644b4 --- /dev/null +++ b/examples/vision/generation/anemigan/README.md @@ -0,0 +1,36 @@ +# 图像生成模型 + +FastDeploy目前支持PaddleHub预训练模型库中如下风格迁移模型的部署 + +| 模型 | 说明 | 模型格式 | +| :--- | :--- | :------- | +|[animegan_v1_hayao_60](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v1_hayao_60&en_category=GANs)|可将输入的图像转换成宫崎骏动漫风格,模型权重转换自AnimeGAN V1官方开源项目|paddle| +|[animegan_v2_paprika_97](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_97&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_hayao_64](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_hayao_64&en_category=GANs)|可将输入的图像转换成宫崎骏动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_shinkai_53](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_shinkai_53&en_category=GANs)|可将输入的图像转换成新海诚动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_shinkai_33](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_shinkai_33&en_category=GANs)|可将输入的图像转换成新海诚动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_paprika_54](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_54&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_hayao_99](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_hayao_99&en_category=GANs)|可将输入的图像转换成宫崎骏动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_paprika_74](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_74&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_paprika_98](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_98&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| + +## FastDeploy paddle backend部署和hub速度对比(ips, 越高越好) +| Device | FastDeploy | Hub | +| :--- | :--- | :------- | +| CPU | 0.075 | 0.069| +| GPU | 8.33 | 8.26 | + + + +## 下载预训练模型 +使用fastdeploy.download_model即可以下载模型, 例如下载animegan_v1_hayao_60 +```python +import fastdeploy as fd +fd.download_model(name='animegan_v1_hayao_60', path='./', format='paddle') +``` +将会在当前目录获得animegan_v1_hayao_60的预训练模型。 + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/vision/generation/anemigan/cpp/CMakeLists.txt b/examples/vision/generation/anemigan/cpp/CMakeLists.txt new file mode 100755 index 00000000000..7d1bd2ee11a --- /dev/null +++ b/examples/vision/generation/anemigan/cpp/CMakeLists.txt @@ -0,0 +1,13 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") +include(${FASTDEPLOY_INSTALL_DIR}/utils/gflags.cmake) +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) +target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} ${GFLAGS_LIBRARIES}) diff --git a/examples/vision/generation/anemigan/cpp/README.md b/examples/vision/generation/anemigan/cpp/README.md new file mode 100755 index 00000000000..9d58c6ad3f0 --- /dev/null +++ b/examples/vision/generation/anemigan/cpp/README.md @@ -0,0 +1,84 @@ +# AnimeGAN C++部署示例 + +本目录下提供`infer.cc`快速完成AnimeGAN在CPU/GPU部署的示例。 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 
根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +以Linux上AnimeGAN推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.2以上(x.x.x>=1.0.2) + +```bash +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载准备好的模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_testimg.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/animegan_v1_hayao_60_v1.0.0.tgz +tar xvfz animegan_v1_hayao_60_v1.0.0.tgz + +# CPU推理 +./infer_demo --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device cpu +# GPU推理 +./infer_demo --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device gpu +``` + +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## AnimeGAN C++接口 + +### AnimeGAN类 + +```c++ +fastdeploy::vision::generation::AnimeGAN( + const string& model_file, + const string& params_file = "", + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +AnimeGAN模型加载和初始化,其中model_file为导出的Paddle模型结构文件,params_file为模型参数文件。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +#### Predict函数 + +> ```c++ +> bool AnimeGAN::Predict(cv::Mat& image, cv::Mat* result) +> ``` +> +> 模型预测入口,输入图像输出风格迁移后的结果。 +> +> **参数** +> +> > * **image**: 输入数据,注意需为HWC,BGR格式 +> > * **result**: 风格转换后的图像,BGR格式 + +#### BatchPredict函数 + +> ```c++ +> bool AnimeGAN::BatchPredict(const std::vector& images, std::vector* results); +> ``` +> +> 模型预测入口,输入一组图像并输出风格迁移后的结果。 +> +> **参数** +> +> > * **images**: 输入数据,一组图像数据,注意需为HWC,BGR格式 +> > * **results**: 风格转换后的一组图像,BGR格式 + +- [模型介绍](../../) +- [Python部署](../python) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/generation/anemigan/cpp/infer.cc b/examples/vision/generation/anemigan/cpp/infer.cc new file mode 100644 index 00000000000..ad10797e973 --- /dev/null +++ b/examples/vision/generation/anemigan/cpp/infer.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
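+//
+// Demo program: loads an AnimeGAN style-transfer model exported from
+// PaddleHub, reads one image with OpenCV, runs Predict() on CPU or GPU,
+// and writes the stylized result to style_transfer_result.png.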
+ +#include "fastdeploy/vision.h" +#include "gflags/gflags.h" + +DEFINE_string(model, "", "Directory of the inference model."); +DEFINE_string(image, "", "Path of the image file."); +DEFINE_string(device, "cpu", + "Type of inference device, support 'cpu' or 'gpu'."); + +void PrintUsage() { + std::cout << "Usage: infer_demo --model model_path --image img_path --device [cpu|gpu]" + << std::endl; + std::cout << "Default value of device: cpu" << std::endl; +} + +bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { + if (FLAGS_device == "gpu") { + option->UseGpu(); + } + else if (FLAGS_device == "cpu") { + option->SetPaddleMKLDNN(false); + return true; + } else { + std::cerr << "Only support device CPU/GPU now, " << FLAGS_device << " is not supported." << std::endl; + return false; + } + + return true; +} + +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option)) { + PrintUsage(); + return -1; + } + + auto model = fastdeploy::vision::generation::AnimeGAN(FLAGS_model+"/model.pdmodel", FLAGS_model+"/model.pdiparams", option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return -1; + } + + auto im = cv::imread(FLAGS_image); + cv::Mat res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return -1; + } + + cv::imwrite("style_transfer_result.png", res); + std::cout << "Visualized result saved in ./style_transfer_result.png" << std::endl; + + return 0; +} diff --git a/examples/vision/generation/anemigan/python/README.md b/examples/vision/generation/anemigan/python/README.md new file mode 100644 index 00000000000..9c4562402db --- /dev/null +++ b/examples/vision/generation/anemigan/python/README.md @@ -0,0 +1,70 @@ +# AnimeGAN Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本目录下提供`infer.py`快速完成AnimeGAN在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 + +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/generation/anemigan/python +# 下载准备好的测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_testimg.jpg + +# CPU推理 +python infer.py --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device cpu +# GPU推理 +python infer.py --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device gpu +``` + +## AnimeGAN Python接口 + +```python +fd.vision.generation.AnimeGAN(model_file, params_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +AnimeGAN模型加载和初始化,其中model_file和params_file为用于Paddle inference的模型结构文件和参数文件。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + + +### predict函数 + +> ```python +> AnimeGAN.predict(input_image) +> ``` +> +> 模型预测入口,输入图像输出风格迁移后的结果。 +> +> **参数** +> +> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 + +> **返回** np.ndarray, 风格转换后的图像,BGR格式 + +### batch_predict函数 +> ```python +> AnimeGAN.batch_predict函数(input_images) +> ``` +> +> 模型预测入口,输入一组图像并输出风格迁移后的结果。 +> +> **参数** +> +> > * **input_images**(list(np.ndarray)): 输入数据,一组图像数据,注意需为HWC,BGR格式 + +> **返回** list(np.ndarray), 风格转换后的一组图像,BGR格式 + +## 其它文档 + +- [风格迁移 模型介绍](..) 
+- [C++部署](../cpp) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/generation/anemigan/python/infer.py b/examples/vision/generation/anemigan/python/infer.py new file mode 100644 index 00000000000..69f610eda14 --- /dev/null +++ b/examples/vision/generation/anemigan/python/infer.py @@ -0,0 +1,43 @@ +import cv2 +import os +import fastdeploy as fd + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, help="Name of the model.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + else: + option.set_paddle_mkldnn(False) + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) +fd.download_model(name=args.model, path='./', format='paddle') +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +model = fd.vision.generation.AnimeGAN( + model_file, params_file, runtime_option=runtime_option) + +# 预测图片并保存结果 +im = cv2.imread(args.image) +result = model.predict(im) +cv2.imwrite('style_transfer_result.png', result) diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h index ef2fc90a635..0714a976630 100644 --- a/fastdeploy/vision.h +++ b/fastdeploy/vision.h @@ -55,6 +55,7 @@ #include "fastdeploy/vision/segmentation/ppseg/model.h" #include "fastdeploy/vision/sr/ppsr/model.h" #include "fastdeploy/vision/tracking/pptracking/model.h" +#include "fastdeploy/vision/generation/contrib/animegan.h" #endif diff --git a/fastdeploy/vision/generation/contrib/animegan.cc b/fastdeploy/vision/generation/contrib/animegan.cc new file mode 100644 index 00000000000..22962daa1cb --- /dev/null +++ b/fastdeploy/vision/generation/contrib/animegan.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/generation/contrib/animegan.h" +#include "fastdeploy/function/functions.h" + +namespace fastdeploy { +namespace vision { +namespace generation { + +AnimeGAN::AnimeGAN(const std::string& model_file, const std::string& params_file, + const RuntimeOption& custom_option, + const ModelFormat& model_format) { + + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool AnimeGAN::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize fastdeploy backend." 
<< std::endl;
+    return false;
+  }
+  return true;
+}
+
+
+bool AnimeGAN::Predict(cv::Mat& img, cv::Mat* result) {
+  std::vector<cv::Mat> results;
+  if (!BatchPredict({img}, &results)) {
+    return false;
+  }
+  *result = std::move(results[0]);
+  return true;
+}
+
+bool AnimeGAN::BatchPredict(const std::vector<cv::Mat>& images,
+                            std::vector<cv::Mat>* results) {
+  std::vector<FDMat> fd_images = WrapMat(images);
+  std::vector<FDTensor> processed_data(1);
+  if (!preprocessor_.Run(fd_images, &(processed_data))) {
+    FDERROR << "Failed to preprocess input data while using model:"
+            << ModelName() << "." << std::endl;
+    return false;
+  }
+  std::vector<FDTensor> infer_result(1);
+  processed_data[0].name = InputInfoOfRuntime(0).name;
+
+  if (!Infer(processed_data, &infer_result)) {
+    FDERROR << "Failed to inference by runtime." << std::endl;
+    return false;
+  }
+  if (!postprocessor_.Run(infer_result, results)) {
+    FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
+            << std::endl;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace generation
+}  // namespace vision
+}  // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/vision/generation/contrib/animegan.h b/fastdeploy/vision/generation/contrib/animegan.h
new file mode 100644
index 00000000000..9d1f9aa2789
--- /dev/null
+++ b/fastdeploy/vision/generation/contrib/animegan.h
@@ -0,0 +1,79 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/generation/contrib/preprocessor.h"
+#include "fastdeploy/vision/generation/contrib/postprocessor.h"
+
+namespace fastdeploy {
+
+namespace vision {
+
+namespace generation {
+/*! @brief AnimeGAN model object, used when loading an AnimeGAN model.
+ */
+class FASTDEPLOY_DECL AnimeGAN : public FastDeployModel {
+ public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./model.pdmodel
+   * \param[in] params_file Path of parameter file, e.g. ./model.pdiparams, if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is PADDLE format
+   */
+  AnimeGAN(const std::string& model_file, const std::string& params_file = "",
+           const RuntimeOption& custom_option = RuntimeOption(),
+           const ModelFormat& model_format = ModelFormat::PADDLE);
+
+  std::string ModelName() const { return "styletransfer/animegan"; }
+
+  /** \brief Predict the style transfer result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output style transfer result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  bool Predict(cv::Mat& img, cv::Mat* result);
+
+  /** \brief Predict the style transfer result for a batch of input images
+   *
+   * \param[in] images The list of input images, each element comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] results The list of output style transfer results will be written to this structure
+   * \return true if the batch prediction succeeded, otherwise false
+   */
+  bool BatchPredict(const std::vector<cv::Mat>& images,
+                    std::vector<cv::Mat>* results);
+
+  // Get preprocessor reference of AnimeGAN
+  AnimeGANPreprocessor& GetPreprocessor() {
+    return preprocessor_;
+  }
+
+  // Get postprocessor reference of AnimeGAN
+  AnimeGANPostprocessor& GetPostprocessor() {
+    return postprocessor_;
+  }
+
+ private:
+  bool Initialize();
+
+  AnimeGANPreprocessor preprocessor_;
+  AnimeGANPostprocessor postprocessor_;
+};
+
+}  // namespace generation
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/generation/contrib/animegan_pybind.cc b/fastdeploy/vision/generation/contrib/animegan_pybind.cc
new file mode 100644
index 00000000000..853069d71bc
--- /dev/null
+++ b/fastdeploy/vision/generation/contrib/animegan_pybind.cc
@@ -0,0 +1,78 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
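+//
+// Binding notes: predict()/batch_predict() take numpy arrays with layout
+// HWC and BGR channel order, and return numpy arrays in the same layout;
+// the preprocessor and postprocessor are also exposed for stand-alone use.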
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+void BindAnimeGAN(pybind11::module& m) {
+  pybind11::class_<vision::generation::AnimeGAN, FastDeployModel>(m, "AnimeGAN")
+      .def(pybind11::init<std::string, std::string, RuntimeOption, ModelFormat>())
+      .def("predict",
+           [](vision::generation::AnimeGAN& self, pybind11::array& data) {
+             auto mat = PyArrayToCvMat(data);
+             cv::Mat res;
+             self.Predict(mat, &res);
+             auto ret = pybind11::array_t<uint8_t>(
+                 {res.rows, res.cols, res.channels()}, res.data);
+             return ret;
+           })
+      .def("batch_predict",
+           [](vision::generation::AnimeGAN& self, std::vector<pybind11::array>& data) {
+             std::vector<cv::Mat> images;
+             for (size_t i = 0; i < data.size(); ++i) {
+               images.push_back(PyArrayToCvMat(data[i]));
+             }
+             std::vector<cv::Mat> results;
+             self.BatchPredict(images, &results);
+             std::vector<pybind11::array_t<uint8_t>> ret;
+             for (size_t i = 0; i < results.size(); ++i) {
+               ret.push_back(pybind11::array_t<uint8_t>(
+                   {results[i].rows, results[i].cols, results[i].channels()}, results[i].data));
+             }
+             return ret;
+           })
+      .def_property_readonly("preprocessor", &vision::generation::AnimeGAN::GetPreprocessor)
+      .def_property_readonly("postprocessor", &vision::generation::AnimeGAN::GetPostprocessor);
+
+  pybind11::class_<vision::generation::AnimeGANPreprocessor>(
+      m, "AnimeGANPreprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::generation::AnimeGANPreprocessor& self, std::vector<pybind11::array>& im_list) {
+        std::vector<vision::FDMat> images;
+        for (size_t i = 0; i < im_list.size(); ++i) {
+          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+        }
+        std::vector<FDTensor> outputs;
+        if (!self.Run(images, &outputs)) {
+          throw std::runtime_error("Failed to preprocess the input data in AnimeGANPreprocessor.");
+        }
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          outputs[i].StopSharing();
+        }
+        return outputs;
+      });
+  pybind11::class_<vision::generation::AnimeGANPostprocessor>(
+      m, "AnimeGANPostprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::generation::AnimeGANPostprocessor& self, std::vector<FDTensor>& inputs) {
+        std::vector<cv::Mat> results;
+        if (!self.Run(inputs, &results)) {
+          throw std::runtime_error("Failed to postprocess the runtime result in AnimeGANPostprocessor.");
+        }
+        return results;
+      });
+
+}
+}  // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/vision/generation/contrib/postprocessor.cc b/fastdeploy/vision/generation/contrib/postprocessor.cc
new file mode 100644
index 00000000000..68dbaf8f357
--- /dev/null
+++ b/fastdeploy/vision/generation/contrib/postprocessor.cc
@@ -0,0 +1,49 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
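+//
+// Postprocessing for AnimeGAN: maps the float network output from [-1, 1]
+// back to [0, 255] (x * 127.5 + 127.5), converts it to 8-bit pixels, and
+// swaps RGB back to BGR so the result matches OpenCV conventions.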
+
+#include "fastdeploy/vision/generation/contrib/postprocessor.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace generation {
+
+bool AnimeGANPostprocessor::Run(std::vector<FDTensor>& infer_results,
+                                std::vector<cv::Mat>* results) {
+  // 1. Reverse normalization
+  // 2. RGB2BGR
+  FDTensor& output_tensor = infer_results.at(0);
+  std::vector<int64_t> shape = output_tensor.Shape();  // n, h, w, c
+  int size = shape[1] * shape[2] * shape[3];
+  results->resize(shape[0]);
+  float* infer_result_data = reinterpret_cast<float*>(output_tensor.Data());
+  for (size_t i = 0; i < results->size(); ++i) {
+    Mat result_mat = Mat::Create(shape[1], shape[2], 3, FDDataType::FP32, infer_result_data + i * size);
+    std::vector<float> mean{127.5f, 127.5f, 127.5f};
+    std::vector<float> std{127.5f, 127.5f, 127.5f};
+    Convert::Run(&result_mat, mean, std);
+    // result_mat now holds float pixel values; convert them to 8-bit unsigned
+    auto temp = result_mat.GetOpenCVMat();
+    cv::Mat res = cv::Mat::zeros(temp->size(), CV_8UC3);
+    temp->convertTo(res, CV_8UC3, 1);
+    Mat fd_image = WrapMat(res);
+    BGR2RGB::Run(&fd_image);
+    res = *(fd_image.GetOpenCVMat());
+    res.copyTo(results->at(i));
+  }
+  return true;
+}
+
+}  // namespace generation
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/generation/contrib/postprocessor.h b/fastdeploy/vision/generation/contrib/postprocessor.h
new file mode 100644
index 00000000000..3f3a7728bc1
--- /dev/null
+++ b/fastdeploy/vision/generation/contrib/postprocessor.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/function/functions.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace generation {
+/*! @brief Postprocessor object for the AnimeGAN series of models.
+ */
+class FASTDEPLOY_DECL AnimeGANPostprocessor {
+ public:
+  /** \brief Create a postprocessor instance for the AnimeGAN series of models
+   */
+  AnimeGANPostprocessor() {}
+
+  /** \brief Process the result of runtime
+   *
+   * \param[in] infer_results The inference results from runtime
+   * \param[in] results The output results of style transfer
+   * \return true if the postprocessing succeeded, otherwise false
+   */
+  bool Run(std::vector<FDTensor>& infer_results,
+           std::vector<cv::Mat>* results);
+};
+
+}  // namespace generation
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/generation/contrib/preprocessor.cc b/fastdeploy/vision/generation/contrib/preprocessor.cc
new file mode 100644
index 00000000000..24e75fdc3b0
--- /dev/null
+++ b/fastdeploy/vision/generation/contrib/preprocessor.cc
@@ -0,0 +1,63 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
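+//
+// Preprocessing for AnimeGAN: converts BGR to RGB, casts pixels to float,
+// maps them from [0, 255] to [-1, 1] (x / 127.5 - 1), and finally packs all
+// images into one NHWC batch tensor for the runtime.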
+
+#include "fastdeploy/vision/generation/contrib/preprocessor.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace generation {
+
+bool AnimeGANPreprocessor::Run(std::vector<FDMat>& images, std::vector<FDTensor>* outputs) {
+  // 1. BGR2RGB
+  // 2. Convert(opencv style) or Normalize
+  for (size_t i = 0; i < images.size(); ++i) {
+    auto ret = BGR2RGB::Run(&images[i]);
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "BGR2RGB" << "." << std::endl;
+      return false;
+    }
+    ret = Cast::Run(&images[i], "float");
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "Cast" << "." << std::endl;
+      return false;
+    }
+    std::vector<float> mean{1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
+    std::vector<float> std{-1.f, -1.f, -1.f};
+    ret = Convert::Run(&images[i], mean, std);
+    if (!ret) {
+      FDERROR << "Failed to process image:" << i << " in "
+              << "Convert" << "." << std::endl;
+      return false;
+    }
+  }
+  outputs->resize(1);
+  // Concat all the preprocessed data to a batch tensor
+  std::vector<FDTensor> tensors(images.size());
+  for (size_t i = 0; i < images.size(); ++i) {
+    images[i].ShareWithTensor(&(tensors[i]));
+    tensors[i].ExpandDim(0);
+  }
+  if (tensors.size() == 1) {
+    (*outputs)[0] = std::move(tensors[0]);
+  } else {
+    function::Concat(tensors, &((*outputs)[0]), 0);
+  }
+  return true;
+}
+
+}  // namespace generation
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/generation/contrib/preprocessor.h b/fastdeploy/vision/generation/contrib/preprocessor.h
new file mode 100644
index 00000000000..4fcf94a15b1
--- /dev/null
+++ b/fastdeploy/vision/generation/contrib/preprocessor.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/function/functions.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace generation {
+/*! @brief Preprocessor object for the AnimeGAN series of models.
+ */
+class FASTDEPLOY_DECL AnimeGANPreprocessor {
+ public:
+  /** \brief Create a preprocessor instance for the AnimeGAN series of models
+   */
+  AnimeGANPreprocessor() {}
+
+  /** \brief Process the input images and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, each element wrapped as FDMat
+   * \param[in] output The output tensors which will be fed into the runtime
+   * \return true if the preprocessing succeeded, otherwise false
+   */
+  bool Run(std::vector<FDMat>& images, std::vector<FDTensor>* output);
+};
+
+}  // namespace generation
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/generation/generation_pybind.cc b/fastdeploy/vision/generation/generation_pybind.cc
new file mode 100644
index 00000000000..d4f02612e1a
--- /dev/null
+++ b/fastdeploy/vision/generation/generation_pybind.cc
@@ -0,0 +1,25 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { + +void BindAnimeGAN(pybind11::module& m); + +void BindGeneration(pybind11::module& m) { + auto generation_module = m.def_submodule("generation", "image generation submodule"); + BindAnimeGAN(generation_module); +} +} // namespace fastdeploy diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc index cecd4f7c37d..aa387b4305d 100644 --- a/fastdeploy/vision/vision_pybind.cc +++ b/fastdeploy/vision/vision_pybind.cc @@ -28,6 +28,7 @@ void BindTracking(pybind11::module& m); void BindKeyPointDetection(pybind11::module& m); void BindHeadPose(pybind11::module& m); void BindSR(pybind11::module& m); +void BindGeneration(pybind11::module& m); #ifdef ENABLE_VISION_VISUALIZE void BindVisualize(pybind11::module& m); #endif @@ -213,6 +214,7 @@ void BindVision(pybind11::module& m) { BindKeyPointDetection(m); BindHeadPose(m); BindSR(m); + BindGeneration(m); #ifdef ENABLE_VISION_VISUALIZE BindVisualize(m); #endif diff --git a/python/fastdeploy/vision/__init__.py b/python/fastdeploy/vision/__init__.py index a5531a8a908..ba9a2d0ca99 100755 --- a/python/fastdeploy/vision/__init__.py +++ b/python/fastdeploy/vision/__init__.py @@ -26,6 +26,7 @@ from . import headpose from . import sr from . import evaluation +from . import generation from .utils import fd_result_to_json from .visualize import * from .. import C diff --git a/python/fastdeploy/vision/generation/__init__.py b/python/fastdeploy/vision/generation/__init__.py new file mode 100644 index 00000000000..f568ed84d88 --- /dev/null +++ b/python/fastdeploy/vision/generation/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.anemigan import AnimeGAN diff --git a/python/fastdeploy/vision/generation/contrib/__init__.py b/python/fastdeploy/vision/generation/contrib/__init__.py new file mode 100644 index 00000000000..8034e10bfc5 --- /dev/null +++ b/python/fastdeploy/vision/generation/contrib/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
diff --git a/python/fastdeploy/vision/generation/contrib/anemigan.py b/python/fastdeploy/vision/generation/contrib/anemigan.py
new file mode 100644
index 00000000000..eaed21c5e09
--- /dev/null
+++ b/python/fastdeploy/vision/generation/contrib/anemigan.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from .... import FastDeployModel, ModelFormat
+from .... import c_lib_wrap as C
+
+
+class AnimeGANPreprocessor:
+    def __init__(self, config_file):
+        """Create a preprocessor for AnimeGAN.
+        """
+        self._preprocessor = C.vision.generation.AnimeGANPreprocessor()
+
+    def run(self, input_ims):
+        """Preprocess input images for AnimeGAN.
+
+        :param input_ims: (list of numpy.ndarray) The input images
+        :return: list of FDTensor
+        """
+        return self._preprocessor.run(input_ims)
+
+
+class AnimeGANPostprocessor:
+    def __init__(self):
+        """Create a postprocessor for AnimeGAN.
+        """
+        self._postprocessor = C.vision.generation.AnimeGANPostprocessor()
+
+    def run(self, runtime_results):
+        """Postprocess the runtime results for AnimeGAN.
+
+        :param runtime_results: (list of FDTensor) The output FDTensor results from the runtime
+        :return: results: (list) Final results
+        """
+        return self._postprocessor.run(runtime_results)
+
+
+class AnimeGAN(FastDeployModel):
+    def __init__(self,
+                 model_file,
+                 params_file="",
+                 runtime_option=None,
+                 model_format=ModelFormat.PADDLE):
+        """Load an AnimeGAN model.
+
+        :param model_file: (str) Path of the model file, e.g. ./model.pdmodel
+        :param params_file: (str) Path of the parameters file, e.g. ./model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (fastdeploy.RuntimeOption) RuntimeOption for inference of this model; if it is None, the default backend on CPU will be used
+        :param model_format: (fastdeploy.ModelFormat) Model format of the loaded model
+        """
+        # Call the super constructor to initialize self._runtime_option.
+        super(AnimeGAN, self).__init__(runtime_option)
+
+        self._model = C.vision.generation.AnimeGAN(
+            model_file, params_file, self._runtime_option, model_format)
+        # Assert self.initialized to confirm the model was initialized successfully.
+        assert self.initialized, "AnimeGAN initialization failed."
+
+    def predict(self, input_image):
+        """Predict the style transfer result for an input image.
+
+        :param input_image: (numpy.ndarray) The input image data, a 3-D array with layout HWC, BGR format
+        :return: style transfer result
+        """
+        return self._model.predict(input_image)
+
+    def batch_predict(self, input_images):
+        """Predict the style transfer results for multiple input images.
+
+        :param input_images: (list of numpy.ndarray) The list of input image data; each image is a 3-D array with layout HWC, BGR format
+        :return: a list of style transfer results
+        """
+        return self._model.batch_predict(input_images)
+
+    @property
+    def preprocessor(self):
+        """Get the AnimeGANPreprocessor object of the loaded model.
+
+        :return: AnimeGANPreprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get the AnimeGANPostprocessor object of the loaded model.
+
+        :return: AnimeGANPostprocessor
+        """
+        return self._model.postprocessor
diff --git a/tests/models/test_animegan.py b/tests/models/test_animegan.py
new file mode 100644
index 00000000000..d698b05a854
--- /dev/null
+++ b/tests/models/test_animegan.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+import cv2
+import os
+import numpy as np
+
+
+def test_animegan():
+    model_name = 'animegan_v1_hayao_60'
+    model_path = fd.download_model(
+        name=model_name, path='./resources', format='paddle')
+    test_img = 'https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_testimg.jpg'
+    label_img = 'https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_result.png'
+    fd.download(test_img, "./resources")
+    fd.download(label_img, "./resources")
+    # Use the default backend
+    runtime_option = fd.RuntimeOption()
+    runtime_option.set_paddle_mkldnn(False)
+    model_file = os.path.join(model_path, "model.pdmodel")
+    params_file = os.path.join(model_path, "model.pdiparams")
+    animegan = fd.vision.generation.AnimeGAN(
+        model_file, params_file, runtime_option=runtime_option)
+
+    src_img = cv2.imread("./resources/style_transfer_testimg.jpg")
+    label_img = cv2.imread("./resources/style_transfer_result.png")
+    res = animegan.predict(src_img)
+
+    diff = np.fabs(res.astype(np.float32) - label_img.astype(np.float32)) / 255
+    assert diff.max() < 1e-04, "Prediction differs from the expected result."
+
+
+if __name__ == "__main__":
+    test_animegan()
diff --git a/tests/models/test_basicvsr.py b/tests/models/test_basicvsr.py
index 479343444e3..9aeabc50907 100644
--- a/tests/models/test_basicvsr.py
+++ b/tests/models/test_basicvsr.py
@@ -69,3 +69,7 @@ def test_basicvsr():
         if t >= 10:
             break
     capture.release()
+
+
+if __name__ == "__main__":
+    test_basicvsr()
diff --git a/tests/models/test_edvr.py b/tests/models/test_edvr.py
index a9f9517e7d6..a874c7d3b59 100644
--- a/tests/models/test_edvr.py
+++ b/tests/models/test_edvr.py
@@ -1,4 +1,4 @@
-test_pptracking.py # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -74,3 +74,7 @@ def test_edvr():
         if t >= 10:
             break
     capture.release()
+
+
+if __name__ == "__main__":
+    test_edvr()
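
---

Below is a minimal end-to-end usage sketch for the fd.vision.generation.AnimeGAN
Python API added in this patch. It is a sketch, not part of the patch: the model
name and backend settings are borrowed from tests/models/test_animegan.py,
"input.jpg" is a hypothetical file name, and writing the result with
cv2.imwrite assumes predict() returns an HWC, BGR numpy.ndarray, which the
test's pixel-wise comparison against a PNG suggests but the patch does not
state explicitly.

import os
import cv2
import fastdeploy as fd

# Download the pretrained model used by the unit test.
model_path = fd.download_model(
    name='animegan_v1_hayao_60', path='./resources', format='paddle')

# Default CPU backend, with Paddle MKLDNN disabled as in the unit test.
option = fd.RuntimeOption()
option.set_paddle_mkldnn(False)

model = fd.vision.generation.AnimeGAN(
    os.path.join(model_path, "model.pdmodel"),
    os.path.join(model_path, "model.pdiparams"),
    runtime_option=option)

# Single image in, stylized image out (assumed HWC, BGR ndarray).
src = cv2.imread("input.jpg")
styled = model.predict(src)
cv2.imwrite("styled.jpg", styled)

# batch_predict mirrors predict for a list of input images.
styled_list = model.batch_predict([src, src])

The preprocessor and postprocessor properties exposed by this patch should also
allow driving the pipeline manually, calling preprocessor.run() and
postprocessor.run() around a raw runtime when finer control is needed.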