PaddlePaddle · Jiang-Jia-Jun · Nov 5, 2025 · Nov 5, 2025 · Nov 5, 2025 · Nov 5, 2025
diff --git a/benchmarks/paddleocr_vl/PaddleOCR-VL.yaml b/benchmarks/paddleocr_vl/PaddleOCR-VL.yaml
@@ -0,0 +1,97 @@
+
+pipeline_name: PaddleOCR-VL
+
+batch_size: 64 # pipeline-level value; the sub-modules below each set their own batch_size
+
+use_queues: True
+
+use_doc_preprocessor: False # presumably gates SubPipelines.DocPreprocessor - TODO confirm
+use_layout_detection: True # presumably gates SubModules.LayoutDetection - TODO confirm
+use_chart_recognition: False
+format_block_content: False
+
+SubModules:
+  LayoutDetection:
+    module_name: layout_detection
+    model_name: PP-DocLayoutV2
+    model_dir: null
+    batch_size: 8
+    threshold: # per class ID; labels 9/13/23 per PP-DocLayoutV2 label list (TODO confirm)
+      0: 0.5 # abstract
+      1: 0.5 # algorithm
+      2: 0.5 # aside_text
+      3: 0.5 # chart
+      4: 0.5 # content
+      5: 0.4 # display_formula
+      6: 0.4 # doc_title
+      7: 0.5 # figure_title
+      8: 0.5 # footer
+      9: 0.5 # footer_image
+      10: 0.5 # footnote
+      11: 0.5 # formula_number
+      12: 0.5 # header
+      13: 0.5 # header_image
+      14: 0.5 # image
+      15: 0.4 # inline_formula
+      16: 0.5 # number
+      17: 0.4 # paragraph_title
+      18: 0.5 # reference
+      19: 0.5 # reference_content
+      20: 0.45 # seal
+      21: 0.5 # table
+      22: 0.4 # text
+      23: 0.4 # vertical_text
+      24: 0.5 # vision_footnote
+    layout_nms: True
+    layout_unclip_ratio: [1.0, 1.0]
+    layout_merge_bboxes_mode: # same class IDs as threshold; labels 9/13/23 likewise to be confirmed
+      0: "union" # abstract
+      1: "union" # algorithm
+      2: "union" # aside_text
+      3: "large" # chart
+      4: "union" # content
+      5: "large" # display_formula
+      6: "large" # doc_title
+      7: "union" # figure_title
+      8: "union" # footer
+      9: "union" # footer_image
+      10: "union" # footnote
+      11: "union" # formula_number
+      12: "union" # header
+      13: "union" # header_image
+      14: "union" # image
+      15: "large" # inline_formula
+      16: "union" # number
+      17: "large" # paragraph_title
+      18: "union" # reference
+      19: "union" # reference_content
+      20: "union" # seal
+      21: "union" # table
+      22: "union" # text
+      23: "union" # vertical_text
+      24: "union" # vision_footnote
+  VLRecognition:
+    module_name: vl_recognition
+    model_name: PaddleOCR-VL-0.9B
+    model_dir: null
+    batch_size: 4096
+    genai_config:
+      backend: fastdeploy-server
+      server_url: http://127.0.0.1:8118/v1 # port 8118 matches --port in the README launch command
+
+SubPipelines:
+  DocPreprocessor: # NOTE(review): use_doc_preprocessor is False above, so presumably unused here - confirm
+    pipeline_name: doc_preprocessor
+    batch_size: 8
+    use_doc_orientation_classify: True
+    use_doc_unwarping: True
+    SubModules:
+      DocOrientationClassify:
+        module_name: doc_text_orientation
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+        batch_size: 8
+      DocUnwarping: # no batch_size is set for this module, unlike DocOrientationClassify
+        module_name: image_unwarping
+        model_name: UVDoc
+        model_dir: null
diff --git a/benchmarks/paddleocr_vl/README.md b/benchmarks/paddleocr_vl/README.md
@@ -0,0 +1,139 @@
+## FastDeploy 服务化性能压测工具（PaddleOCR-VL）
+
+本文档主要介绍如何对 [PaddleOCR-VL](https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html) 进行性能测试。
+
+### 数据集
+
+下载数据集到本地用于性能测试：
+
+<table>
+  <thead>
+    <tr>
+      <th>数据集</th>
+      <th>获取地址</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>OmniDocBench v1 数据集，共 981 个 PDF 文件</td>
+      <td><a href="https://github.com/opendatalab/OmniDocBench">https://github.com/opendatalab/OmniDocBench</a></td>
+    </tr>
+  </tbody>
+</table>
+
+### 使用方式
+
+1. 启动 FastDeploy 服务，下面为 A100-80G 测试时使用的参数，可以根据实际情况进行调整：
+
+    ```shell
+    python -m fastdeploy.entrypoints.openai.api_server \
+            --model PaddlePaddle/PaddleOCR-VL \
+            --port 8118 \
+            --metrics-port 8471 \
+            --engine-worker-queue-port 8472 \
+            --cache-queue-port 55660 \
+            --max-model-len 16384 \
+            --max-num-batched-tokens 16384 \
+            --gpu-memory-utilization 0.7 \
+            --max-num-seqs 256 \
+            --workers 2 \
+            --graph-optimization-config '{"graph_opt_level":0, "use_cudagraph":true}'
+    ```
+
+2. 在同一环境安装依赖后启动测试脚本：
+
+    ```shell
+    # 安装依赖
+    pip install -U paddlex
+    # 启动测试脚本
+    python benchmark.py ./test_data -b 512 -o ./benchmark.json --paddlex_config_path ./PaddleOCR-VL.yaml --gpu_ids 0
+    ```
+
+    测试脚本参数说明：
+
+    <table>
+        <thead>
+            <tr>
+                <th>参数</th>
+                <th>说明</th>
+            </tr>
+        </thead>
+        <tbody>
+            <tr>
+                <td><code>input_dirs</code></td>
+                <td>输入的目录路径，会自动识别目录下的 PDF 或图片文件。可以提供一个或多个目录。</td>
+            </tr>
+            <tr>
+                <td><code>-b, --batch_size</code></td>
+                <td>推理时使用的批处理大小。</td>
+            </tr>
+            <tr>
+                <td><code>-o, --output_path</code></td>
+                <td>输出结果文件的路径。</td>
+            </tr>
+            <tr>
+                <td><code>--paddlex_config_path</code></td>
+                <td>PaddleX 的 YAML 配置文件路径。</td>
+            </tr>
+            <tr>
+                <td><code>--gpu_ids</code></td>
+                <td>指定要使用的 GPU 设备 ID，可提供一个或多个。</td>
+            </tr>
+        </tbody>
+    </table>
+
+3. 测试结束后，会输出类似于下面的结果：
+
+    ```text
+    Throughput (file): 1.3961 files per second
+    Average latency (batch): 351.0812 seconds
+    Processed pages: 981
+    Throughput (page): 1.3961 pages per second
+    Generated tokens: 1510337
+    Throughput (token): 2149.5 tokens per second
+    GPU utilization (%): 100.0, 0.0, 68.1
+    GPU memory usage (MB): 77664.8, 58802.8, 74402.7
+    ```
+
+    输出结果说明：
+
+    <table>
+        <thead>
+            <tr>
+                <th>指标</th>
+                <th>说明</th>
+            </tr>
+        </thead>
+        <tr>
+            <td>Throughput (file)</td>
+            <td>每秒处理的文件数量</td>
+        </tr>
+        <tr>
+            <td>Average latency (batch)</td>
+            <td>每批次处理的平均延迟时间，单位为秒</td>
+        </tr>
+        <tr>
+            <td>Processed pages</td>
+            <td>已处理的页面总数</td>
+        </tr>
+        <tr>
+            <td>Throughput (page)</td>
+            <td>每秒处理的页面数量</td>
+        </tr>
+        <tr>
+            <td>Generated tokens</td>
+            <td>生成的 token 总数</td>
+        </tr>
+        <tr>
+            <td>Throughput (token)</td>
+            <td>每秒生成的 token 数量</td>
+        </tr>
+        <tr>
+            <td>GPU utilization (%)</td>
+            <td>GPU 的最大、最小、平均利用率，单位为 %</td>
+        </tr>
+        <tr>
+            <td>GPU memory usage (MB)</td>
+            <td>GPU 的最大、最小、平均显存占用，单位为 MB</td>
+        </tr>
+    </table>