From ed9ef8d1510c187da0036622cad7cad843ec6527 Mon Sep 17 00:00:00 2001 From: DrRyanHuang Date: Tue, 14 Oct 2025 19:15:55 +0800 Subject: [PATCH 1/3] remove CINN limitation --- fastdeploy/config.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 20de8534451..fef9ecfa122 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1442,10 +1442,6 @@ def __init__( else: self.graph_opt_config.init_with_cudagrpah_size(max_capture_size=self.scheduler_config.max_num_seqs) - # TODO(wangmingkai02): change graph_opt_level=2 when using static mode with cinn - if self.graph_opt_config.graph_opt_level == 2: - self.graph_opt_config.graph_opt_level = 1 - self.tokenizer = tokenizer self.ips = ips self.tool_parser = tool_parser From a1d35be8ae9fe6f50654e44f9d8c947e3ad454d7 Mon Sep 17 00:00:00 2001 From: DrRyanHuang Date: Thu, 16 Oct 2025 20:41:03 +0800 Subject: [PATCH 2/3] fix unitest --- .../test_graph_opt_backend.py | 13 ++++++++++--- .../test_static_graph_cuda_graph_split.py | 17 +++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/graph_optimization/test_graph_opt_backend.py b/tests/graph_optimization/test_graph_opt_backend.py index 27cb4b1615f..24d5b8929f4 100644 --- a/tests/graph_optimization/test_graph_opt_backend.py +++ b/tests/graph_optimization/test_graph_opt_backend.py @@ -32,6 +32,7 @@ from fastdeploy.model_executor.graph_optimization.decorator import ( support_graph_optimization, ) +from fastdeploy.model_executor.graph_optimization.utils import sot_warmup_guard @support_graph_optimization @@ -46,7 +47,7 @@ def __init__(self, fd_config: FDConfig) -> None: def forward( self, - ids_remove_padding, + ids_remove_padding: paddle.Tensor, forward_meta: ForwardMeta, ): hidden_states = self.embed_tokens(forward_meta.ids_remove_padding) @@ -58,7 +59,7 @@ def forward( def forward_dynamic( self, - ids_remove_padding, + ids_remove_padding: paddle.Tensor, forward_meta: ForwardMeta, ): hidden_states = self.embed_tokens(forward_meta.ids_remove_padding) @@ -164,15 +165,21 @@ def _run_model_test(self, fd_config, test_name, compare_with_baseline=True): """ test_model = Attention(fd_config=fd_config, **self.model_config) + with sot_warmup_guard(True): + _ = test_model(ids_remove_padding=self.input_tensor, forward_meta=self.forward_meta) + # Run model test output = test_model(ids_remove_padding=self.input_tensor, forward_meta=self.forward_meta) # Validate results if comparison is requested if compare_with_baseline: np.testing.assert_allclose( - self.baseline_result, output.numpy(), err_msg=f"Test {test_name} failed: output mismatch" + self.baseline_result, output.numpy(), err_msg=f"Test {test_name} failed: output mismatch", + atol=1e-6 # for CINN ) + paddle.jit.sot.opcode_translator.executor.executor_cache.OpcodeExecutorCache().clear() + def test_dynamic_graph(self): """Test dynamic graph mode""" fd_config = self._setup_test_config(graph_opt_level=0, use_cudagraph=False) diff --git a/tests/graph_optimization/test_static_graph_cuda_graph_split.py b/tests/graph_optimization/test_static_graph_cuda_graph_split.py index 8dcbf2e8fbb..4ccc34f0b1d 100644 --- a/tests/graph_optimization/test_static_graph_cuda_graph_split.py +++ b/tests/graph_optimization/test_static_graph_cuda_graph_split.py @@ -14,17 +14,17 @@ # limitations under the License. """ -import os - -os.environ["FLAGS_cuda_graph_blacklist"] = "pd_op.matmul,pd_op.transpose" - - import unittest from unittest.mock import Mock import paddle import paddle.nn as nn +from fastdeploy.model_executor.graph_optimization.utils import sot_warmup_guard + +paddle.set_flags({"FLAGS_cuda_graph_blacklist": "pd_op.matmul,pd_op.transpose"}) + + from fastdeploy.config import ( CacheConfig, FDConfig, @@ -77,10 +77,10 @@ def __init__(self, fd_config: FDConfig, **kwargs): super().__init__() self.model = Attention(fd_config) - def forward(self, ids_remove_padding, forward_meta: ForwardMeta): + def forward(self, ids_remove_padding: paddle.Tensor, forward_meta: ForwardMeta): return self.model(ids_remove_padding=ids_remove_padding, forward_meta=forward_meta) - def forward_correct(self, ids_remove_padding, forward_meta: ForwardMeta): + def forward_correct(self, ids_remove_padding: paddle.Tensor, forward_meta: ForwardMeta): return self.model.forward_dynamic(ids_remove_padding=ids_remove_padding, forward_meta=forward_meta) @@ -111,7 +111,8 @@ def test(self): forward_meta1 = ForwardMeta(input_ids=x, ids_remove_padding=x, step_use_cudagraph=True) # Trigger Capture - _ = test_model1(x, forward_meta=forward_meta1) + with sot_warmup_guard(True): + _ = test_model1(x, forward_meta=forward_meta1) # Replay _ = test_model1(x, forward_meta=forward_meta1) From 630121c6ef3017b77da1bf8c26a298d590308bc5 Mon Sep 17 00:00:00 2001 From: DrRyanHuang Date: Thu, 16 Oct 2025 20:49:19 +0800 Subject: [PATCH 3/3] fix codestyle --- tests/graph_optimization/test_graph_opt_backend.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/graph_optimization/test_graph_opt_backend.py b/tests/graph_optimization/test_graph_opt_backend.py index 24d5b8929f4..6a4b56ccf53 100644 --- a/tests/graph_optimization/test_graph_opt_backend.py +++ b/tests/graph_optimization/test_graph_opt_backend.py @@ -174,8 +174,10 @@ def _run_model_test(self, fd_config, test_name, compare_with_baseline=True): # Validate results if comparison is requested if compare_with_baseline: np.testing.assert_allclose( - self.baseline_result, output.numpy(), err_msg=f"Test {test_name} failed: output mismatch", - atol=1e-6 # for CINN + self.baseline_result, + output.numpy(), + err_msg=f"Test {test_name} failed: output mismatch", + atol=1e-6, # for CINN ) paddle.jit.sot.opcode_translator.executor.executor_cache.OpcodeExecutorCache().clear()