Skip to content

Commit 6cbd832

Browse files
committed
[TRTLLM-8763][chore] Deprecate pybind based GuidedDecodingConfig usage in torch backend
Signed-off-by: leslie-fang25 <leslief@nvidia.com>
1 parent 858d643 commit 6cbd832

File tree

4 files changed

+29
-6
lines changed

4 files changed

+29
-6
lines changed

tensorrt_llm/_torch/pyexecutor/grammar_matcher.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
 import torch
 import xgrammar
 
-from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
+from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
+
+from ...bindings.executor import GuidedDecodingParams
 
 
 class GrammarMatcher(ABC):

tensorrt_llm/_torch/pyexecutor/guided_decoder.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
 
 import torch
 
+from tensorrt_llm.llmapi.llm_args import GuidedDecodingConfig
+
 from ..._utils import nvtx_range
-from ...bindings.executor import GuidedDecodingConfig, GuidedDecodingParams
+from ...bindings.executor import GuidedDecodingParams
 from ...bindings.internal.batch_manager import LlmRequestType
 from ...logger import logger
 from ..hostfunc import hostfunc

tensorrt_llm/_torch/pyexecutor/py_executor_creator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
 import tensorrt_llm
 from tensorrt_llm._torch.pyexecutor.resource_manager import ResourceManagerType
 from tensorrt_llm._utils import get_sm_version, mpi_disabled
-from tensorrt_llm.bindings.executor import GuidedDecodingConfig
 from tensorrt_llm.llmapi.llm_args import (CapacitySchedulerPolicy,
-                                          ContextChunkingPolicy, LoadFormat,
+                                          ContextChunkingPolicy,
+                                          GuidedDecodingConfig, LoadFormat,
                                           TorchLlmArgs)
 from tensorrt_llm.llmapi.tokenizer import (TokenizerBase,
                                            _llguidance_tokenizer_info,

tensorrt_llm/llmapi/llm_args.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@
     KvCacheConfig as _KvCacheConfig,
     LookaheadDecodingConfig as _LookaheadDecodingConfig,
     PeftCacheConfig as _PeftCacheConfig,
-    SchedulerConfig as _SchedulerConfig,
-    GuidedDecodingConfig as _GuidedDecodingConfig)  # isort: skip
+    SchedulerConfig as _SchedulerConfig)  # isort: skip
 # isort: on
 
 # yapf: enable
@@ -165,6 +164,26 @@ def _generate_cuda_graph_batch_sizes(max_batch_size: int,
     return batch_sizes
 
 
+class GuidedDecodingConfig(StrictBaseModel):
+
+    class GuidedDecodingBackend(Enum):
+        XGRAMMAR = 0
+        LLGUIDANCE = 1
+
+    backend: GuidedDecodingBackend = Field(
+        default=GuidedDecodingBackend.XGRAMMAR,
+        description="The backend for guided decoding config.")
+    encoded_vocab: Optional[List[str]] = Field(
+        default=None,
+        description="The encoded vocab for guided decoding config.")
+    tokenizer_str: Optional[str] = Field(
+        default=None,
+        description="The tokenizer string for guided decoding config.")
+    stop_token_ids: Optional[List[int]] = Field(
+        default=None,
+        description="The stop token ids for guided decoding config.")
+
+
 class BaseSparseAttentionConfig(StrictBaseModel):
     """
     Configuration for sparse attention.

0 commit comments

Comments
 (0)