36 changes: 35 additions & 1 deletion tensorrt_llm/bench/benchmark/low_latency.py
@@ -29,6 +29,8 @@
from tensorrt_llm.bench.utils.data import (create_dataset_from_stream,
initialize_tokenizer,
update_metadata_for_multimodal)
from tensorrt_llm.bench.utils.scenario import (
prepare_llm_api_options_for_recipe, process_recipe_scenario)
from tensorrt_llm.logger import logger
from tensorrt_llm.sampling_params import SamplingParams

@@ -45,6 +47,16 @@
default=None,
help="Path to a serialized TRT-LLM engine.",
)
@optgroup.option(
    "--recipe",
    type=click.Path(exists=True,
                    readable=True,
                    path_type=Path,
                    resolve_path=True),
    default=None,
    help=
    "Path to a recipe YAML file containing scenario and LLM API configuration. "
    "CLI flags explicitly set will override recipe values.")

Collaborator:

If we make a new option group for these, we can classify them as mutually exclusive. I don't think we should allow both to be specified.

Collaborator (Author):

I originally thought that, since the recipe contains more information, we would allow both and prioritize the information in extra_llm_api_options when such a conflict arises, printing a warning that clearly states which source is being prioritized.

But I agree, it's a lot easier to reduce the UX space here by making them mutually exclusive.
@optgroup.option(
"--extra_llm_api_options",
type=str,
@@ -196,6 +208,19 @@ def latency_command(
# Model, experiment, and engine params
options = get_general_cli_options(params, bench_env)

# Process recipe scenario if present
cli_defaults = {
'concurrency': 1, # Latency default is 1 (not -1 like throughput)
'target_input_len': None,
'target_output_len': None,
'num_requests': 0,
'tp': 1,
'pp': 1,
'ep': None,
}
params, options, scenario = process_recipe_scenario(params, options,
bench_env, cli_defaults)

# Speculative Decode Options
medusa_choices = params.get("medusa_choices")
# Initialize the HF tokenizer for the specified model.
@@ -274,7 +299,16 @@ def latency_command(
exec_settings["performance_options"]["cuda_graphs"] = True
exec_settings["performance_options"]["multi_block_mode"] = True

exec_settings["extra_llm_api_options"] = params.get("extra_llm_api_options")
# Process recipe format if detected - extract llm_api_options only
# Priority: --extra_llm_api_options > --recipe
recipe_path = params.get("recipe", None)
extra_llm_api_options_path = params.get("extra_llm_api_options", None)
config_path = extra_llm_api_options_path if extra_llm_api_options_path else recipe_path
# Convert Path to string if needed
if config_path is not None:
config_path = str(config_path)
exec_settings["extra_llm_api_options"] = prepare_llm_api_options_for_recipe(
config_path, scenario)

# Decoding Options
if medusa_choices is not None:
38 changes: 37 additions & 1 deletion tensorrt_llm/bench/benchmark/throughput.py
@@ -28,6 +28,8 @@
from tensorrt_llm.bench.utils.data import (create_dataset_from_stream,
initialize_tokenizer,
update_metadata_for_multimodal)
from tensorrt_llm.bench.utils.scenario import (
prepare_llm_api_options_for_recipe, process_recipe_scenario)
from tensorrt_llm.llmapi import CapacitySchedulerPolicy
from tensorrt_llm.logger import logger
from tensorrt_llm.sampling_params import SamplingParams
@@ -60,6 +62,16 @@
multiple=True,
help="Paths to custom module directories to import.",
)
@optgroup.option(
"--recipe",
type=click.Path(exists=True,
readable=True,
path_type=Path,
resolve_path=True),
default=None,
help=
"Path to a recipe YAML file containing scenario and LLM API configuration. "
"CLI flags explicitly set will override recipe values.")
@optgroup.option(
"--extra_llm_api_options",
type=str,
@@ -302,6 +314,20 @@ def throughput_command(
options: GeneralExecSettings = get_general_cli_options(params, bench_env)
tokenizer = initialize_tokenizer(options.checkpoint_path)

# Process recipe scenario if present
cli_defaults = {
    'concurrency': -1,
    'target_input_len': None,
    'target_output_len': None,
    'num_requests': 0,
    'tp': 1,
    'pp': 1,
    'ep': None,
    'streaming': False,
}
params, options, scenario = process_recipe_scenario(params, options,
                                                    bench_env, cli_defaults)

Collaborator:

Couldn't we populate this with what we get from the CLI?

Collaborator (Author):

Do you mean the CLI defaults, or the CLI args explicitly specified by the user? If the latter, the recipe would end up overriding the CLI args, but we want the CLI args provided by the user to override the recipe, right? Say the user runs trtllm-bench --model <model> throughput --target_input_len=1024 --recipe recipe.yaml.

Basically, there are three information sources for a given scenario arg (say, ISL):

  • the default, if neither the CLI nor the recipe specifies it
  • the value the user specifies via the CLI
  • the value the user indirectly specifies via the recipe
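For illustration, here is a minimal sketch of the three-way precedence described in the thread above. It is an assumption about the intended behavior, not the actual process_recipe_scenario implementation; the helper resolve_scenario_value and the heuristic of comparing a CLI value against its default to detect an explicitly set flag are both hypothetical.

def resolve_scenario_value(name, cli_value, cli_default, recipe_scenario):
    """Pick the effective value for one scenario argument, e.g. target_input_len."""
    if cli_value != cli_default:
        # The flag differs from its default, so treat it as explicitly set by the user.
        return cli_value
    # Otherwise fall back to the recipe value, and finally to the CLI default.
    return recipe_scenario.get(name, cli_default)


# Example: trtllm-bench --model <model> throughput --target_input_len=1024 --recipe recipe.yaml
recipe_scenario = {"target_input_len": 2048, "concurrency": 8}
print(resolve_scenario_value("target_input_len", 1024, None, recipe_scenario))  # 1024 (CLI wins)
print(resolve_scenario_value("concurrency", -1, -1, recipe_scenario))           # 8 (from recipe)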

# Extract throughput-specific options not handled by GeneralExecSettings
max_batch_size = params.get("max_batch_size")
max_num_tokens = params.get("max_num_tokens")
@@ -397,7 +423,17 @@ def throughput_command(
exec_settings["settings_config"]["dynamic_max_batch_size"] = True

# LlmArgs
exec_settings["extra_llm_api_options"] = params.pop("extra_llm_api_options")
# Process recipe format if detected - extract llm_api_options only
# Priority: --extra_llm_api_options > --recipe
recipe_path = params.pop("recipe", None)
extra_llm_api_options_path = params.pop("extra_llm_api_options", None)
config_path = extra_llm_api_options_path if extra_llm_api_options_path else recipe_path
# Convert Path to string if needed
if config_path is not None:
config_path = str(config_path)
exec_settings["extra_llm_api_options"] = prepare_llm_api_options_for_recipe(
config_path, scenario)

exec_settings["iteration_log"] = options.iteration_log

# Construct the runtime configuration dataclass.
26 changes: 25 additions & 1 deletion tensorrt_llm/bench/benchmark/utils/general.py
@@ -84,7 +84,31 @@ def get_settings(params: dict, dataset_metadata: DatasetMetadata, model: str,
kv_cache_config = {}
if extra_llm_api_options:
with open(extra_llm_api_options, 'r') as f:
llm_args_dict = yaml.safe_load(f)
loaded_data = yaml.safe_load(f)

# Detect recipe format (has 'scenario' and 'llm_api_options' keys)
if isinstance(
loaded_data, dict
) and 'scenario' in loaded_data and 'llm_api_options' in loaded_data:
# Recipe format - extract llm_api_options section for LLM args
llm_args_dict = loaded_data['llm_api_options']

# TODO: Add llm_api_options validation once PR #8331 merges
# (standardizes LlmArgs with Pydantic - validation will happen automatically)

# Set environment variables from 'env' section (if not already set)
import os
env_vars = loaded_data.get('env', {})
for key, value in env_vars.items():
if key not in os.environ:
os.environ[key] = str(value)
logger.info(
f"Set environment variable from recipe: {key}={value}"
)
else:
# Simple format - use loaded data directly
llm_args_dict = loaded_data

kv_cache_config = llm_args_dict.get("kv_cache_config", {
"dtype": "auto",
})
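To make the detection above concrete, here is a self-contained sketch that mirrors it on an inline YAML string. The top-level 'scenario' / 'llm_api_options' / 'env' keys come from the code above; the field names inside 'scenario' and the environment variable name are illustrative assumptions, not a documented schema.

import os

import yaml

recipe_text = """
scenario:
  target_input_len: 1024
  target_output_len: 128
  concurrency: 8
llm_api_options:
  kv_cache_config:
    dtype: auto
env:
  EXAMPLE_ENV_VAR: "1"
"""

loaded_data = yaml.safe_load(recipe_text)
if isinstance(loaded_data, dict) \
        and 'scenario' in loaded_data and 'llm_api_options' in loaded_data:
    # Recipe format: only the llm_api_options section feeds the LLM args.
    llm_args_dict = loaded_data['llm_api_options']
    # Export the optional 'env' section without clobbering existing variables.
    for key, value in loaded_data.get('env', {}).items():
        os.environ.setdefault(key, str(value))
else:
    # Simple format: the whole file is already the LLM args mapping.
    llm_args_dict = loaded_data

print(llm_args_dict.get("kv_cache_config", {"dtype": "auto"}))  # {'dtype': 'auto'}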