36 changes: 35 additions & 1 deletion tensorrt_llm/bench/benchmark/low_latency.py
@@ -29,6 +29,8 @@
from tensorrt_llm.bench.utils.data import (create_dataset_from_stream,
initialize_tokenizer,
update_metadata_for_multimodal)
from tensorrt_llm.bench.utils.scenario import (
prepare_llm_api_options_for_recipe, process_recipe_scenario)
from tensorrt_llm.logger import logger
from tensorrt_llm.sampling_params import SamplingParams

@@ -45,6 +47,16 @@
default=None,
help="Path to a serialized TRT-LLM engine.",
)
@optgroup.option(
    "--recipe",
    type=click.Path(exists=True,
                    readable=True,
                    path_type=Path,
                    resolve_path=True),
    default=None,
    help=
    "Path to a recipe YAML file containing scenario and LLM API configuration. "
    "CLI flags explicitly set will override recipe values.")

Collaborator:

If we make a new option group for these, we can classify them as mutually exclusive. I don't think we should allow both to be specified.

Collaborator (Author):

I originally thought that, since the recipe contains more information, we would allow both and prioritize the information in extra_llm_api_options when such a conflict arises, printing a warning that clearly states which source is being prioritized.

But I agree, it's a lot easier to reduce the UX space here by making them mutually exclusive.
@optgroup.option(
"--extra_llm_api_options",
type=str,
@@ -196,6 +208,19 @@ def latency_command(
# Model, experiment, and engine params
options = get_general_cli_options(params, bench_env)

# Process recipe scenario if present
cli_defaults = {
'concurrency': 1, # Latency default is 1 (not -1 like throughput)
'target_input_len': None,
'target_output_len': None,
'num_requests': 0,
'tp': 1,
'pp': 1,
'ep': None,
}
params, options, scenario = process_recipe_scenario(params, options,
bench_env, cli_defaults)

# Speculative Decode Options
medusa_choices = params.get("medusa_choices")
# Initialize the HF tokenizer for the specified model.
@@ -274,7 +299,16 @@ def latency_command(
exec_settings["performance_options"]["cuda_graphs"] = True
exec_settings["performance_options"]["multi_block_mode"] = True

exec_settings["extra_llm_api_options"] = params.get("extra_llm_api_options")
# Process recipe format if detected - extract llm_api_options only
# Priority: --extra_llm_api_options > --recipe
recipe_path = params.get("recipe", None)
extra_llm_api_options_path = params.get("extra_llm_api_options", None)
config_path = extra_llm_api_options_path if extra_llm_api_options_path else recipe_path
# Convert Path to string if needed
if config_path is not None:
config_path = str(config_path)
exec_settings["extra_llm_api_options"] = prepare_llm_api_options_for_recipe(
config_path, scenario)

# Decoding Options
if medusa_choices is not None:
38 changes: 37 additions & 1 deletion tensorrt_llm/bench/benchmark/throughput.py
@@ -28,6 +28,8 @@
from tensorrt_llm.bench.utils.data import (create_dataset_from_stream,
initialize_tokenizer,
update_metadata_for_multimodal)
from tensorrt_llm.bench.utils.scenario import (
prepare_llm_api_options_for_recipe, process_recipe_scenario)
from tensorrt_llm.llmapi import CapacitySchedulerPolicy
from tensorrt_llm.logger import logger
from tensorrt_llm.sampling_params import SamplingParams
@@ -60,6 +62,16 @@
multiple=True,
help="Paths to custom module directories to import.",
)
@optgroup.option(
"--recipe",
type=click.Path(exists=True,
readable=True,
path_type=Path,
resolve_path=True),
default=None,
help=
"Path to a recipe YAML file containing scenario and LLM API configuration. "
"CLI flags explicitly set will override recipe values.")
@optgroup.option(
"--extra_llm_api_options",
type=str,
@@ -302,6 +314,20 @@ def throughput_command(
options: GeneralExecSettings = get_general_cli_options(params, bench_env)
tokenizer = initialize_tokenizer(options.checkpoint_path)

# Process recipe scenario if present
cli_defaults = {
    'concurrency': -1,
    'target_input_len': None,
    'target_output_len': None,
    'num_requests': 0,
    'tp': 1,
    'pp': 1,
    'ep': None,
    'streaming': False,
}
params, options, scenario = process_recipe_scenario(params, options,
                                                    bench_env, cli_defaults)

Collaborator:

Couldn't we populate this with what we get from the CLI?

Collaborator (Author):

Do you mean the CLI defaults, or the CLI args explicitly specified by the user? If the latter, the recipe would end up overriding the CLI args, but we want the CLI args provided by the user to override the recipe, right? Say the user runs trtllm-bench --model <model> throughput --target_input_len=1024 --recipe recipe.yaml.

Basically, there are three information sources for a given scenario arg (say, ISL):

  • the default, if neither the CLI nor the recipe specifies it
  • the value the user specifies via the CLI
  • the value the user indirectly specifies via the recipe
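For illustration, here is a minimal sketch of the three-way precedence described in the thread above. It is an assumption about the intended behavior, not the actual process_recipe_scenario implementation; the helper resolve_scenario_value and the heuristic of comparing a CLI value against its default to detect an explicitly set flag are both hypothetical.

def resolve_scenario_value(name, cli_value, cli_default, recipe_scenario):
    """Pick the effective value for one scenario argument, e.g. target_input_len."""
    if cli_value != cli_default:
        # The flag differs from its default, so treat it as explicitly set by the user.
        return cli_value
    # Otherwise fall back to the recipe value, and finally to the CLI default.
    return recipe_scenario.get(name, cli_default)


# Example: trtllm-bench --model <model> throughput --target_input_len=1024 --recipe recipe.yaml
recipe_scenario = {"target_input_len": 2048, "concurrency": 8}
print(resolve_scenario_value("target_input_len", 1024, None, recipe_scenario))  # 1024 (CLI wins)
print(resolve_scenario_value("concurrency", -1, -1, recipe_scenario))           # 8 (from recipe)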

# Extract throughput-specific options not handled by GeneralExecSettings
max_batch_size = params.get("max_batch_size")
max_num_tokens = params.get("max_num_tokens")
@@ -397,7 +423,17 @@ def throughput_command(
exec_settings["settings_config"]["dynamic_max_batch_size"] = True

# LlmArgs
exec_settings["extra_llm_api_options"] = params.pop("extra_llm_api_options")
# Process recipe format if detected - extract llm_api_options only
# Priority: --extra_llm_api_options > --recipe
recipe_path = params.pop("recipe", None)
extra_llm_api_options_path = params.pop("extra_llm_api_options", None)
config_path = extra_llm_api_options_path if extra_llm_api_options_path else recipe_path
# Convert Path to string if needed
if config_path is not None:
config_path = str(config_path)
exec_settings["extra_llm_api_options"] = prepare_llm_api_options_for_recipe(
config_path, scenario)

exec_settings["iteration_log"] = options.iteration_log

# Construct the runtime configuration dataclass.
26 changes: 25 additions & 1 deletion tensorrt_llm/bench/benchmark/utils/general.py
@@ -84,7 +84,31 @@ def get_settings(params: dict, dataset_metadata: DatasetMetadata, model: str,
kv_cache_config = {}
if extra_llm_api_options:
with open(extra_llm_api_options, 'r') as f:
llm_args_dict = yaml.safe_load(f)
loaded_data = yaml.safe_load(f)

# Detect recipe format (has 'scenario' and 'llm_api_options' keys)
if isinstance(
loaded_data, dict
) and 'scenario' in loaded_data and 'llm_api_options' in loaded_data:
# Recipe format - extract llm_api_options section for LLM args
llm_args_dict = loaded_data['llm_api_options']

# TODO: Add llm_api_options validation once PR #8331 merges
# (standardizes LlmArgs with Pydantic - validation will happen automatically)

# Set environment variables from 'env' section (if not already set)
import os
env_vars = loaded_data.get('env', {})
for key, value in env_vars.items():
if key not in os.environ:
os.environ[key] = str(value)
logger.info(
f"Set environment variable from recipe: {key}={value}"
)
else:
# Simple format - use loaded data directly
llm_args_dict = loaded_data

kv_cache_config = llm_args_dict.get("kv_cache_config", {
"dtype": "auto",
})
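To make the detection above concrete, here is a self-contained sketch that mirrors it on an inline YAML string. The top-level 'scenario' / 'llm_api_options' / 'env' keys come from the code above; the field names inside 'scenario' and the environment variable name are illustrative assumptions, not a documented schema.

import os

import yaml

recipe_text = """
scenario:
  target_input_len: 1024
  target_output_len: 128
  concurrency: 8
llm_api_options:
  kv_cache_config:
    dtype: auto
env:
  EXAMPLE_ENV_VAR: "1"
"""

loaded_data = yaml.safe_load(recipe_text)
if isinstance(loaded_data, dict) \
        and 'scenario' in loaded_data and 'llm_api_options' in loaded_data:
    # Recipe format: only the llm_api_options section feeds the LLM args.
    llm_args_dict = loaded_data['llm_api_options']
    # Export the optional 'env' section without clobbering existing variables.
    for key, value in loaded_data.get('env', {}).items():
        os.environ.setdefault(key, str(value))
else:
    # Simple format: the whole file is already the LLM args mapping.
    llm_args_dict = loaded_data

print(llm_args_dict.get("kv_cache_config", {"dtype": "auto"}))  # {'dtype': 'auto'}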