From ed8583f8dc994a2ffb2b25337c4e8fa3f293c36e Mon Sep 17 00:00:00 2001
From: Larry the Laptop Lobster <larry@651-pudding-hill.local>
Date: Wed, 18 Feb 2026 14:49:44 -0500
Subject: [PATCH 1/8] feat: add multi-profile llm config switching across
 frontend/api/worker

---
 docs/llm_config.md                            | 108 ++++++-----
 frontend_multi_user/src/app.py                |  13 +-
 frontend_multi_user/templates/demo_run.html   |   3 +-
 frontend_multi_user/templates/index.html      |   7 +
 frontend_single_user/app.py                   |  55 ++++--
 llm_config.custom.json                        | 170 ++++++++++++++++++
 llm_config.frontier.json                      | 170 ++++++++++++++++++
 llm_config.premium.json                       | 170 ++++++++++++++++++
 mcp_cloud/app.py                              |  17 +-
 mcp_cloud/http_server.py                      |   6 +
 mcp_cloud/tests/test_speed_vs_detail.py       |  19 +-
 mcp_cloud/tool_models.py                      |   4 +
 worker_plan/Dockerfile                        |   2 +-
 worker_plan/README.md                         |   2 +
 worker_plan/app.py                            |  20 ++-
 worker_plan/worker_plan_api/model_profile.py  | 126 +++++++++++++
 worker_plan/worker_plan_api/planexe_config.py | 159 ++++++++--------
 .../tests/test_model_profile.py               |  37 ++++
 .../worker_plan_internal/llm_factory.py       |  34 +++-
 .../plan/pipeline_environment.py              |   5 +-
 .../plan/run_plan_pipeline.py                 |  36 +++-
 .../utils/planexe_llmconfig.py                |  12 +-
 worker_plan_database/Dockerfile               |   2 +-
 worker_plan_database/app.py                   |  77 ++++++--
 worker_plan_database/model_profile.py         |   9 +
 worker_plan_database/railway.toml             |   2 +-
 .../tests/test_model_profile.py               |  22 +++
 27 files changed, 1111 insertions(+), 176 deletions(-)
 create mode 100644 llm_config.custom.json
 create mode 100644 llm_config.frontier.json
 create mode 100644 llm_config.premium.json
 create mode 100644 worker_plan/worker_plan_api/model_profile.py
 create mode 100644 worker_plan/worker_plan_api/tests/test_model_profile.py
 create mode 100644 worker_plan_database/model_profile.py
 create mode 100644 worker_plan_database/tests/test_model_profile.py

diff --git a/docs/llm_config.md b/docs/llm_config.md
index 076a3fae5..3a03e98d3 100644
--- a/docs/llm_config.md
+++ b/docs/llm_config.md
@@ -1,16 +1,72 @@
 ---
-title: LLM config (llm_config.json)
+title: LLM config profiles
 ---
 
-# LLM config (llm_config.json)
+# LLM config profiles
 
-This file defines which LLM providers and models PlanExe can use. Each top‑level key is a model id used in the UI and pipeline.
+PlanExe supports **4 model profiles**:
 
-`llm_config.json` lives in the PlanExe repo root and is read at runtime. Environment variables are substituted from `.env`.
+- `baseline`
+- `premium`
+- `frontier`
+- `custom`
+
+Each profile maps to a separate config file:
+
+- `baseline` → `llm_config.json`
+- `premium` → `llm_config.premium.json`
+- `frontier` → `llm_config.frontier.json`
+- `custom` → `llm_config.custom.json` (or `PLANEXE_LLM_CONFIG_CUSTOM_FILENAME`)
+
+If the selected profile file is missing or invalid, PlanExe safely falls back to `llm_config.json`.
+
+---
+
+## How profile selection works
+
+### Runtime env var
+
+Set:
+
+- `PLANEXE_MODEL_PROFILE=baseline|premium|frontier|custom`
+
+This is passed end-to-end in worker execution paths (frontend/API/task parameters → worker pipeline).
+
+### Request/task parameter
+
+Task producers (web frontend, MCP) can include:
+
+- `model_profile`
+
+Invalid values are normalized to `baseline`.
+
+---
+
+## Strict filename validation
+
+Config filenames are strictly validated:
+
+- must be a **filename only** (no `/` or `\` path separators, and no absolute paths)
+- must match: `llm_config*.json`
+
+This prevents path traversal and unsafe file selection.
+
+Legacy override `PLANEXE_LLM_CONFIG_NAME` is still supported for backward compatibility, but profile-based selection is preferred.
+
+---
+
+## Provider-priority ordering per profile
+
+Within each profile config file, priority is defined per model entry:
+
+- lower `priority` value = tried first
+- higher `priority` value = tried later, as a fallback
+
+`auto` mode uses this profile-specific priority ordering.
 
 ---
 
-## File structure
+## File format (same for all profile files)
 
 ```json
 {
@@ -24,8 +80,6 @@ This file defines which LLM providers and models PlanExe can use. Each top‑lev
       "api_key": "${OPENROUTER_API_KEY}",
       "temperature": 0.1,
       "timeout": 60.0,
-      "is_function_calling_model": false,
-      "is_chat_model": true,
       "max_tokens": 8192,
       "max_retries": 5
     }
@@ -35,41 +89,11 @@ This file defines which LLM providers and models PlanExe can use. Each top‑lev
 
 ---
 
-## Top-level fields
+## Backward compatibility
 
-- **comment**: Plain‑text description for humans. Optional.
-- **priority**: Lower number = higher priority when `auto` is selected. Optional.
-- **luigi_workers**: Number of Luigi workers used for this model. Use `1` for local models (Ollama/LM Studio).
-- **class**: Provider class name (e.g., `OpenRouter`, `OpenAI`, `Ollama`, `LMStudio`, `OpenAILike`).
-- **arguments**: Provider‑specific settings passed to the LLM client.
-
----
-
-## Common arguments
-
-These keys are common across most providers:
-
-- **model** / **model_name**: Provider model identifier.
-- **api_key**: API key reference (usually `${ENV_VAR}`).
-- **base_url** / **api_base**: Override the provider base URL.
-- **temperature**: Controls randomness. Lower is more deterministic.
-- **timeout** / **request_timeout**: Max time per request in seconds.
-- **max_tokens** / **max_completion_tokens**: Output token limit (provider specific).
-- **max_retries**: Retry count on transient errors.
-- **is_function_calling_model**: Whether the model supports structured/tool output.
-- **is_chat_model**: Whether the model uses chat format.
-
----
-
-## Choosing values
-
-- Use **luigi_workers = 1** for local models (Ollama / LM Studio).
-- Use **luigi_workers > 1** for cloud models if you want parallel tasks.
-- Keep **timeout** higher for slower models.
-
----
+When no profile is provided, PlanExe defaults to:
 
-## Notes
+- `baseline`
+- `llm_config.json`
 
-- If `llm_config.json` is missing, PlanExe logs a warning and proceeds with defaults.
-- Changes to `llm_config.json` require a container restart (or rebuild if baked into the image).
+So existing deployments continue to work without changes.
diff --git a/frontend_multi_user/src/app.py b/frontend_multi_user/src/app.py
index 17e8b415c..dde0063f7 100644
--- a/frontend_multi_user/src/app.py
+++ b/frontend_multi_user/src/app.py
@@ -53,6 +53,7 @@
 
 from worker_plan_api.planexe_dotenv import DotEnvKeyEnum, PlanExeDotEnv
 from worker_plan_api.planexe_config import PlanExeConfig
+from worker_plan_api.model_profile import normalize_model_profile
 
 RUN_DIR = "run"
 
@@ -2401,6 +2402,12 @@ def run():
             if len(parameters) == 0:
                 parameters = None
 
+            # Normalize model profile to a known value with backward-compatible baseline default.
+            if not isinstance(parameters, dict):
+                parameters = {}
+            raw_profile = parameters.get("model_profile")
+            parameters["model_profile"] = normalize_model_profile(raw_profile).value
+
             # Get length of prompt_param in bytes and in characters
             prompt_param_bytes = len(prompt_param.encode('utf-8'))
             prompt_param_characters = len(prompt_param)
@@ -2502,8 +2509,10 @@ def create_plan():
             parameters.pop('user_id', None)
             parameters.pop('nonce', None)
             parameters.pop('redirect_to_plan', None)
-            if len(parameters) == 0:
-                parameters = None
+
+            # Normalize model profile to a known value with backward-compatible baseline default.
+            raw_profile = parameters.get("model_profile")
+            parameters["model_profile"] = normalize_model_profile(raw_profile).value
 
             prompt_param_bytes = len(prompt_param.encode('utf-8'))
             prompt_param_characters = len(prompt_param)
diff --git a/frontend_multi_user/templates/demo_run.html b/frontend_multi_user/templates/demo_run.html
index c588c2c9d..3b7a86d96 100644
--- a/frontend_multi_user/templates/demo_run.html
+++ b/frontend_multi_user/templates/demo_run.html
@@ -211,6 +211,7 @@ <h1>Demo Run</h1>
         <input type="hidden" name="nonce" value="{{ nonce }}">
         <!-- Values are submitted only when enabled (not disabled) -->
         <input type="hidden" name="speed_vs_detail" id="form-speed-vs-detail" value="ping_llm">
+        <input type="hidden" name="model_profile" id="form-model-profile" value="baseline">
         <input type="hidden" name="developer" id="form-developer" value="true">
     </form>
 
@@ -282,7 +283,7 @@ <h1>Demo Run</h1>
 
             if (methodSelect.value === 'GET') {
                 // GET method: build URL with query parameters
-                let url = `/run?prompt=${encodeURIComponent(promptValue)}&user_id={{ user_id }}&nonce={{ nonce }}&speed_vs_detail=${encodeURIComponent(speedVsDetailValue)}`;
+                let url = `/run?prompt=${encodeURIComponent(promptValue)}&user_id={{ user_id }}&nonce={{ nonce }}&speed_vs_detail=${encodeURIComponent(speedVsDetailValue)}&model_profile=baseline`;
                 if (developerChecked) {
                     url += '&developer';
                 }
diff --git a/frontend_multi_user/templates/index.html b/frontend_multi_user/templates/index.html
index 83e6cb8ad..1da3eb207 100644
--- a/frontend_multi_user/templates/index.html
+++ b/frontend_multi_user/templates/index.html
@@ -468,6 +468,13 @@ <h2>Start a New Plan</h2>
         <form id="new-plan-form" method="POST" action="{{ url_for('create_plan') }}">
             <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
             <input type="hidden" name="speed_vs_detail" value="all_details_but_slow">
+            <label for="model-profile" style="display:block; margin-bottom:8px; font-size:0.9rem; color:#a0aec0;">Model profile</label>
+            <select id="model-profile" name="model_profile" style="margin-bottom:12px; width:100%; max-width:240px; padding:8px; border-radius:8px;">
+                <option value="baseline" selected>baseline</option>
+                <option value="premium">premium</option>
+                <option value="frontier">frontier</option>
+                <option value="custom">custom</option>
+            </select>
             <textarea name="prompt" id="plan-prompt" placeholder="Describe your project or idea in detail. The more context you provide, the better the plan will be." required></textarea>
             <div class="char-count" id="char-count">0 characters</div>
             <div class="new-plan-footer">
diff --git a/frontend_single_user/app.py b/frontend_single_user/app.py
index 78d421569..813160193 100644
--- a/frontend_single_user/app.py
+++ b/frontend_single_user/app.py
@@ -21,6 +21,7 @@
 load_dotenv()
 from worker_plan_api.llm_info import LLMInfo, OllamaStatus
 from worker_plan_api.speedvsdetail import SpeedVsDetailEnum
+from worker_plan_api.model_profile import ModelProfileEnum
 from worker_plan_api.prompt_catalog import PromptCatalog
 
 logger = logging.getLogger(__name__)
@@ -292,6 +293,8 @@ def __init__(self):
         self.llm_model = default_model_value
         # Settings: The speedvsdetail that the user has picked.
         self.speedvsdetail = SpeedVsDetailEnum.ALL_DETAILS_BUT_SLOW
+        # Settings: selected model profile.
+        self.model_profile = ModelProfileEnum.BASELINE.value
         # The run id of the currently running pipeline process (managed by worker service).
         self.active_run_id: Optional[str] = None
         # A threading.Event used to signal that the running process should stop.
@@ -318,6 +321,7 @@ def initialize_browser_settings(browser_state, session_state: SessionState):
     openrouter_api_key = settings.get("openrouter_api_key_text", "")
     model = settings.get("model_radio", default_model_value)
     speedvsdetail = settings.get("speedvsdetail_radio", SpeedVsDetailEnum.ALL_DETAILS_BUT_SLOW)
+    model_profile = settings.get("model_profile_radio", ModelProfileEnum.BASELINE.value)
 
     # When making changes to the llm_config.json, it may happen that the selected model is no longer among the available_model_names.
     # In that case, set the model to the default_model_value.
@@ -325,12 +329,16 @@ def initialize_browser_settings(browser_state, session_state: SessionState):
         logger.info(f"initialize_browser_settings: model '{model}' is not in available_model_names. Setting to default_model_value: {default_model_value}")
         model = default_model_value
 
+    if model_profile not in [e.value for e in ModelProfileEnum]:
+        model_profile = ModelProfileEnum.BASELINE.value
+
     session_state.openrouter_api_key = openrouter_api_key
     session_state.llm_model = model
     session_state.speedvsdetail = speedvsdetail
-    return openrouter_api_key, model, speedvsdetail, browser_state, session_state
+    session_state.model_profile = model_profile
+    return openrouter_api_key, model, speedvsdetail, model_profile, browser_state, session_state
 
-def update_browser_settings_callback(openrouter_api_key, model, speedvsdetail, browser_state, session_state: SessionState):
+def update_browser_settings_callback(openrouter_api_key, model, speedvsdetail, model_profile, browser_state, session_state: SessionState):
     try:
         settings = json.loads(browser_state) if browser_state else {}
     except Exception:
@@ -338,11 +346,13 @@ def update_browser_settings_callback(openrouter_api_key, model, speedvsdetail, b
     settings["openrouter_api_key_text"] = openrouter_api_key
     settings["model_radio"] = model
     settings["speedvsdetail_radio"] = speedvsdetail
+    settings["model_profile_radio"] = model_profile
     updated_browser_state = json.dumps(settings)
     session_state.openrouter_api_key = openrouter_api_key
     session_state.llm_model = model
     session_state.speedvsdetail = speedvsdetail
-    return updated_browser_state, openrouter_api_key, model, speedvsdetail, session_state
+    session_state.model_profile = model_profile
+    return updated_browser_state, openrouter_api_key, model, speedvsdetail, model_profile, session_state
 
 def run_planner(submit_or_retry_button, plan_prompt, browser_state, session_state: SessionState):
     """
@@ -358,6 +368,7 @@ def run_planner(submit_or_retry_button, plan_prompt, browser_state, session_stat
     session_state.openrouter_api_key = settings.get("openrouter_api_key_text", session_state.openrouter_api_key)
     session_state.llm_model = settings.get("model_radio", session_state.llm_model)
     session_state.speedvsdetail = settings.get("speedvsdetail_radio", session_state.speedvsdetail)
+    session_state.model_profile = settings.get("model_profile_radio", session_state.model_profile)
 
     # Check if an OpenRouter API key is required and provided.
     if CONFIG.run_planner_check_api_key_is_provided:
@@ -394,6 +405,7 @@ def run_planner(submit_or_retry_button, plan_prompt, browser_state, session_stat
         "plan_prompt": plan_prompt,
         "llm_model": session_state.llm_model,
         "speed_vs_detail": speedvsdetail_string,
+        "model_profile": session_state.model_profile,
         "openrouter_api_key": session_state.openrouter_api_key or None,
     }
     if run_id:
@@ -714,6 +726,19 @@ def check_api_key(session_state: SessionState):
             interactive=True 
         )
 
+        model_profile_radio = gr.Radio(
+            [
+                ("Baseline", ModelProfileEnum.BASELINE.value),
+                ("Premium", ModelProfileEnum.PREMIUM.value),
+                ("Frontier", ModelProfileEnum.FRONTIER.value),
+                ("Custom", ModelProfileEnum.CUSTOM.value),
+            ],
+            value=ModelProfileEnum.BASELINE.value,
+            label="Model Profile",
+            info="Select which llm_config profile file to use.",
+            interactive=True,
+        )
+
         speedvsdetail_items = [
             ("All details, but slow", SpeedVsDetailEnum.ALL_DETAILS_BUT_SLOW),
             ("Fast, but few details", SpeedVsDetailEnum.FAST_BUT_SKIP_DETAILS),
@@ -803,8 +828,8 @@ def check_api_key(session_state: SessionState):
     # Unified change callbacks for settings.
     openrouter_api_key_text.change(
         fn=update_browser_settings_callback,
-        inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, session_state]
+        inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -813,8 +838,8 @@ def check_api_key(session_state: SessionState):
 
     model_radio.change(
         fn=update_browser_settings_callback,
-        inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, session_state]
+        inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -823,8 +848,18 @@ def check_api_key(session_state: SessionState):
 
     speedvsdetail_radio.change(
         fn=update_browser_settings_callback,
-        inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, session_state]
+        inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
+    ).then(
+        fn=check_api_key,
+        inputs=[session_state],
+        outputs=[api_key_warning]
+    )
+
+    model_profile_radio.change(
+        fn=update_browser_settings_callback,
+        inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -841,7 +876,7 @@ def check_api_key(session_state: SessionState):
     demo_text2plan.load(
         fn=initialize_browser_settings,
         inputs=[browser_state, session_state],
-        outputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, browser_state, session_state]
+        outputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
diff --git a/llm_config.custom.json b/llm_config.custom.json
new file mode 100644
index 000000000..3fbc022ae
--- /dev/null
+++ b/llm_config.custom.json
@@ -0,0 +1,170 @@
+{
+    "openai-paid-gpt-5-nano": {
+        "comment": "This is slow. It's paid, and requires that you have an OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan.",
+        "luigi_workers": 4,
+        "class": "OpenAI",
+        "arguments": {
+            "model": "gpt-5-nano",
+            "api_key": "${OPENAI_API_KEY}",
+            "temperature": 1.0,
+            "timeout": 120.0,
+            "context_window": 400000,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 128000,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-openai-gpt-oss-20b": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created August 5, 2025. 131,072 context. $0.05/M input tokens. $0.20/M output tokens. This is a reasoning model, and uses more tokens than non-reasoning LLMs.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "openai/gpt-oss-20b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Dec 14, 2025. 262,144 context. $0.06/M input tokens. $0.24/M output tokens. This is a reasoning model, and uses more tokens than non-reasoning LLMs.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "nvidia/nemotron-3-nano-30b-a3b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-gemini-2.0-flash-001": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Feb 25, 2025. 1,048,576 context. $0.075/M input tokens. $0.30/M output tokens.",
+        "priority": 1,
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "google/gemini-2.0-flash-001",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-openai-gpt-4o-mini": {
+        "comment": "This is medium fast. It's paid, so check the pricing before use. Created Jul 18, 2024. 128,000 context. Starting at $0.15/M input tokens. Starting at $0.60/M output tokens.",
+        "priority": 2,
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "openai/gpt-4o-mini",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-qwen3-30b-a3b": {
+        "comment": "This is slow. It's paid, so check the pricing before use. Created Apr 28, 2025. 40,960 context. $0.08/M input tokens. $0.29/M output tokens.",
+        "priority": 3,
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "qwen/qwen3-30b-a3b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "alibabacloud-paid-qwen-flash-2025-07-28": {
+        "comment": "This is slow. It's paid, so check the pricing before use. Snapshot from 2025-07-28. 1,000,000 context. $0.05/M input tokens. $0.40/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenAILike",
+        "arguments": {
+            "model": "qwen-flash-2025-07-28",
+            "api_key": "${DASHSCOPE_API_KEY}",
+            "api_base": "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 16384,
+            "max_retries": 5
+        }
+    },
+    "ollama-llama3.1": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "llama3.1:latest",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "docker-ollama-llama3.1": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in a Docker container, and Ollama is installed on the host computer.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "llama3.1:latest",
+            "base_url": "http://host.docker.internal:11434",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "ollama-qwen2.5-coder": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "qwen2.5-coder:latest",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "lmstudio-qwen2.5-7b-instruct-1m": {
+        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "LMStudio",
+        "arguments": {
+            "model_name": "qwen2.5-7b-instruct-1m",
+            "base_url": "http://localhost:1234/v1",
+            "temperature": 0.2,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "docker-lmstudio-qwen2.5-7b-instruct-1m": {
+        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in a Docker container, and LM Studio is installed on the host computer.",
+        "luigi_workers": 1,
+        "class": "LMStudio",
+        "arguments": {
+            "model_name": "qwen2.5-7b-instruct-1m",
+            "base_url": "http://host.docker.internal:1234/v1",
+            "temperature": 0.2,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    }
+}
diff --git a/llm_config.frontier.json b/llm_config.frontier.json
new file mode 100644
index 000000000..2133a1fa3
--- /dev/null
+++ b/llm_config.frontier.json
@@ -0,0 +1,170 @@
+{
+    "openai-paid-gpt-5-nano": {
+        "comment": "This is slow. It's paid, and requires that you have an OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan.",
+        "luigi_workers": 4,
+        "class": "OpenAI",
+        "arguments": {
+            "model": "gpt-5-nano",
+            "api_key": "${OPENAI_API_KEY}",
+            "temperature": 1.0,
+            "timeout": 120.0,
+            "context_window": 400000,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 128000,
+            "max_retries": 5
+        },
+        "priority": 1
+    },
+    "openrouter-paid-openai-gpt-oss-20b": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created August 5, 2025. 131,072 context. $0.05/M input tokens. $0.20/M output tokens. This is a reasoning model, and uses more tokens than non-reasoning LLMs.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "openai/gpt-oss-20b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        },
+        "priority": 2
+    },
+    "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Dec 14, 2025. 262,144 context. $0.06/M input tokens. $0.24/M output tokens. This is a reasoning model, and uses more tokens than non-reasoning LLMs.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "nvidia/nemotron-3-nano-30b-a3b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        },
+        "priority": 3
+    },
+    "openrouter-paid-gemini-2.0-flash-001": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Feb 25, 2025. 1,048,576 context. $0.075/M input tokens. $0.30/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "google/gemini-2.0-flash-001",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-openai-gpt-4o-mini": {
+        "comment": "This is medium fast. It's paid, so check the pricing before use. Created Jul 18, 2024. 128,000 context. Starting at $0.15/M input tokens. Starting at $0.60/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "openai/gpt-4o-mini",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-qwen3-30b-a3b": {
+        "comment": "This is slow. It's paid, so check the pricing before use. Created Apr 28, 2025. 40,960 context. $0.08/M input tokens. $0.29/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "qwen/qwen3-30b-a3b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "alibabacloud-paid-qwen-flash-2025-07-28": {
+        "comment": "This is slow. It's paid, so check the pricing before use. Snapshot from 2025-07-28. 1,000,000 context. $0.05/M input tokens. $0.40/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenAILike",
+        "arguments": {
+            "model": "qwen-flash-2025-07-28",
+            "api_key": "${DASHSCOPE_API_KEY}",
+            "api_base": "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 16384,
+            "max_retries": 5
+        }
+    },
+    "ollama-llama3.1": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "llama3.1:latest",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "docker-ollama-llama3.1": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in a Docker container, and Ollama is installed on the host computer.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "llama3.1:latest",
+            "base_url": "http://host.docker.internal:11434",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "ollama-qwen2.5-coder": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "qwen2.5-coder:latest",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "lmstudio-qwen2.5-7b-instruct-1m": {
+        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "LMStudio",
+        "arguments": {
+            "model_name": "qwen2.5-7b-instruct-1m",
+            "base_url": "http://localhost:1234/v1",
+            "temperature": 0.2,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "docker-lmstudio-qwen2.5-7b-instruct-1m": {
+        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in a Docker container, and LM Studio is installed on the host computer.",
+        "luigi_workers": 1,
+        "class": "LMStudio",
+        "arguments": {
+            "model_name": "qwen2.5-7b-instruct-1m",
+            "base_url": "http://host.docker.internal:1234/v1",
+            "temperature": 0.2,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    }
+}
diff --git a/llm_config.premium.json b/llm_config.premium.json
new file mode 100644
index 000000000..3e0a755da
--- /dev/null
+++ b/llm_config.premium.json
@@ -0,0 +1,170 @@
+{
+    "openai-paid-gpt-5-nano": {
+        "comment": "This is slow. It's paid, and requires that you have an OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan.",
+        "luigi_workers": 4,
+        "class": "OpenAI",
+        "arguments": {
+            "model": "gpt-5-nano",
+            "api_key": "${OPENAI_API_KEY}",
+            "temperature": 1.0,
+            "timeout": 120.0,
+            "context_window": 400000,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 128000,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-openai-gpt-oss-20b": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created August 5, 2025. 131,072 context. $0.05/M input tokens. $0.20/M output tokens. This is a reasoning model, and uses more tokens than non-reasoning LLMs.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "openai/gpt-oss-20b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Dec 14, 2025. 262,144 context. $0.06/M input tokens. $0.24/M output tokens. This is a reasoning model, and uses more tokens than non-reasoning LLMs.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "nvidia/nemotron-3-nano-30b-a3b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        }
+    },
+    "openrouter-paid-gemini-2.0-flash-001": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Feb 25, 2025. 1,048,576 context. $0.075/M input tokens. $0.30/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "google/gemini-2.0-flash-001",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        },
+        "priority": 2
+    },
+    "openrouter-paid-openai-gpt-4o-mini": {
+        "comment": "This is medium fast. It's paid, so check the pricing before use. Created Jul 18, 2024. 128,000 context. Starting at $0.15/M input tokens. Starting at $0.60/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "openai/gpt-4o-mini",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        },
+        "priority": 1
+    },
+    "openrouter-paid-qwen3-30b-a3b": {
+        "comment": "This is slow. It's paid, so check the pricing before use. Created Apr 28, 2025. 40,960 context. $0.08/M input tokens. $0.29/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenRouter",
+        "arguments": {
+            "model": "qwen/qwen3-30b-a3b",
+            "api_key": "${OPENROUTER_API_KEY}",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 8192,
+            "max_retries": 5
+        },
+        "priority": 3
+    },
+    "alibabacloud-paid-qwen-flash-2025-07-28": {
+        "comment": "This is slow. It's paid, so check the pricing before use. Snapshot from 2025-07-28. 1,000,000 context. $0.05/M input tokens. $0.40/M output tokens.",
+        "luigi_workers": 4,
+        "class": "OpenAILike",
+        "arguments": {
+            "model": "qwen-flash-2025-07-28",
+            "api_key": "${DASHSCOPE_API_KEY}",
+            "api_base": "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
+            "temperature": 0.1,
+            "timeout": 60.0,
+            "is_function_calling_model": false,
+            "is_chat_model": true,
+            "max_tokens": 16384,
+            "max_retries": 5
+        }
+    },
+    "ollama-llama3.1": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "llama3.1:latest",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "docker-ollama-llama3.1": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in a Docker container, and Ollama is installed on the host computer.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "llama3.1:latest",
+            "base_url": "http://host.docker.internal:11434",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "ollama-qwen2.5-coder": {
+        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "Ollama",
+        "arguments": {
+            "model": "qwen2.5-coder:latest",
+            "temperature": 0.5,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "lmstudio-qwen2.5-7b-instruct-1m": {
+        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in .venv on the host computer. No use of docker.",
+        "luigi_workers": 1,
+        "class": "LMStudio",
+        "arguments": {
+            "model_name": "qwen2.5-7b-instruct-1m",
+            "base_url": "http://localhost:1234/v1",
+            "temperature": 0.2,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    },
+    "docker-lmstudio-qwen2.5-7b-instruct-1m": {
+        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in a Docker container, and LM Studio is installed on the host computer.",
+        "luigi_workers": 1,
+        "class": "LMStudio",
+        "arguments": {
+            "model_name": "qwen2.5-7b-instruct-1m",
+            "base_url": "http://host.docker.internal:1234/v1",
+            "temperature": 0.2,
+            "request_timeout": 120.0,
+            "is_function_calling_model": false
+        }
+    }
+}
diff --git a/mcp_cloud/app.py b/mcp_cloud/app.py
index b14173272..b78fb4f0b 100644
--- a/mcp_cloud/app.py
+++ b/mcp_cloud/app.py
@@ -29,6 +29,7 @@
 from mcp.server.stdio import stdio_server
 from mcp.types import CallToolResult, Tool, TextContent
 from pydantic import BaseModel
+from worker_plan_api.model_profile import normalize_model_profile
 
 from mcp_cloud.dotenv_utils import load_planexe_dotenv
 _dotenv_loaded, _dotenv_paths = load_planexe_dotenv(Path(__file__).parent)
@@ -148,6 +149,12 @@ def ensure_taskitem_stop_columns() -> None:
     "fast",
     "all",
 ]
+ModelProfileInput = Literal[
+    "baseline",
+    "premium",
+    "frontier",
+    "custom",
+]
 SPEED_VS_DETAIL_ALIASES = {
     "ping": "ping_llm",
     "fast": "fast_but_skip_details",
@@ -157,6 +164,7 @@ def ensure_taskitem_stop_columns() -> None:
 class TaskCreateRequest(BaseModel):
     prompt: str
     speed_vs_detail: Optional[SpeedVsDetailInput] = None
+    model_profile: Optional[ModelProfileInput] = None
     user_api_key: Optional[str] = None
 
 class TaskStatusRequest(BaseModel):
@@ -251,6 +259,7 @@ def _create_task_sync(
     with app.app_context():
         parameters = dict(config or {})
         parameters["speed_vs_detail"] = resolve_speed_vs_detail(parameters)
+        parameters["model_profile"] = normalize_model_profile(parameters.get("model_profile")).value
         parameters["trigger_source"] = "mcp task_create"
 
         task = TaskItem(
@@ -628,12 +637,17 @@ def resolve_speed_vs_detail(config: Optional[dict[str, Any]]) -> str:
 def _merge_task_create_config(
     config: Optional[dict[str, Any]],
     speed_vs_detail: Optional[str],
+    model_profile: Optional[str],
 ) -> Optional[dict[str, Any]]:
     merged = dict(config or {})
     if isinstance(speed_vs_detail, str):
         candidate = speed_vs_detail.strip()
         if candidate and "speed_vs_detail" not in merged and "speed" not in merged:
             merged["speed_vs_detail"] = candidate
+    if isinstance(model_profile, str):
+        candidate_profile = model_profile.strip()
+        if candidate_profile and "model_profile" not in merged:
+            merged["model_profile"] = candidate_profile
     return merged or None
 
 # Context var set by HTTP server so download URLs use the request's host when
@@ -908,6 +922,7 @@ async def handle_task_create(arguments: dict[str, Any]) -> CallToolResult:
     Args:
         - prompt: What the plan should cover (goal, context, constraints).
         - speed_vs_detail: Optional mode ("ping" | "fast" | "all").
+        - model_profile: Optional profile ("baseline" | "premium" | "frontier" | "custom").
 
     Returns:
         - content: JSON string matching structuredContent.
@@ -916,7 +931,7 @@ async def handle_task_create(arguments: dict[str, Any]) -> CallToolResult:
     """
     req = TaskCreateRequest(**arguments)
 
-    merged_config = _merge_task_create_config(None, req.speed_vs_detail)
+    merged_config = _merge_task_create_config(None, req.speed_vs_detail, req.model_profile)
     require_user_key = os.environ.get("PLANEXE_MCP_REQUIRE_USER_KEY", "false").lower() in ("1", "true", "yes", "on")
     user_context = None
     if req.user_api_key:
diff --git a/mcp_cloud/http_server.py b/mcp_cloud/http_server.py
index a4d51b8d0..d63142bc2 100644
--- a/mcp_cloud/http_server.py
+++ b/mcp_cloud/http_server.py
@@ -316,6 +316,7 @@ def _normalize_tool_result(result: Any) -> tuple[list[dict[str, Any]], Optional[
 
 
 SpeedVsDetailInput = Literal["ping", "fast", "all"]
+ModelProfileInput = Literal["baseline", "premium", "frontier", "custom"]
 ResultArtifactInput = Literal["report", "zip"]
 
 
@@ -327,12 +328,17 @@ async def task_create(
             description="Defaults to ping (alias for ping_llm). Options: ping, fast, all.",
         ),
     ] = "ping",
+    model_profile: Annotated[
+        ModelProfileInput,
+        Field(description="LLM profile: baseline, premium, frontier, custom."),
+    ] = "baseline",
 ) -> Annotated[CallToolResult, TaskCreateOutput]:
     """Create a new PlanExe task. Use prompt_examples first for example prompts."""
     authenticated_user_api_key = _get_authenticated_user_api_key()
     arguments: dict[str, Any] = {
         "prompt": prompt,
         "speed_vs_detail": speed_vs_detail,
+        "model_profile": model_profile,
     }
     if authenticated_user_api_key:
         arguments["user_api_key"] = authenticated_user_api_key
diff --git a/mcp_cloud/tests/test_speed_vs_detail.py b/mcp_cloud/tests/test_speed_vs_detail.py
index 9047861ac..4f9119133 100644
--- a/mcp_cloud/tests/test_speed_vs_detail.py
+++ b/mcp_cloud/tests/test_speed_vs_detail.py
@@ -27,15 +27,15 @@ def test_passthrough(self):
         self.assertEqual(resolve_speed_vs_detail({"speed_vs_detail": "ping_llm"}), "ping_llm")
 
     def test_merge_task_create_config_injects_speed(self):
-        merged = _merge_task_create_config(None, "fast")
-        self.assertEqual(merged, {"speed_vs_detail": "fast"})
+        merged = _merge_task_create_config(None, "fast", "premium")
+        self.assertEqual(merged, {"speed_vs_detail": "fast", "model_profile": "premium"})
 
     def test_merge_task_create_config_preserves_existing(self):
-        merged = _merge_task_create_config({"speed_vs_detail": "all_details_but_slow"}, "fast")
-        self.assertEqual(merged, {"speed_vs_detail": "all_details_but_slow"})
+        merged = _merge_task_create_config({"speed_vs_detail": "all_details_but_slow", "model_profile": "frontier"}, "fast", "premium")
+        self.assertEqual(merged, {"speed_vs_detail": "all_details_but_slow", "model_profile": "frontier"})
 
     def test_merge_task_create_config_ignores_blank(self):
-        merged = _merge_task_create_config({}, "   ")
+        merged = _merge_task_create_config({}, "   ", "   ")
         self.assertIsNone(merged)
 
 
@@ -49,6 +49,15 @@ def test_speed_vs_detail_rejects_invalid(self):
         with self.assertRaises(ValidationError):
             TaskCreateRequest(prompt="demo", speed_vs_detail="slow")
 
+    def test_model_profile_accepts_enum(self):
+        for value in ("baseline", "premium", "frontier", "custom"):
+            req = TaskCreateRequest(prompt="demo", model_profile=value)
+            self.assertEqual(req.model_profile, value)
+
+    def test_model_profile_rejects_invalid(self):
+        with self.assertRaises(ValidationError):
+            TaskCreateRequest(prompt="demo", model_profile="enterprise")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/mcp_cloud/tool_models.py b/mcp_cloud/tool_models.py
index 402adec02..b8caf2edb 100644
--- a/mcp_cloud/tool_models.py
+++ b/mcp_cloud/tool_models.py
@@ -123,6 +123,10 @@ class TaskCreateInput(BaseModel):
         default="ping",
         description="Defaults to ping (alias for ping_llm). Options: ping, fast, all.",
     )
+    model_profile: Literal["baseline", "premium", "frontier", "custom"] = Field(
+        default="baseline",
+        description="LLM profile mapping to llm_config.<profile>.json (baseline, premium, frontier, custom).",
+    )
     user_api_key: str | None = Field(
         default=None,
         description="Optional user API key for credits and attribution.",
diff --git a/worker_plan/Dockerfile b/worker_plan/Dockerfile
index e33442f28..adeccbe6d 100644
--- a/worker_plan/Dockerfile
+++ b/worker_plan/Dockerfile
@@ -10,7 +10,7 @@ WORKDIR /app
 
 # Copy application code and supporting files
 COPY worker_plan /app/worker_plan
-COPY llm_config.json /app/
+COPY llm_config*.json /app/
 
 RUN pip install --no-cache-dir --upgrade pip \
     && pip install --no-cache-dir --prefer-binary /app/worker_plan
diff --git a/worker_plan/README.md b/worker_plan/README.md
index 7e14b8782..3ddff547f 100644
--- a/worker_plan/README.md
+++ b/worker_plan/README.md
@@ -44,6 +44,8 @@ If you must stay on Python 3.14, expect source builds and potential failures; ex
 | `PLANEXE_PURGE_MAX_AGE_HOURS` | `1` | Maximum age (hours) of runs to delete when purging (scheduler and manual default). |
 | `PLANEXE_PURGE_INTERVAL_SECONDS` | `3600` | How often the purge scheduler runs when enabled. |
 | `PLANEXE_PURGE_RUN_PREFIX` | `PlanExe_` | Only purge runs whose IDs start with this prefix. |
+| `PLANEXE_MODEL_PROFILE` | `baseline` | Selects which LLM profile config to load (`baseline`, `premium`, `frontier`, `custom`). |
+| `PLANEXE_LLM_CONFIG_CUSTOM_FILENAME` | `llm_config.custom.json` | Filename used when `PLANEXE_MODEL_PROFILE=custom` (strict filename validation; invalid names fall back safely to baseline). |
 | `PLANEXE_LOG_LEVEL` | `INFO` | Sets the console log level for the worker API and the pipeline process. Accepted values are the standard logging levels (e.g., `DEBUG`, `INFO`, `WARNING`, `ERROR`). |
 
 `PLANEXE_LOG_LEVEL` affects both the FastAPI worker and the spawned pipeline logs written to stdout. File logs in `run/<id>/log.txt` always include DEBUG and above.
diff --git a/worker_plan/app.py b/worker_plan/app.py
index d022b53ea..e6b2d485c 100644
--- a/worker_plan/app.py
+++ b/worker_plan/app.py
@@ -24,6 +24,7 @@
 from worker_plan_api.filenames import FilenameEnum, ExtraFilenameEnum
 from worker_plan_api.generate_run_id import generate_run_id
 from worker_plan_api.llm_info import LLMInfo
+from worker_plan_api.model_profile import ModelProfileEnum, DEFAULT_MODEL_PROFILE, normalize_model_profile
 from worker_plan_internal.plan.pipeline_environment import PipelineEnvironmentEnum
 from worker_plan_api.plan_file import PlanFile
 from worker_plan_api.start_time import StartTime
@@ -61,6 +62,7 @@ class StartRunRequest(BaseModel):
     plan_prompt: str = Field(..., description="The user provided plan description.")
     llm_model: str = Field(..., description="LLM model identifier.")
     speed_vs_detail: str = Field(..., description="Speed vs detail preference.")
+    model_profile: str = Field(DEFAULT_MODEL_PROFILE.value, description="LLM model profile (baseline, premium, frontier, custom).")
     openrouter_api_key: Optional[str] = Field(None, description="Optional OpenRouter API key.")
     run_id: Optional[str] = Field(None, description="Existing run ID to retry.")
 
@@ -162,12 +164,19 @@ def build_display_run_dir(run_dir: Path) -> str:
     return str(run_dir)
 
 
-def build_env(run_dir: Path, llm_model: str, speed_vs_detail: str, openrouter_api_key: Optional[str]) -> Dict[str, str]:
+def build_env(
+    run_dir: Path,
+    llm_model: str,
+    speed_vs_detail: str,
+    model_profile: ModelProfileEnum,
+    openrouter_api_key: Optional[str],
+) -> Dict[str, str]:
     env = os.environ.copy()
     env[PipelineEnvironmentEnum.RUN_ID_DIR.value] = str(run_dir)
     env["PLANEXE_TASK_ID"] = run_dir.name
     env[PipelineEnvironmentEnum.LLM_MODEL.value] = llm_model
     env[PipelineEnvironmentEnum.SPEED_VS_DETAIL.value] = speed_vs_detail
+    env[PipelineEnvironmentEnum.MODEL_PROFILE.value] = model_profile.value
     if openrouter_api_key:
         env["OPENROUTER_API_KEY"] = openrouter_api_key
     return env
@@ -215,7 +224,14 @@ def start_run(request: StartRunRequest) -> StartRunResponse:
         if existing and existing.is_running():
             raise HTTPException(status_code=409, detail=f"Run {run_id} is already active.")
 
-    env = build_env(run_dir=run_dir, llm_model=request.llm_model, speed_vs_detail=request.speed_vs_detail, openrouter_api_key=request.openrouter_api_key)
+    model_profile = normalize_model_profile(request.model_profile)
+    env = build_env(
+        run_dir=run_dir,
+        llm_model=request.llm_model,
+        speed_vs_detail=request.speed_vs_detail,
+        model_profile=model_profile,
+        openrouter_api_key=request.openrouter_api_key,
+    )
     process = start_pipeline_subprocess(env)
 
     info = RunProcessInfo(
diff --git a/worker_plan/worker_plan_api/model_profile.py b/worker_plan/worker_plan_api/model_profile.py
new file mode 100644
index 000000000..168380600
--- /dev/null
+++ b/worker_plan/worker_plan_api/model_profile.py
@@ -0,0 +1,126 @@
+"""Utilities for selecting LLM model profiles and resolving profile config filenames."""
+
+from __future__ import annotations
+
+from enum import Enum
+import logging
+import os
+import re
+from pathlib import Path
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class ModelProfileEnum(str, Enum):
+    BASELINE = "baseline"
+    PREMIUM = "premium"
+    FRONTIER = "frontier"
+    CUSTOM = "custom"
+
+
+DEFAULT_MODEL_PROFILE = ModelProfileEnum.BASELINE
+ENV_PLANEXE_MODEL_PROFILE = "PLANEXE_MODEL_PROFILE"
+ENV_PLANEXE_LLM_CONFIG_CUSTOM_FILENAME = "PLANEXE_LLM_CONFIG_CUSTOM_FILENAME"
+ENV_PLANEXE_LLM_CONFIG_NAME_LEGACY = "PLANEXE_LLM_CONFIG_NAME"
+
+# Strict filename validation:
+# - must be a filename (no path separators, no absolute path)
+# - must start with "llm_config"
+# - must end with ".json"
+_FILENAME_PATTERN = re.compile(r"^llm_config(?:\.[a-z0-9][a-z0-9._-]*)?\.json$")
+
+
+def normalize_model_profile(raw_value: Optional[str]) -> ModelProfileEnum:
+    if not isinstance(raw_value, str):
+        return DEFAULT_MODEL_PROFILE
+    candidate = raw_value.strip().lower()
+    for enum_value in ModelProfileEnum:
+        if enum_value.value == candidate:
+            return enum_value
+    logger.warning("Invalid model profile %r. Falling back to %s.", raw_value, DEFAULT_MODEL_PROFILE.value)
+    return DEFAULT_MODEL_PROFILE
+
+
+def resolve_model_profile_from_parameters(parameters: Optional[dict[str, Any]]) -> ModelProfileEnum:
+    if not isinstance(parameters, dict):
+        return DEFAULT_MODEL_PROFILE
+    raw_value = parameters.get("model_profile") or parameters.get("llm_profile")
+    return normalize_model_profile(raw_value)
+
+
+def resolve_model_profile_from_env() -> ModelProfileEnum:
+    raw_value = os.environ.get(ENV_PLANEXE_MODEL_PROFILE)
+    return normalize_model_profile(raw_value)
+
+
+def is_valid_llm_config_filename(filename: str) -> bool:
+    if not isinstance(filename, str):
+        return False
+    candidate = filename.strip()
+    if not candidate:
+        return False
+    path = Path(candidate)
+    if path.is_absolute():
+        return False
+    if "/" in candidate or "\\" in candidate:
+        return False
+    return bool(_FILENAME_PATTERN.match(candidate))
+
+
+def default_filename_for_profile(model_profile: ModelProfileEnum) -> str:
+    if model_profile == ModelProfileEnum.BASELINE:
+        return "llm_config.json"
+    if model_profile == ModelProfileEnum.PREMIUM:
+        return "llm_config.premium.json"
+    if model_profile == ModelProfileEnum.FRONTIER:
+        return "llm_config.frontier.json"
+    # CUSTOM
+    custom_name = os.environ.get(ENV_PLANEXE_LLM_CONFIG_CUSTOM_FILENAME, "llm_config.custom.json")
+    if is_valid_llm_config_filename(custom_name):
+        return custom_name.strip()
+    logger.warning(
+        "Invalid %s=%r. Falling back to llm_config.json.",
+        ENV_PLANEXE_LLM_CONFIG_CUSTOM_FILENAME,
+        custom_name,
+    )
+    return "llm_config.json"
+
+
+def resolve_llm_config_filename(
+    model_profile: Optional[ModelProfileEnum] = None,
+    explicit_filename: Optional[str] = None,
+) -> str:
+    """Resolve selected config filename.
+
+    Precedence:
+    1) explicit_filename if provided and valid
+    2) legacy env PLANEXE_LLM_CONFIG_NAME if provided and valid
+    3) profile-based default filename
+    """
+    selected_profile = model_profile or resolve_model_profile_from_env()
+
+    if isinstance(explicit_filename, str) and explicit_filename.strip():
+        explicit_candidate = explicit_filename.strip()
+        if is_valid_llm_config_filename(explicit_candidate):
+            return explicit_candidate
+        logger.warning("Invalid explicit LLM config filename %r. Ignoring.", explicit_filename)
+
+    legacy_name = os.environ.get(ENV_PLANEXE_LLM_CONFIG_NAME_LEGACY)
+    if isinstance(legacy_name, str) and legacy_name.strip():
+        legacy_candidate = legacy_name.strip()
+        if is_valid_llm_config_filename(legacy_candidate):
+            logger.info(
+                "Using legacy %s=%s. Consider migrating to %s + profile files.",
+                ENV_PLANEXE_LLM_CONFIG_NAME_LEGACY,
+                legacy_candidate,
+                ENV_PLANEXE_MODEL_PROFILE,
+            )
+            return legacy_candidate
+        logger.warning(
+            "Invalid %s=%r. Ignoring legacy override and using profile mapping.",
+            ENV_PLANEXE_LLM_CONFIG_NAME_LEGACY,
+            legacy_name,
+        )
+
+    return default_filename_for_profile(selected_profile)
diff --git a/worker_plan/worker_plan_api/planexe_config.py b/worker_plan/worker_plan_api/planexe_config.py
index cbce471ec..c16845801 100644
--- a/worker_plan/worker_plan_api/planexe_config.py
+++ b/worker_plan/worker_plan_api/planexe_config.py
@@ -1,110 +1,120 @@
 """
-Locate PlanExe's config files, like .env and llm_config.json. The .env file is optional when the environment variables are provided by the host.
-
-Finds config files by checking the following locations in order:
-1. The directory specified by the PLANEXE_CONFIG_PATH environment variable. It must be an absolute path.
-2. The current working directory (CWD).
-3. The PlanExe project root directory (three levels above this file's location).
-
-Usage: without any PLANEXE_CONFIG_PATH environment variable.
-PROMPT> python -m worker_plan_api.planexe_config
-
-Usage: with a PLANEXE_CONFIG_PATH environment variable set.
-PROMPT> PLANEXE_CONFIG_PATH='/Users/neoneye/git/PlanExeGroup/PlanExe' python -m worker_plan_api.planexe_config
-
-IDEA: validate the contents of ".env"
-IDEA: validate the contents of "llm_config.json"
+Locate PlanExe config files (.env and llm_config profile files).
 """
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional, ClassVar
+from typing import Optional
 import logging
 import os
 from enum import Enum
 
+from worker_plan_api.model_profile import (
+    DEFAULT_MODEL_PROFILE,
+    ModelProfileEnum,
+    normalize_model_profile,
+    resolve_llm_config_filename,
+    resolve_model_profile_from_env,
+)
+
 logger = logging.getLogger(__name__)
 
+
 class ConfigNameEnum(str, Enum):
     DOTENV = ".env"
-    LLM_CONFIG_JSON = "llm_config.json"
+    LLM_CONFIG_JSON_DEFAULT = "llm_config.json"
+
+
+class EnvNameEnum(str, Enum):
+    PLANEXE_CONFIG_PATH = "PLANEXE_CONFIG_PATH"
+
 
 class PlanExeConfigError(Exception):
     """Raised when there is an error with the configuration."""
     pass
 
+
 @dataclass
 class PlanExeConfig:
-    """
-    Holds the resolved paths to PlanExe configuration files and the env var value used.
-    
-    Attributes:
-        planexe_config_path: Optional[Path] - The directory specified by PLANEXE_CONFIG_PATH
-        dotenv_path: Optional[Path] - Path to the .env file
-        llm_config_json_path: Optional[Path] - Path to the llm_config.json file
-    """
+    """Resolved config paths and selected LLM profile/config file."""
+
     planexe_config_path: Optional[Path]
     dotenv_path: Optional[Path]
+    model_profile: ModelProfileEnum
+    llm_config_json_name: str
     llm_config_json_path: Optional[Path]
-    
-    _instance: ClassVar[Optional['PlanExeConfig']] = None
 
     def raise_if_required_files_not_found(self) -> None:
-        """
-        Raises a PlanExeConfigError if required configuration files are not found.
-        The .env file is optional (environment variables can be provided by the host).
-
-        :raises: PlanExeConfigError if required files are not found
-        """
         missing_files = []
         if self.llm_config_json_path is None:
-            missing_files.append(ConfigNameEnum.LLM_CONFIG_JSON.value)
-        
+            missing_files.append(self.llm_config_json_name)
+
         if missing_files:
             msg = f"Required configuration file(s) not found: {', '.join(missing_files)}"
             logger.error(msg)
             raise PlanExeConfigError(msg)
         if self.dotenv_path is None:
             logger.info("Optional configuration file '.env' not found; relying on environment variables only.")
-        # If no missing files, method completes silently.
-    
-    @classmethod
-    def load(cls) -> 'PlanExeConfig':
-        """
-        Loads configuration paths by searching predefined locations.
-        Implements a singleton pattern to avoid repeated filesystem scans.
-        
-        :return: An instance of PlanExeConfig with resolved paths.
-        """
-        if cls._instance is not None:
-            return cls._instance
 
+    @classmethod
+    def load(
+        cls,
+        model_profile_override: Optional[ModelProfileEnum | str] = None,
+        llm_config_json_name_override: Optional[str] = None,
+    ) -> 'PlanExeConfig':
         logger.debug("PlanExeConfig.load() creating a new instance...")
         planexe_config_path = cls.resolve_planexe_config_path()
-        dotenv_path = cls.find_file_in_search_order(ConfigNameEnum.DOTENV.value, planexe_config_path, is_optional=True)
-        llm_config_json_path = cls.find_file_in_search_order(ConfigNameEnum.LLM_CONFIG_JSON.value, planexe_config_path)
-        cls._instance = cls(
+
+        if isinstance(model_profile_override, ModelProfileEnum):
+            model_profile = model_profile_override
+        elif isinstance(model_profile_override, str):
+            model_profile = normalize_model_profile(model_profile_override)
+        else:
+            model_profile = resolve_model_profile_from_env()
+
+        llm_config_json_name = resolve_llm_config_filename(
+            model_profile=model_profile,
+            explicit_filename=llm_config_json_name_override,
+        )
+
+        dotenv_path = cls.find_file_in_search_order(
+            ConfigNameEnum.DOTENV.value,
+            planexe_config_path,
+            is_optional=True,
+        )
+        llm_config_json_path = cls.find_file_in_search_order(llm_config_json_name, planexe_config_path)
+
+        # Safe fallback: if profile config is missing, use baseline config.
+        if llm_config_json_path is None and llm_config_json_name != ConfigNameEnum.LLM_CONFIG_JSON_DEFAULT.value:
+            baseline_name = ConfigNameEnum.LLM_CONFIG_JSON_DEFAULT.value
+            baseline_path = cls.find_file_in_search_order(baseline_name, planexe_config_path)
+            if baseline_path is not None:
+                logger.warning(
+                    "Selected profile config %r not found. Falling back to baseline config %r.",
+                    llm_config_json_name,
+                    baseline_name,
+                )
+                llm_config_json_name = baseline_name
+                llm_config_json_path = baseline_path
+                model_profile = DEFAULT_MODEL_PROFILE
+
+        return cls(
             planexe_config_path=planexe_config_path,
             dotenv_path=dotenv_path,
-            llm_config_json_path=llm_config_json_path
+            model_profile=model_profile,
+            llm_config_json_name=llm_config_json_name,
+            llm_config_json_path=llm_config_json_path,
         )
-        return cls._instance
 
     @classmethod
     def resolve_planexe_config_path(cls) -> Optional[Path]:
-        """
-        Resolves and validates the PLANEXE_CONFIG_PATH environment variable.
-        It's expected to be an absolute path to a directory.
-        
-        :return: A Path object if valid, otherwise None.
-        """
-        path_str = os.environ.get("PLANEXE_CONFIG_PATH")
+        path_str = os.environ.get(EnvNameEnum.PLANEXE_CONFIG_PATH.value)
         if path_str is None:
             logger.debug("PLANEXE_CONFIG_PATH is not set")
             return None
-            
+
         try:
             path_obj = Path(path_str)
-        except Exception as e: # If path_str is bizarre
+        except Exception as e:
             logger.error(f"Invalid PLANEXE_CONFIG_PATH string '{path_str!r}': {e!r}")
             return None
         if not path_obj.is_absolute():
@@ -117,36 +127,23 @@ def resolve_planexe_config_path(cls) -> Optional[Path]:
         return path_obj
 
     @classmethod
-    def find_file_in_search_order(cls, filename: str, planexe_config_path: Optional[Path], is_optional: bool = False) -> Optional[Path]:
-        """
-        Finds a specific configuration file based on a precedence of locations.
-
-        Search order:
-        1. Directory from validated PLANEXE_CONFIG_PATH (if provided and valid).
-        2. Current Working Directory (CWD).
-        3. PlanExe project root.
-
-        :param filename: The name of the file to find (e.g., ".env").
-        :param planexe_config_path: The validated absolute directory path from PLANEXE_CONFIG_PATH.
-        :param is_optional: When True, missing file is logged at INFO instead of WARNING.
-        :return: The Path to the file if found, otherwise None.
-        """
-        # Step 1: Check if PLANEXE_CONFIG_PATH is set and contains the file
+    def find_file_in_search_order(
+        cls,
+        filename: str,
+        planexe_config_path: Optional[Path],
+        is_optional: bool = False,
+    ) -> Optional[Path]:
         if planexe_config_path is not None:
             config_file_path = planexe_config_path / filename
             if config_file_path.is_file():
                 logger.debug(f"Found {filename!r} at config_file_path: {config_file_path!r}")
                 return config_file_path
 
-        # Step 2: Check if file exists in current working directory
         cwd_file_path = Path.cwd() / filename
         if cwd_file_path.is_file():
             logger.debug(f"Found {filename!r} at cwd_file_path: {cwd_file_path!r}")
             return cwd_file_path
 
-        # Step 3: Check if file exists in PlanExe root directory
-        # This file is at: worker_plan/worker_plan_api/planexe_config.py
-        # So we need 3 .parent calls to reach the PlanExe root.
         root_file_path = Path(__file__).parent.parent.parent / filename
         if root_file_path.is_file():
             logger.debug(f"Found {filename!r} at root_file_path: {root_file_path!r}")
@@ -158,9 +155,9 @@ def find_file_in_search_order(cls, filename: str, planexe_config_path: Optional[
             logger.warning(f"{filename!r} not found in any of the search locations (ENV_VAR, CWD, Project Root).")
         return None
 
+
 if __name__ == "__main__":
     logging.basicConfig(level=logging.DEBUG)
     config = PlanExeConfig.load()
     print(f"config: {config!r}")
     config.raise_if_required_files_not_found()
-    
diff --git a/worker_plan/worker_plan_api/tests/test_model_profile.py b/worker_plan/worker_plan_api/tests/test_model_profile.py
new file mode 100644
index 000000000..aee211be5
--- /dev/null
+++ b/worker_plan/worker_plan_api/tests/test_model_profile.py
@@ -0,0 +1,37 @@
+import os
+import unittest
+
+from worker_plan_api.model_profile import (
+    ModelProfileEnum,
+    is_valid_llm_config_filename,
+    normalize_model_profile,
+    resolve_llm_config_filename,
+)
+
+
+class TestModelProfile(unittest.TestCase):
+    def test_normalize(self):
+        self.assertEqual(normalize_model_profile("premium"), ModelProfileEnum.PREMIUM)
+        self.assertEqual(normalize_model_profile(" invalid "), ModelProfileEnum.BASELINE)
+
+    def test_filename_validation(self):
+        self.assertTrue(is_valid_llm_config_filename("llm_config.json"))
+        self.assertTrue(is_valid_llm_config_filename("llm_config.premium.json"))
+        self.assertFalse(is_valid_llm_config_filename("../llm_config.json"))
+        self.assertFalse(is_valid_llm_config_filename("/tmp/llm_config.json"))
+
+    def test_resolve_custom_invalid_fallback(self):
+        old_custom = os.environ.get("PLANEXE_LLM_CONFIG_CUSTOM_FILENAME")
+        try:
+            os.environ["PLANEXE_LLM_CONFIG_CUSTOM_FILENAME"] = "../bad.json"
+            result = resolve_llm_config_filename(model_profile=ModelProfileEnum.CUSTOM)
+            self.assertEqual(result, "llm_config.json")
+        finally:
+            if old_custom is None:
+                os.environ.pop("PLANEXE_LLM_CONFIG_CUSTOM_FILENAME", None)
+            else:
+                os.environ["PLANEXE_LLM_CONFIG_CUSTOM_FILENAME"] = old_custom
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/worker_plan/worker_plan_internal/llm_factory.py b/worker_plan/worker_plan_internal/llm_factory.py
index f46310375..02236635f 100644
--- a/worker_plan/worker_plan_internal/llm_factory.py
+++ b/worker_plan/worker_plan_internal/llm_factory.py
@@ -6,8 +6,8 @@
 import logging
 from typing import Optional, Any
 from worker_plan_api.planexe_dotenv import PlanExeDotEnv
-from worker_plan_api.planexe_config import PlanExeConfig, PlanExeConfigError
 from worker_plan_internal.utils.planexe_llmconfig import PlanExeLLMConfig
+from worker_plan_api.model_profile import ModelProfileEnum, resolve_model_profile_from_env
 from llama_index.core.llms.llm import LLM
 # from llama_index.llms.mistralai import MistralAI
 from llama_index.llms.ollama import Ollama
@@ -31,13 +31,29 @@
 
 __all__ = ["get_llm", "LLMInfo", "get_llm_names_by_priority", "SPECIAL_AUTO_ID", "is_valid_llm_name", "obtain_llm_info"]
 
-planexe_llmconfig = PlanExeLLMConfig.load()
 
-def obtain_llm_info() -> LLMInfo:
+def _resolve_model_profile(model_profile: Optional[ModelProfileEnum | str]) -> ModelProfileEnum:
+    if isinstance(model_profile, ModelProfileEnum):
+        return model_profile
+    if isinstance(model_profile, str):
+        for enum_value in ModelProfileEnum:
+            if enum_value.value == model_profile.strip().lower():
+                return enum_value
+    return resolve_model_profile_from_env()
+
+
+def _load_llm_config(model_profile: Optional[ModelProfileEnum | str]) -> PlanExeLLMConfig:
+    resolved_profile = _resolve_model_profile(model_profile)
+    return PlanExeLLMConfig.load(model_profile=resolved_profile)
+
+
+def obtain_llm_info(model_profile: Optional[ModelProfileEnum | str] = None) -> LLMInfo:
     """
     Returns a list of available LLM names and Ollama status.
     """
 
+    planexe_llmconfig = _load_llm_config(model_profile)
+
     # Probe each Ollama service endpoint just once.
     error_message_list = []
     ollama_info_per_host = {}
@@ -124,23 +140,25 @@ def obtain_llm_info() -> LLMInfo:
         error_message_list=error_message_list,
     )
 
-def get_llm_names_by_priority() -> list[str]:
+def get_llm_names_by_priority(model_profile: Optional[ModelProfileEnum | str] = None) -> list[str]:
     """
     Returns a list of LLM names sorted by priority.
     Lowest values comes first.
     Highest values comes last.
     """
+    planexe_llmconfig = _load_llm_config(model_profile)
     configs = [(name, config) for name, config in planexe_llmconfig.llm_config_dict.items() if config.get("priority") is not None]
     configs.sort(key=lambda x: x[1].get("priority", 0))
     return [name for name, _ in configs]
 
-def is_valid_llm_name(llm_name: str) -> bool:
+def is_valid_llm_name(llm_name: str, model_profile: Optional[ModelProfileEnum | str] = None) -> bool:
     """
     Returns True if the LLM name is valid, False otherwise.
     """
+    planexe_llmconfig = _load_llm_config(model_profile)
     return llm_name in planexe_llmconfig.llm_config_dict
 
-def get_llm(llm_name: Optional[str] = None, **kwargs: Any) -> LLM:
+def get_llm(llm_name: Optional[str] = None, model_profile: Optional[ModelProfileEnum | str] = None, **kwargs: Any) -> LLM:
     """
     Returns an LLM instance based on the config.json file or a fallback default.
 
@@ -153,11 +171,13 @@ def get_llm(llm_name: Optional[str] = None, **kwargs: Any) -> LLM:
         planexe_dotenv = PlanExeDotEnv.load()
         llm_name = planexe_dotenv.get("DEFAULT_LLM", "ollama-llama3.1")
 
+    planexe_llmconfig = _load_llm_config(model_profile)
+
     if llm_name == SPECIAL_AUTO_ID:
         logger.error(f"The special {SPECIAL_AUTO_ID!r} is not a LLM model that can be created. Please use a valid LLM name.")
         raise ValueError(f"The special {SPECIAL_AUTO_ID!r} is not a LLM model that can be created. Please use a valid LLM name.")
 
-    if not is_valid_llm_name(llm_name):
+    if not is_valid_llm_name(llm_name, model_profile=model_profile):
         logger.error(f"Cannot create LLM, the llm_name {llm_name!r} is not found in llm_config.json.")
         raise ValueError(f"Cannot create LLM, the llm_name {llm_name!r} is not found in llm_config.json.")
 
diff --git a/worker_plan/worker_plan_internal/plan/pipeline_environment.py b/worker_plan/worker_plan_internal/plan/pipeline_environment.py
index 8557fec92..eac62d7b9 100644
--- a/worker_plan/worker_plan_internal/plan/pipeline_environment.py
+++ b/worker_plan/worker_plan_internal/plan/pipeline_environment.py
@@ -9,6 +9,7 @@ class PipelineEnvironmentEnum(Enum):
     RUN_ID_DIR = "RUN_ID_DIR"
     LLM_MODEL = "LLM_MODEL"
     SPEED_VS_DETAIL = "SPEED_VS_DETAIL"
+    MODEL_PROFILE = "PLANEXE_MODEL_PROFILE"
 
 @dataclass
 class PipelineEnvironment:
@@ -16,6 +17,7 @@ class PipelineEnvironment:
     run_id_dir: Optional[str] = None
     llm_model: Optional[str] = None
     speed_vs_detail: Optional[str] = None
+    model_profile: Optional[str] = None
 
     @classmethod
     def from_env(cls) -> "PipelineEnvironment":
@@ -23,7 +25,8 @@ def from_env(cls) -> "PipelineEnvironment":
         return cls(
             run_id_dir=os.environ.get(PipelineEnvironmentEnum.RUN_ID_DIR.value),
             llm_model=os.environ.get(PipelineEnvironmentEnum.LLM_MODEL.value),
-            speed_vs_detail=os.environ.get(PipelineEnvironmentEnum.SPEED_VS_DETAIL.value)
+            speed_vs_detail=os.environ.get(PipelineEnvironmentEnum.SPEED_VS_DETAIL.value),
+            model_profile=os.environ.get(PipelineEnvironmentEnum.MODEL_PROFILE.value),
         )
     
     def get_run_id_dir(self) -> Path:
diff --git a/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py b/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py
index 3965077b7..a974a86c9 100644
--- a/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py
+++ b/worker_plan/worker_plan_internal/plan/run_plan_pipeline.py
@@ -87,6 +87,7 @@
 from worker_plan_internal.schedule.export_gantt_mermaid import ExportGanttMermaid
 from worker_plan_internal.llm_util.llm_executor import LLMExecutor, LLMModelFromName, ShouldStopCallbackParameters, PipelineStopRequested
 from worker_plan_internal.llm_factory import get_llm_names_by_priority, SPECIAL_AUTO_ID, is_valid_llm_name
+from worker_plan_api.model_profile import ModelProfileEnum, normalize_model_profile
 from worker_plan_internal.format_json_for_use_in_query import format_json_for_use_in_query
 from worker_plan_internal.report.report_generator import ReportGenerator
 from worker_plan_internal.luigi_util.obtain_output_files import ObtainOutputFiles
@@ -3834,6 +3835,7 @@ class ExecutePipeline:
     run_id_dir: Path
     speedvsdetail: SpeedVsDetailEnum
     llm_models: list[str]
+    model_profile: ModelProfileEnum = ModelProfileEnum.BASELINE
     full_plan_pipeline_task: Optional[FullPlanPipeline] = field(default=None)
     all_expected_filenames: list[str] = field(default_factory=list)
     luigi_build_return_value: Optional[bool] = field(default=None, init=False)
@@ -3864,7 +3866,7 @@ def setup(self) -> None:
     def resolve_luigi_workers(self) -> int:
         default_workers = 1
         try:
-            llm_config = PlanExeLLMConfig.load()
+            llm_config = PlanExeLLMConfig.load(model_profile=self.model_profile)
         except Exception as exc:
             logger.warning(f"Could not load llm_config.json; defaulting Luigi workers to {default_workers}: {exc}")
             return default_workers
@@ -3892,8 +3894,12 @@ def resolve_luigi_workers(self) -> int:
         return min(workers_candidates)
     
     @classmethod
-    def resolve_llm_models(cls, specified_llm_model: Optional[str]) -> list[str]:
-        llm_models = get_llm_names_by_priority()
+    def resolve_llm_models(
+        cls,
+        specified_llm_model: Optional[str],
+        model_profile: ModelProfileEnum = ModelProfileEnum.BASELINE,
+    ) -> list[str]:
+        llm_models = get_llm_names_by_priority(model_profile=model_profile)
         if len(llm_models) == 0:
             logger.error("No LLM models found. Please check your llm_config.json file and add 'priority' values.")
             llm_models = [DEFAULT_LLM_MODEL]
@@ -3902,7 +3908,7 @@ def resolve_llm_models(cls, specified_llm_model: Optional[str]) -> list[str]:
             llm_model = specified_llm_model
             logger.info(f"Using the specified LLM model: {llm_model!r}")
             if llm_model != SPECIAL_AUTO_ID:
-                if not is_valid_llm_name(llm_model):
+                if not is_valid_llm_name(llm_model, model_profile=model_profile):
                     logger.error(f"Invalid LLM model: {llm_model!r}. Please check your llm_config.json file and add the model.")
                     raise ValueError(f"Invalid LLM model: {llm_model!r}. Please check your llm_config.json file and add the model.")
                 llm_models = [llm_model]
@@ -4120,7 +4126,13 @@ def configure_logging(run_id_dir: Path) -> int:
 
     # logger.info("Environment variables Luigi:\n" + get_env_as_string() + "\n\n\n")
 
-    llm_models = ExecutePipeline.resolve_llm_models(pipeline_environment.llm_model)
+    model_profile = normalize_model_profile(pipeline_environment.model_profile)
+    logger.info(f"Model profile: {model_profile.value}")
+
+    llm_models = ExecutePipeline.resolve_llm_models(
+        pipeline_environment.llm_model,
+        model_profile=model_profile,
+    )
 
     if speedvsdetail == SpeedVsDetailEnum.PING_LLM:
         try:
@@ -4138,9 +4150,19 @@ def configure_logging(run_id_dir: Path) -> int:
         get_dispatcher().add_event_handler(track_activity)
 
     if True:
-        execute_pipeline = ExecutePipeline(run_id_dir=run_id_dir, speedvsdetail=speedvsdetail, llm_models=llm_models)
+        execute_pipeline = ExecutePipeline(
+            run_id_dir=run_id_dir,
+            speedvsdetail=speedvsdetail,
+            llm_models=llm_models,
+            model_profile=model_profile,
+        )
     else:
-        execute_pipeline = DemoStoppingExecutePipeline(run_id_dir=run_id_dir, speedvsdetail=speedvsdetail, llm_models=llm_models)
+        execute_pipeline = DemoStoppingExecutePipeline(
+            run_id_dir=run_id_dir,
+            speedvsdetail=speedvsdetail,
+            llm_models=llm_models,
+            model_profile=model_profile,
+        )
     
     try:
         execute_pipeline.setup()
diff --git a/worker_plan/worker_plan_internal/utils/planexe_llmconfig.py b/worker_plan/worker_plan_internal/utils/planexe_llmconfig.py
index e61839760..179efc501 100644
--- a/worker_plan/worker_plan_internal/utils/planexe_llmconfig.py
+++ b/worker_plan/worker_plan_internal/utils/planexe_llmconfig.py
@@ -8,6 +8,7 @@
 from typing import Any, Dict
 import json
 from worker_plan_api.planexe_config import PlanExeConfig
+from worker_plan_api.model_profile import ModelProfileEnum
 from worker_plan_api.planexe_dotenv import PlanExeDotEnv
 import logging
 
@@ -20,8 +21,15 @@ class PlanExeLLMConfig:
     llm_config_dict: dict[str, Any]
 
     @classmethod
-    def load(cls):
-        config = PlanExeConfig.load()
+    def load(
+        cls,
+        model_profile: ModelProfileEnum | str | None = None,
+        llm_config_json_name_override: str | None = None,
+    ):
+        config = PlanExeConfig.load(
+            model_profile_override=model_profile,
+            llm_config_json_name_override=llm_config_json_name_override,
+        )
         config.raise_if_required_files_not_found()
         planexe_dotenv = PlanExeDotEnv.load()
 
diff --git a/worker_plan_database/Dockerfile b/worker_plan_database/Dockerfile
index 5cc36db64..3b2d9cedc 100644
--- a/worker_plan_database/Dockerfile
+++ b/worker_plan_database/Dockerfile
@@ -14,7 +14,7 @@ WORKDIR /app
 COPY worker_plan /app/worker_plan
 COPY worker_plan_database /app/worker_plan_database
 COPY database_api /app/database_api
-COPY llm_config.json /app/
+COPY llm_config*.json /app/
 
 # Install the core planexe package plus Flask support and database drivers.
 RUN set -eux; \
diff --git a/worker_plan_database/app.py b/worker_plan_database/app.py
index 71fc3d994..53ce4d11b 100644
--- a/worker_plan_database/app.py
+++ b/worker_plan_database/app.py
@@ -14,6 +14,7 @@
 import logging
 from pathlib import Path
 from typing import Optional
+from worker_plan_api.model_profile import ModelProfileEnum
 from urllib.parse import quote_plus
 import uuid
 import io
@@ -141,6 +142,7 @@
     from database_api.model_credit_history import CreditHistory
     from database_api.model_token_metrics import TokenMetrics
     from worker_plan_database.speedvsdetail import resolve_speedvsdetail
+    from worker_plan_database.model_profile import resolve_model_profile
     from worker_plan_database.machai import MachAI
     from flask import Flask
     logger.debug("All modules imported successfully.")
@@ -321,8 +323,20 @@ def update_task_progress_with_retry(task_id: str, progress_percentage: float, pr
 
 
 class ServerExecutePipeline(ExecutePipeline):
-    def __init__(self, task_id: str, run_id_dir: Path, speedvsdetail: SpeedVsDetailEnum, llm_models: list[str]):
-        super().__init__(run_id_dir=run_id_dir, speedvsdetail=speedvsdetail, llm_models=llm_models)
+    def __init__(
+        self,
+        task_id: str,
+        run_id_dir: Path,
+        speedvsdetail: SpeedVsDetailEnum,
+        llm_models: list[str],
+        model_profile: ModelProfileEnum,
+    ):
+        super().__init__(
+            run_id_dir=run_id_dir,
+            speedvsdetail=speedvsdetail,
+            llm_models=llm_models,
+            model_profile=model_profile,
+        )
         self.task_id = task_id
 
     def _handle_task_completion(self, parameters: HandleTaskCompletionParameters) -> None:
@@ -591,19 +605,38 @@ def upload_report_to_worker_plan(run_id: str, report_path: Path) -> None:
             response.text[:500],
         )
 
-def execute_pipeline_for_job(task_id: str, user_id: str, run_id_dir: Path, speedvsdetail: SpeedVsDetailEnum, use_machai_developer_endpoint: bool):
+def execute_pipeline_for_job(
+    task_id: str,
+    user_id: str,
+    run_id_dir: Path,
+    speedvsdetail: SpeedVsDetailEnum,
+    model_profile: ModelProfileEnum,
+    use_machai_developer_endpoint: bool,
+):
     start_time = time.time()
-    logger.info(f"Executing pipeline for task_id: {task_id!r}, run_id_dir: {run_id_dir!r}, speedvsdetail: {speedvsdetail!r}, use_machai_developer_endpoint: {use_machai_developer_endpoint!r}...")
-
-    llm_models = ExecutePipeline.resolve_llm_models(None)
-    pipeline_instance = ServerExecutePipeline(task_id=task_id, run_id_dir=run_id_dir, speedvsdetail=speedvsdetail, llm_models=llm_models)
+    logger.info(
+        f"Executing pipeline for task_id: {task_id!r}, run_id_dir: {run_id_dir!r}, "
+        f"speedvsdetail: {speedvsdetail!r}, model_profile: {model_profile.value!r}, "
+        f"use_machai_developer_endpoint: {use_machai_developer_endpoint!r}..."
+    )
+
+    llm_models = ExecutePipeline.resolve_llm_models(None, model_profile=model_profile)
+    pipeline_instance = ServerExecutePipeline(
+        task_id=task_id,
+        run_id_dir=run_id_dir,
+        speedvsdetail=speedvsdetail,
+        llm_models=llm_models,
+        model_profile=model_profile,
+    )
     # Keep a Flask app context active while running pipeline tasks so db-backed
     # instrumentation (for example token metrics) can access db.session safely.
     with app.app_context():
         set_current_task_id(task_id)
         set_current_user_id(user_id)
         previous_track_activity_path = track_activity.jsonl_file_path
+        previous_model_profile = os.environ.get("PLANEXE_MODEL_PROFILE")
         try:
+            os.environ["PLANEXE_MODEL_PROFILE"] = model_profile.value
             # Always keep activity tracking in the task run directory, including PING_LLM mode.
             track_activity.jsonl_file_path = run_id_dir / ExtraFilenameEnum.TRACK_ACTIVITY_JSONL.value
 
@@ -619,6 +652,10 @@ def execute_pipeline_for_job(task_id: str, user_id: str, run_id_dir: Path, speed
 
                 pipeline_instance.run()
         finally:
+            if previous_model_profile is None:
+                os.environ.pop("PLANEXE_MODEL_PROFILE", None)
+            else:
+                os.environ["PLANEXE_MODEL_PROFILE"] = previous_model_profile
             track_activity.jsonl_file_path = previous_track_activity_path
             set_current_user_id(None)
             set_current_task_id(None)
@@ -650,7 +687,8 @@ def execute_pipeline_for_job(task_id: str, user_id: str, run_id_dir: Path, speed
         "task_id": str(task_id), 
         "user_id": str(user_id), 
         "run_id_dir": str(run_id_dir), 
-        "speedvsdetail": str(speedvsdetail), 
+        "speedvsdetail": str(speedvsdetail),
+        "model_profile": model_profile.value,
         "duration_between_processing_and_completion": str(duration_in_seconds),
         "has_report_file": str(pipeline_instance.has_report_file),
         "has_stop_flag_file": str(pipeline_instance.has_stop_flag_file),
@@ -760,6 +798,7 @@ def process_pending_tasks() -> bool:
     user_id: Optional[str] = None
     timestamp_created: Optional[datetime] = None
     speedvsdetail: SpeedVsDetailEnum = SpeedVsDetailEnum.ALL_DETAILS_BUT_SLOW
+    model_profile: ModelProfileEnum = ModelProfileEnum.BASELINE
 
     with app.app_context():
         try:
@@ -790,6 +829,7 @@ def process_pending_tasks() -> bool:
                 prompt = str(task_to_claim.prompt)
                 parameters = task_to_claim.parameters if isinstance(task_to_claim.parameters, dict) else None
                 speedvsdetail = resolve_speedvsdetail(parameters)
+                model_profile = resolve_model_profile(parameters)
                 use_machai_developer_endpoint = bool(task_to_claim.has_parameter_key('developer'))
                 user_id = str(task_to_claim.user_id)
                 timestamp_created = task_to_claim.timestamp_created
@@ -809,7 +849,11 @@ def process_pending_tasks() -> bool:
             return False # Error, sleep longer
 
 
-    logger.info(f"Successfully claimed task: {task_id!r}, user_id: {user_id!r}, timestamp_created: {timestamp_created!r}, use_machai_developer_endpoint: {use_machai_developer_endpoint!r}")
+    logger.info(
+        f"Successfully claimed task: {task_id!r}, user_id: {user_id!r}, "
+        f"timestamp_created: {timestamp_created!r}, model_profile: {model_profile.value!r}, "
+        f"use_machai_developer_endpoint: {use_machai_developer_endpoint!r}"
+    )
 
     with app.app_context():
         WorkerItem.upsert_heartbeat(worker_id=WORKER_ID, current_task_id=task_id)
@@ -840,7 +884,8 @@ def process_pending_tasks() -> bool:
             "task_id": str(task_id), 
             "user_id": str(user_id), 
             "run_id_dir": str(run_id_dir), 
-            "speedvsdetail": str(speedvsdetail), 
+            "speedvsdetail": str(speedvsdetail),
+            "model_profile": model_profile.value,
             "duration_between_pending_and_processing": str(duration_between_pending_and_processing),
             "WORKER_ID": str(WORKER_ID)
         }
@@ -854,7 +899,14 @@ def process_pending_tasks() -> bool:
 
     try:
         # Create run directory and execute pipeline
-        execute_pipeline_for_job(task_id=task_id, user_id=user_id, run_id_dir=run_id_dir, speedvsdetail=speedvsdetail, use_machai_developer_endpoint=use_machai_developer_endpoint)
+        execute_pipeline_for_job(
+            task_id=task_id,
+            user_id=user_id,
+            run_id_dir=run_id_dir,
+            speedvsdetail=speedvsdetail,
+            model_profile=model_profile,
+            use_machai_developer_endpoint=use_machai_developer_endpoint,
+        )
         with app.app_context():
             WorkerItem.upsert_heartbeat(worker_id=WORKER_ID)
         return True # We just processed a task. There may be more pending tasks, don't sleep that long, so we can process the next task.
@@ -873,7 +925,8 @@ def process_pending_tasks() -> bool:
                 "task_id": str(task_id), 
                 "user_id": str(user_id), 
                 "run_id_dir": str(run_id_dir), 
-                "speedvsdetail": str(speedvsdetail), 
+                "speedvsdetail": str(speedvsdetail),
+                "model_profile": model_profile.value,
                 "duration_between_pending_and_processing": str(duration_between_pending_and_processing),
                 "WORKER_ID": str(WORKER_ID),
                 "machai_error_message": str(machai_error_message),
diff --git a/worker_plan_database/model_profile.py b/worker_plan_database/model_profile.py
new file mode 100644
index 000000000..f069134fc
--- /dev/null
+++ b/worker_plan_database/model_profile.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from worker_plan_api.model_profile import ModelProfileEnum, resolve_model_profile_from_parameters
+
+
+def resolve_model_profile(parameters: Optional[dict[str, Any]]) -> ModelProfileEnum:
+    return resolve_model_profile_from_parameters(parameters)
diff --git a/worker_plan_database/railway.toml b/worker_plan_database/railway.toml
index 9f7358c02..4eb72e976 100644
--- a/worker_plan_database/railway.toml
+++ b/worker_plan_database/railway.toml
@@ -1,7 +1,7 @@
 [build]
 builder = "DOCKERFILE"
 dockerfilePath = "/worker_plan_database/Dockerfile"
-watchPatterns = ["/worker_plan_database/**", "/worker_plan/**", "/database_api/**", "/llm_config.json"]
+watchPatterns = ["/worker_plan_database/**", "/worker_plan/**", "/database_api/**", "/llm_config.json", "/llm_config.premium.json", "/llm_config.frontier.json", "/llm_config.custom.json"]
 context = "."
 
 [deploy]
diff --git a/worker_plan_database/tests/test_model_profile.py b/worker_plan_database/tests/test_model_profile.py
new file mode 100644
index 000000000..68ab2566e
--- /dev/null
+++ b/worker_plan_database/tests/test_model_profile.py
@@ -0,0 +1,22 @@
+import unittest
+
+from worker_plan_database.model_profile import resolve_model_profile
+from worker_plan_api.model_profile import ModelProfileEnum
+
+
+class TestModelProfile(unittest.TestCase):
+    def test_default_baseline(self):
+        self.assertEqual(resolve_model_profile(None), ModelProfileEnum.BASELINE)
+
+    def test_accepts_model_profile(self):
+        self.assertEqual(resolve_model_profile({"model_profile": "premium"}), ModelProfileEnum.PREMIUM)
+
+    def test_accepts_legacy_llm_profile(self):
+        self.assertEqual(resolve_model_profile({"llm_profile": "frontier"}), ModelProfileEnum.FRONTIER)
+
+    def test_invalid_falls_back_to_baseline(self):
+        self.assertEqual(resolve_model_profile({"model_profile": "unknown"}), ModelProfileEnum.BASELINE)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 7517bd35ff75e3ddb72705c51c30cc10989a5748 Mon Sep 17 00:00:00 2001
From: Mark Barney <82deutschmark@gmail.com>
Date: Wed, 18 Feb 2026 16:15:51 -0500
Subject: [PATCH 2/8] feat(ui): display model lists per profile in frontend and
 single-user UI

Add dynamic model list rendering based on selected profile in both multi-user frontend and single-user Gradio app. Mount llm_config*.json files into frontend_multi_user container, parse each profile's config to extract model names sorted by priority, and display them in the UI. Update llm_config.custom.json with cleaner priority-based model ordering and remove unused entries.
---
 docker-compose.yml                       |   7 ++
 frontend_multi_user/Dockerfile           |   1 +
 frontend_multi_user/src/app.py           |  40 +++++-
 frontend_multi_user/templates/index.html |  44 ++++++-
 frontend_single_user/app.py              |  68 +++++++++--
 llm_config.custom.json                   | 128 +++-----------------
 llm_config.frontier.json                 | 148 ++++-------------------
 llm_config.premium.json                  |  83 +++++--------
 8 files changed, 215 insertions(+), 304 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 75252f51a..812fa6d59 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -196,6 +196,13 @@ services:
       PLANEXE_FRONTEND_MULTIUSER_ADMIN_PASSWORD: ${PLANEXE_FRONTEND_MULTIUSER_ADMIN_PASSWORD:-admin}
     ports:
       - "${PLANEXE_FRONTEND_MULTIUSER_PORT:-5001}:5000"
+    volumes:
+      - ./.env:/app/.env:ro
+      - ./llm_config.json:/app/llm_config.json:ro
+      - ./llm_config.premium.json:/app/llm_config.premium.json:ro
+      - ./llm_config.frontier.json:/app/llm_config.frontier.json:ro
+      - ./llm_config.custom.json:/app/llm_config.custom.json:ro
+      - ./run:/app/run
     healthcheck:
       test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/healthcheck').read()"]
       interval: 10s
diff --git a/frontend_multi_user/Dockerfile b/frontend_multi_user/Dockerfile
index 81aaca529..7d7dca39f 100644
--- a/frontend_multi_user/Dockerfile
+++ b/frontend_multi_user/Dockerfile
@@ -13,6 +13,7 @@ WORKDIR /app
 COPY worker_plan/worker_plan_api /app/worker_plan_api
 COPY database_api /app/database_api
 COPY frontend_multi_user /app/frontend_multi_user
+COPY llm_config*.json /app/
 
 # Install dependencies from frontend_multi_user pyproject
 RUN set -eux; \
diff --git a/frontend_multi_user/src/app.py b/frontend_multi_user/src/app.py
index dde0063f7..af16e9d93 100644
--- a/frontend_multi_user/src/app.py
+++ b/frontend_multi_user/src/app.py
@@ -53,7 +53,7 @@
 
 from worker_plan_api.planexe_dotenv import DotEnvKeyEnum, PlanExeDotEnv
 from worker_plan_api.planexe_config import PlanExeConfig
-from worker_plan_api.model_profile import normalize_model_profile
+from worker_plan_api.model_profile import ModelProfileEnum, normalize_model_profile
 
 RUN_DIR = "run"
 
@@ -123,6 +123,43 @@ def wrapper(*args, **kwargs):
         return view(*args, **kwargs)
     return wrapper
 
+
+def _profile_model_name_map() -> Dict[str, list[str]]:
+    profile_to_models: Dict[str, list[str]] = {}
+    for profile in ModelProfileEnum:
+        config = PlanExeConfig.load(model_profile_override=profile)
+        config_path = config.llm_config_json_path
+        if config_path is None:
+            profile_to_models[profile.value] = []
+            continue
+        try:
+            with config_path.open("r", encoding="utf-8") as fh:
+                model_map = json.load(fh)
+        except Exception:
+            profile_to_models[profile.value] = []
+            continue
+        if not isinstance(model_map, dict):
+            profile_to_models[profile.value] = []
+            continue
+
+        def sort_key(item: tuple[str, dict]) -> tuple[int, str]:
+            data = item[1] if isinstance(item[1], dict) else {}
+            priority = data.get("priority")
+            if not isinstance(priority, int):
+                priority = 999999
+            return priority, item[0]
+
+        names: list[str] = []
+        for model_id, model_data in sorted(model_map.items(), key=sort_key):
+            model_name = model_id
+            if isinstance(model_data, dict):
+                args = model_data.get("arguments")
+                if isinstance(args, dict) and isinstance(args.get("model"), str):
+                    model_name = args["model"]
+            names.append(model_name)
+        profile_to_models[profile.value] = names
+    return profile_to_models
+
 class MyFlaskApp:
     def __init__(self):
         logger.info(f"MyFlaskApp.__init__. Starting...")
@@ -1888,6 +1925,7 @@ def index():
                 nonce=nonce,
                 user_id=user_id,
                 example_prompts=example_prompts,
+                model_profile_models_json=json.dumps(_profile_model_name_map()),
             )
 
         @self.app.route('/healthcheck')
diff --git a/frontend_multi_user/templates/index.html b/frontend_multi_user/templates/index.html
index 1da3eb207..31295b88c 100644
--- a/frontend_multi_user/templates/index.html
+++ b/frontend_multi_user/templates/index.html
@@ -470,11 +470,19 @@ <h2>Start a New Plan</h2>
             <input type="hidden" name="speed_vs_detail" value="all_details_but_slow">
             <label for="model-profile" style="display:block; margin-bottom:8px; font-size:0.9rem; color:#a0aec0;">Model profile</label>
             <select id="model-profile" name="model_profile" style="margin-bottom:12px; width:100%; max-width:240px; padding:8px; border-radius:8px;">
-                <option value="baseline" selected>baseline</option>
-                <option value="premium">premium</option>
-                <option value="frontier">frontier</option>
-                <option value="custom">custom</option>
+                <option value="baseline" selected>baseline (default balanced)</option>
+                <option value="premium">premium (higher-cost ordering)</option>
+                <option value="frontier">frontier (highest-capability ordering)</option>
+                <option value="custom">custom (your custom file)</option>
             </select>
+            <div style="margin-bottom:12px; font-size:0.85rem; color:#6b7280; line-height:1.4;">
+                baseline -> <code>llm_config.json</code>,
+                premium -> <code>llm_config.premium.json</code>,
+                frontier -> <code>llm_config.frontier.json</code>,
+                custom -> <code>llm_config.custom.json</code> (or <code>PLANEXE_LLM_CONFIG_CUSTOM_FILENAME</code>).
+                The actual models are read from the selected file's priority order.
+            </div>
+            <div id="model-profile-models" style="margin-bottom:12px; font-size:0.85rem; color:#4b5563; line-height:1.4;"></div>
             <textarea name="prompt" id="plan-prompt" placeholder="Describe your project or idea in detail. The more context you provide, the better the plan will be." required></textarea>
             <div class="char-count" id="char-count">0 characters</div>
             <div class="new-plan-footer">
@@ -593,4 +601,32 @@ <h3>Avoid Surprises</h3>
     });
 </script>
 {% endif %}
+{% if user %}
+<script>
+    // tojson emits a script-safe string literal, so text from the config files cannot end this script block early.
+    var profileToModels = JSON.parse({{ (model_profile_models_json or '{}') | tojson }});
+    var profileSelect = document.getElementById('model-profile');
+    var profileModelsDiv = document.getElementById('model-profile-models');
+
+    // Rebuild the model list from DOM nodes so model names are inserted as text and never parsed as HTML.
+    function renderProfileModels() {
+        if (!profileSelect || !profileModelsDiv) { return; }
+        var profile = profileSelect.value || 'baseline';
+        var models = profileToModels[profile] || [];
+        var heading = document.createElement('strong');
+        heading.textContent = 'Models in ' + profile + ':';
+        var list = document.createElement('ul');
+        list.style.margin = '6px 0 0 16px';
+        models.forEach(function(modelName) {
+            list.appendChild(document.createElement('li')).appendChild(document.createElement('code')).textContent = modelName;
+        });
+        profileModelsDiv.textContent = '';
+        profileModelsDiv.appendChild(heading);
+        profileModelsDiv.appendChild(models.length ? list : document.createTextNode(' none found'));
+    }
+
+    if (profileSelect) { profileSelect.addEventListener('change', renderProfileModels); }
+    renderProfileModels();
+</script>
+{% endif %}
 {% endblock %}
diff --git a/frontend_single_user/app.py b/frontend_single_user/app.py
index 813160193..59bcfdd32 100644
--- a/frontend_single_user/app.py
+++ b/frontend_single_user/app.py
@@ -21,7 +21,8 @@
 load_dotenv()
 from worker_plan_api.llm_info import LLMInfo, OllamaStatus
 from worker_plan_api.speedvsdetail import SpeedVsDetailEnum
-from worker_plan_api.model_profile import ModelProfileEnum
+from worker_plan_api.model_profile import ModelProfileEnum, default_filename_for_profile
+from worker_plan_api.planexe_config import PlanExeConfig
 from worker_plan_api.prompt_catalog import PromptCatalog
 
 logger = logging.getLogger(__name__)
@@ -243,6 +244,42 @@ def fetch_llm_info_with_retry(max_attempts: int = 15, delay_seconds: float = 2.0
     tuple_item = (config_item.label, config_item.id)
     available_model_names.append(tuple_item)
 
+
+def _profile_models_markdown(profile_value: str) -> str:
+    """Render the models configured for a profile as a Markdown bullet list."""
+    # An unrecognised profile value falls back to baseline instead of raising.
+    try:
+        selected = ModelProfileEnum(profile_value)
+    except Exception:
+        selected = ModelProfileEnum.BASELINE
+    loaded = PlanExeConfig.load(model_profile_override=selected)
+    json_path = loaded.llm_config_json_path
+    file_label = loaded.llm_config_json_name or default_filename_for_profile(selected)
+    heading = f"**Models in `{selected.value}`** (`{file_label}`)"
+    if json_path is None:
+        return f"{heading}\n- Config file not found."
+    try:
+        with json_path.open("r", encoding="utf-8") as handle:
+            entries = json.load(handle)
+    except Exception as err:
+        return f"{heading}\n- Failed to read config: `{err}`"
+    if not (isinstance(entries, dict) and entries):
+        return f"{heading}\n- No models configured."
+
+    # Entries lacking an integer priority are keyed as 999999; ties order by model id.
+    def priority_then_id(entry: tuple[str, dict]) -> tuple[int, str]:
+        model_id, spec = entry
+        raw = spec.get("priority") if isinstance(spec, dict) else None
+        return (raw if isinstance(raw, int) else 999999), model_id
+
+    def display_name(model_id: str, spec: object) -> str:
+        args = spec.get("arguments") if isinstance(spec, dict) else None
+        named = isinstance(args, dict) and isinstance(args.get("model"), str)
+        return args["model"] if named else model_id
+
+    ordered = sorted(entries.items(), key=priority_then_id)
+    return "\n".join([f"{heading}:"] + [f"- `{display_name(k, v)}`" for k, v in ordered])
+
 class MarkdownBuilder:
     """
     Helper class to build Markdown-formatted strings.
@@ -336,7 +373,7 @@ def initialize_browser_settings(browser_state, session_state: SessionState):
     session_state.llm_model = model
     session_state.speedvsdetail = speedvsdetail
     session_state.model_profile = model_profile
-    return openrouter_api_key, model, speedvsdetail, model_profile, browser_state, session_state
+    return openrouter_api_key, model, speedvsdetail, model_profile, _profile_models_markdown(model_profile), browser_state, session_state
 
 def update_browser_settings_callback(openrouter_api_key, model, speedvsdetail, model_profile, browser_state, session_state: SessionState):
     try:
@@ -352,7 +389,7 @@ def update_browser_settings_callback(openrouter_api_key, model, speedvsdetail, m
     session_state.llm_model = model
     session_state.speedvsdetail = speedvsdetail
     session_state.model_profile = model_profile
-    return updated_browser_state, openrouter_api_key, model, speedvsdetail, model_profile, session_state
+    return updated_browser_state, openrouter_api_key, model, speedvsdetail, model_profile, _profile_models_markdown(model_profile), session_state
 
 def run_planner(submit_or_retry_button, plan_prompt, browser_state, session_state: SessionState):
     """
@@ -735,9 +772,22 @@ def check_api_key(session_state: SessionState):
             ],
             value=ModelProfileEnum.BASELINE.value,
             label="Model Profile",
-            info="Select which llm_config profile file to use.",
+            info="Select which profile file is used by auto model selection.",
             interactive=True,
         )
+        gr.Markdown(
+            "\n".join(
+                [
+                    "**Profile details**",
+                    "- `baseline` -> `llm_config.json` (default balanced profile).",
+                    "- `premium` -> `llm_config.premium.json` (higher-cost model ordering).",
+                    "- `frontier` -> `llm_config.frontier.json` (most capable model ordering).",
+                    "- `custom` -> `llm_config.custom.json` or `PLANEXE_LLM_CONFIG_CUSTOM_FILENAME`.",
+                    "- The exact models come from the selected JSON file priorities.",
+                ]
+            )
+        )
+        profile_models_markdown = gr.Markdown(_profile_models_markdown(ModelProfileEnum.BASELINE.value))
 
         speedvsdetail_items = [
             ("All details, but slow", SpeedVsDetailEnum.ALL_DETAILS_BUT_SLOW),
@@ -829,7 +879,7 @@ def check_api_key(session_state: SessionState):
     openrouter_api_key_text.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -839,7 +889,7 @@ def check_api_key(session_state: SessionState):
     model_radio.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -849,7 +899,7 @@ def check_api_key(session_state: SessionState):
     speedvsdetail_radio.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -859,7 +909,7 @@ def check_api_key(session_state: SessionState):
     model_profile_radio.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -876,7 +926,7 @@ def check_api_key(session_state: SessionState):
     demo_text2plan.load(
         fn=initialize_browser_settings,
         inputs=[browser_state, session_state],
-        outputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state]
+        outputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, browser_state, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
diff --git a/llm_config.custom.json b/llm_config.custom.json
index 3fbc022ae..9fac1344c 100644
--- a/llm_config.custom.json
+++ b/llm_config.custom.json
@@ -1,26 +1,11 @@
 {
-    "openai-paid-gpt-5-nano": {
-        "comment": "This is slow. It's paid, and requires that you have a OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan.",
-        "luigi_workers": 4,
-        "class": "OpenAI",
-        "arguments": {
-            "model": "gpt-5-nano",
-            "api_key": "${OPENAI_API_KEY}",
-            "temperature": 1.0,
-            "timeout": 120.0,
-            "context_window": 400000,
-            "is_function_calling_model": false,
-            "is_chat_model": true,
-            "max_tokens": 128000,
-            "max_retries": 5
-        }
-    },
-    "openrouter-paid-openai-gpt-oss-20b": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created August 5, 2025. 131,072 context. $0.05/M input tokens. $0.20/M output tokens. This is a reasoning model, and uses more tokens than LLMs.",
+    "openrouter-paid-openai-gpt-oss-120b": {
+        "comment": "Strict-priority primary model. Paid OpenRouter route to OpenAI GPT-OSS-120B, so check the pricing before use. Created August 5, 2025. 131,072 context.",
+        "priority": 1,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "openai/gpt-oss-20b",
+            "model": "openai/gpt-oss-120b",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -30,12 +15,13 @@
             "max_retries": 5
         }
     },
-    "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created Dec 14, 2025. 262,144 context. $0.06/M input tokens. $0.24/M output tokens. This is a reasoning model, and uses more tokens than LLMs.",
+    "openrouter-paid-gemini-3-flash-preview": {
+        "comment": "Secondary fallback. Paid OpenRouter access to Gemini 3 Flash Preview for balanced speed and context.",
+        "priority": 2,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "nvidia/nemotron-3-nano-30b-a3b",
+            "model": "google/gemini-3-flash-preview",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -45,13 +31,13 @@
             "max_retries": 5
         }
     },
-    "openrouter-paid-gemini-2.0-flash-001": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created Feb 25, 2025. 1,048,576 context. $0.075/M input tokens. $0.30/M output tokens.",
-        "priority": 1,
+    "openrouter-paid-minimax-m2.5": {
+        "comment": "Tertiary option. Paid OpenRouter route to MiniMax M2.5 for cost-aware reasoning depth.",
+        "priority": 3,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "google/gemini-2.0-flash-001",
+            "model": "minimax/minimax-01",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -61,13 +47,13 @@
             "max_retries": 5
         }
     },
-    "openrouter-paid-openai-gpt-4o-mini": {
-        "comment": "This is medium fast. It's paid, so check the pricing before use. Created Jul 18, 2024. 128,000 context. Starting at $0.15/M input tokens. Starting at $0.60/M output tokens.",
-        "priority": 2,
+    "openrouter-paid-qwen3-coder-next": {
+        "comment": "Code-focused fallback sharing priority tier 3, targeting Qwen3 Coder Next via OpenRouter.",
+        "priority": 3,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "openai/gpt-4o-mini",
+            "model": "qwen/qwq-32b-preview",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -77,13 +63,13 @@
             "max_retries": 5
         }
     },
-    "openrouter-paid-qwen3-30b-a3b": {
-        "comment": "This is slow. It's paid, so check the pricing before use. Created Apr 28, 2025. 40,960 context. $0.08/M input tokens. $0.29/M output tokens.",
-        "priority": 3,
+    "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
+        "comment": "Last-resort fallback. Paid OpenRouter access to NVIDIA Nemotron 3 Nano 30B A3B for extended context needs.",
+        "priority": 4,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "qwen/qwen3-30b-a3b",
+            "model": "nvidia/nemotron-3-nano-30b-a3b",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -92,79 +78,5 @@
             "max_tokens": 8192,
             "max_retries": 5
         }
-    },
-    "alibabacloud-paid-qwen-flash-2025-07-28": {
-        "comment": "This is slow. It's paid, so check the pricing before use. Snapshot from 2025-07-28. 1,000,000 context. $0.05/M input tokens. $0.40/M output tokens.",
-        "luigi_workers": 4,
-        "class": "OpenAILike",
-        "arguments": {
-            "model": "qwen-flash-2025-07-28",
-            "api_key": "${DASHSCOPE_API_KEY}",
-            "api_base": "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
-            "temperature": 0.1,
-            "timeout": 60.0,
-            "is_function_calling_model": false,
-            "is_chat_model": true,
-            "max_tokens": 16384,
-            "max_retries": 5
-        }
-    },
-    "ollama-llama3.1": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "llama3.1:latest",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "docker-ollama-llama3.1": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in a Docker container, and ollama is installed on the host the computer.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "llama3.1:latest",
-            "base_url": "http://host.docker.internal:11434",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "ollama-qwen2.5-coder": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "qwen2.5-coder:latest",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "lmstudio-qwen2.5-7b-instruct-1m": {
-        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "LMStudio",
-        "arguments": {
-            "model_name": "qwen2.5-7b-instruct-1m",
-            "base_url": "http://localhost:1234/v1",
-            "temperature": 0.2,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "docker-lmstudio-qwen2.5-7b-instruct-1m": {
-        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in a Docker container, and ollama is installed on the host the computer.",
-        "luigi_workers": 1,
-        "class": "LMStudio",
-        "arguments": {
-            "model_name": "qwen2.5-7b-instruct-1m",
-            "base_url": "http://host.docker.internal:1234/v1",
-            "temperature": 0.2,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
     }
 }
diff --git a/llm_config.frontier.json b/llm_config.frontier.json
index 2133a1fa3..8ac9b9b05 100644
--- a/llm_config.frontier.json
+++ b/llm_config.frontier.json
@@ -1,170 +1,66 @@
 {
-    "openai-paid-gpt-5-nano": {
-        "comment": "This is slow. It's paid, and requires that you have a OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan.",
-        "luigi_workers": 4,
-        "class": "OpenAI",
-        "arguments": {
-            "model": "gpt-5-nano",
-            "api_key": "${OPENAI_API_KEY}",
-            "temperature": 1.0,
-            "timeout": 120.0,
-            "context_window": 400000,
-            "is_function_calling_model": false,
-            "is_chat_model": true,
-            "max_tokens": 128000,
-            "max_retries": 5
-        },
-        "priority": 1
-    },
-    "openrouter-paid-openai-gpt-oss-20b": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created August 5, 2025. 131,072 context. $0.05/M input tokens. $0.20/M output tokens. This is a reasoning model, and uses more tokens than LLMs.",
+    "openrouter-paid-z-ai-glm-5": {
+        "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 11, 2026. 204,800 context. $0.30/M input tokens. $2.55/M output tokens.",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "openai/gpt-oss-20b",
+            "model": "z-ai/glm-5",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 60000,
             "max_retries": 5
         },
-        "priority": 2
+        "priority": 1
     },
-    "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created Dec 14, 2025. 262,144 context. $0.06/M input tokens. $0.24/M output tokens. This is a reasoning model, and uses more tokens than LLMs.",
+    "openrouter-paid-moonshotai-kimi-k2-5": {
+        "comment": "This is fast. It's paid, so check the pricing before use. Created Jan 27, 2026. 262,144 context. $0.23/M input tokens. $3.00/M output tokens.",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "nvidia/nemotron-3-nano-30b-a3b",
+            "model": "moonshotai/kimi-k2.5",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 60000,
             "max_retries": 5
         },
-        "priority": 3
-    },
-    "openrouter-paid-gemini-2.0-flash-001": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created Feb 25, 2025. 1,048,576 context. $0.075/M input tokens. $0.30/M output tokens.",
-        "luigi_workers": 4,
-        "class": "OpenRouter",
-        "arguments": {
-            "model": "google/gemini-2.0-flash-001",
-            "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
-            "timeout": 60.0,
-            "is_function_calling_model": false,
-            "is_chat_model": true,
-            "max_tokens": 8192,
-            "max_retries": 5
-        }
+        "priority": 2
     },
-    "openrouter-paid-openai-gpt-4o-mini": {
-        "comment": "This is medium fast. It's paid, so check the pricing before use. Created Jul 18, 2024. 128,000 context. Starting at $0.15/M input tokens. Starting at $0.60/M output tokens.",
+    "openrouter-paid-qwen-qwen3-5-397b-a17b": {
+        "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 16, 2026. 262,144 context. Pricing: $0.60/M input tokens. $3.60/M output tokens.",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "openai/gpt-4o-mini",
+            "model": "qwen/qwen3.5-397b-a17b",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 60000,
             "max_retries": 5
-        }
+        },
+        "priority": 3
     },
-    "openrouter-paid-qwen3-30b-a3b": {
-        "comment": "This is slow. It's paid, so check the pricing before use. Created Apr 28, 2025. 40,960 context. $0.08/M input tokens. $0.29/M output tokens.",
+    "openrouter-paid-google-gemini-2-5-flash-lite": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Fallback option. Available on OpenRouter.",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "qwen/qwen3-30b-a3b",
+            "model": "google/gemini-2.5-flash-lite",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 60000,
             "max_retries": 5
-        }
-    },
-    "alibabacloud-paid-qwen-flash-2025-07-28": {
-        "comment": "This is slow. It's paid, so check the pricing before use. Snapshot from 2025-07-28. 1,000,000 context. $0.05/M input tokens. $0.40/M output tokens.",
-        "luigi_workers": 4,
-        "class": "OpenAILike",
-        "arguments": {
-            "model": "qwen-flash-2025-07-28",
-            "api_key": "${DASHSCOPE_API_KEY}",
-            "api_base": "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
-            "temperature": 0.1,
-            "timeout": 60.0,
-            "is_function_calling_model": false,
-            "is_chat_model": true,
-            "max_tokens": 16384,
-            "max_retries": 5
-        }
-    },
-    "ollama-llama3.1": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "llama3.1:latest",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "docker-ollama-llama3.1": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in a Docker container, and ollama is installed on the host the computer.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "llama3.1:latest",
-            "base_url": "http://host.docker.internal:11434",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "ollama-qwen2.5-coder": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "qwen2.5-coder:latest",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "lmstudio-qwen2.5-7b-instruct-1m": {
-        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "LMStudio",
-        "arguments": {
-            "model_name": "qwen2.5-7b-instruct-1m",
-            "base_url": "http://localhost:1234/v1",
-            "temperature": 0.2,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "docker-lmstudio-qwen2.5-7b-instruct-1m": {
-        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in a Docker container, and ollama is installed on the host the computer.",
-        "luigi_workers": 1,
-        "class": "LMStudio",
-        "arguments": {
-            "model_name": "qwen2.5-7b-instruct-1m",
-            "base_url": "http://host.docker.internal:1234/v1",
-            "temperature": 0.2,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
+        },
+        "priority": 4
     }
 }
diff --git a/llm_config.premium.json b/llm_config.premium.json
index 3e0a755da..34025bf77 100644
--- a/llm_config.premium.json
+++ b/llm_config.premium.json
@@ -1,26 +1,10 @@
 {
-    "openai-paid-gpt-5-nano": {
-        "comment": "This is slow. It's paid, and requires that you have a OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan.",
-        "luigi_workers": 4,
-        "class": "OpenAI",
-        "arguments": {
-            "model": "gpt-5-nano",
-            "api_key": "${OPENAI_API_KEY}",
-            "temperature": 1.0,
-            "timeout": 120.0,
-            "context_window": 400000,
-            "is_function_calling_model": false,
-            "is_chat_model": true,
-            "max_tokens": 128000,
-            "max_retries": 5
-        }
-    },
-    "openrouter-paid-openai-gpt-oss-20b": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created August 5, 2025. 131,072 context. $0.05/M input tokens. $0.20/M output tokens. This is a reasoning model, and uses more tokens than LLMs.",
+    "openrouter-paid-z-ai-glm-4-7-flash": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Jan 19, 2026. 202,752 context. $0.06/M input tokens. $0.40/M output tokens.",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "openai/gpt-oss-20b",
+            "model": "z-ai/glm-4.7-flash",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -28,29 +12,15 @@
             "is_chat_model": true,
             "max_tokens": 8192,
             "max_retries": 5
-        }
-    },
-    "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created Dec 14, 2025. 262,144 context. $0.06/M input tokens. $0.24/M output tokens. This is a reasoning model, and uses more tokens than LLMs.",
-        "luigi_workers": 4,
-        "class": "OpenRouter",
-        "arguments": {
-            "model": "nvidia/nemotron-3-nano-30b-a3b",
-            "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
-            "timeout": 60.0,
-            "is_function_calling_model": false,
-            "is_chat_model": true,
-            "max_tokens": 8192,
-            "max_retries": 5
-        }
+        },
+        "priority": 1
     },
-    "openrouter-paid-gemini-2.0-flash-001": {
-        "comment": "This is very fast. It's paid, so check the pricing before use. Created Feb 25, 2025. 1,048,576 context. $0.075/M input tokens. $0.30/M output tokens.",
+    "openrouter-paid-stepfun-step-3-5-flash": {
+        "comment": "This is very fast. It's paid, so check the pricing before use. Created Jan 29, 2026. 256,000 context. $0.10/M input tokens. $0.30/M output tokens.",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "google/gemini-2.0-flash-001",
+            "model": "stepfun/step-3.5-flash",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -61,12 +31,12 @@
         },
         "priority": 2
     },
-    "openrouter-paid-openai-gpt-4o-mini": {
-        "comment": "This is medium fast. It's paid, so check the pricing before use. Created Jul 18, 2024. 128,000 context. Starting at $0.15/M input tokens. Starting at $0.60/M output tokens.",
+    "openrouter-paid-z-ai-glm-5": {
+        "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 11, 2026. 204,800 context. $0.30/M input tokens. $2.55/M output tokens.",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "openai/gpt-4o-mini",
+            "model": "z-ai/glm-5",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -75,14 +45,14 @@
             "max_tokens": 8192,
             "max_retries": 5
         },
-        "priority": 1
+        "priority": 3
     },
-    "openrouter-paid-qwen3-30b-a3b": {
-        "comment": "This is slow. It's paid, so check the pricing before use. Created Apr 28, 2025. 40,960 context. $0.08/M input tokens. $0.29/M output tokens.",
+    "openrouter-paid-qwen-qwen3-5-397b-a17b": {
+        "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 16, 2026. 262,144 context. Pricing varies by context length: starting at $0.15/M input, $1.00/M output; highest tier $0.60/M input, $3.60/M output (when input > 128k tokens).",
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "qwen/qwen3-30b-a3b",
+            "model": "qwen/qwen3.5-397b-a17b",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 0.1,
             "timeout": 60.0,
@@ -91,23 +61,24 @@
             "max_tokens": 8192,
             "max_retries": 5
         },
-        "priority": 3
+        "priority": 4
     },
-    "alibabacloud-paid-qwen-flash-2025-07-28": {
-        "comment": "This is slow. It's paid, so check the pricing before use. Snapshot from 2025-07-28. 1,000,000 context. $0.05/M input tokens. $0.40/M output tokens.",
+    "openai-paid-gpt-5-mini": {
+        "comment": "This is slow. It's paid, and requires that you have a OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan. Last priority fallback.",
         "luigi_workers": 4,
-        "class": "OpenAILike",
+        "class": "OpenAI",
         "arguments": {
-            "model": "qwen-flash-2025-07-28",
-            "api_key": "${DASHSCOPE_API_KEY}",
-            "api_base": "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
-            "temperature": 0.1,
-            "timeout": 60.0,
+            "model": "gpt-5-mini",
+            "api_key": "${OPENAI_API_KEY}",
+            "temperature": 1.0,
+            "timeout": 120.0,
+            "context_window": 400000,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 16384,
+            "max_tokens": 128000,
             "max_retries": 5
-        }
+        },
+        "priority": 5
     },
     "ollama-llama3.1": {
         "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",

From 329e95015cf4ead2394087dd6498e83fcf2552bf Mon Sep 17 00:00:00 2001
From: Mark Barney <82deutschmark@gmail.com>
Date: Wed, 18 Feb 2026 16:34:34 -0500
Subject: [PATCH 3/8] feat(docker): mount all llm_config profile files into
 frontend_single_user container

Add volume mounts and sync actions for llm_config.premium.json, llm_config.frontier.json, and llm_config.custom.json in docker-compose.yml. Update Dockerfile to copy all llm_config*.json files during build. Enables multi-profile support in single-user frontend.
---
 docker-compose.yml              | 12 ++++++++++++
 frontend_single_user/Dockerfile |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 812fa6d59..e1d8b3316 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -151,6 +151,9 @@ services:
     volumes:
       - ./.env:/app/.env:ro
       - ./llm_config.json:/app/llm_config.json:ro
+      - ./llm_config.premium.json:/app/llm_config.premium.json:ro
+      - ./llm_config.frontier.json:/app/llm_config.frontier.json:ro
+      - ./llm_config.custom.json:/app/llm_config.custom.json:ro
       - ./run:/app/run
     restart: unless-stopped
     develop:
@@ -166,6 +169,15 @@ services:
         - action: sync
           path: ./llm_config.json
           target: /app/llm_config.json
+        - action: sync
+          path: ./llm_config.premium.json
+          target: /app/llm_config.premium.json
+        - action: sync
+          path: ./llm_config.frontier.json
+          target: /app/llm_config.frontier.json
+        - action: sync
+          path: ./llm_config.custom.json
+          target: /app/llm_config.custom.json
         - action: sync
           path: ./.env
           target: /app/.env
diff --git a/frontend_single_user/Dockerfile b/frontend_single_user/Dockerfile
index d59902b44..cc272ebc3 100644
--- a/frontend_single_user/Dockerfile
+++ b/frontend_single_user/Dockerfile
@@ -19,7 +19,7 @@ RUN pip install --no-cache-dir --upgrade pip \
 # Copy application code and supporting files
 COPY worker_plan/worker_plan_api /app/worker_plan_api
 COPY frontend_single_user /app/frontend_single_user
-COPY llm_config.json /app/
+COPY llm_config*.json /app/
 
 # Default location for generated plans
 RUN mkdir -p /app/run

From 05a75fa9864144da43f920222574af236e8dda29 Mon Sep 17 00:00:00 2001
From: Mark Barney <82deutschmark@gmail.com>
Date: Wed, 18 Feb 2026 16:49:45 -0500
Subject: [PATCH 4/8] feat(ui): add priority labels to model list and duplicate
 active config display

Add priority prefix (P0, P1, etc.) to each model in the profile models markdown. Create separate active_config_markdown component that mirrors profile_models_markdown to display current configuration. Update all callback outputs to populate both markdown components with identical content.
---
 frontend_single_user/app.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/frontend_single_user/app.py b/frontend_single_user/app.py
index 59bcfdd32..fab84bebb 100644
--- a/frontend_single_user/app.py
+++ b/frontend_single_user/app.py
@@ -272,12 +272,15 @@ def sort_key(item: tuple[str, dict]) -> tuple[int, str]:
 
     rows: list[str] = []
     for model_id, model_data in sorted(model_map.items(), key=sort_key):
+        priority_label = "n/a"
+        if isinstance(model_data, dict) and isinstance(model_data.get("priority"), int):
+            priority_label = str(model_data["priority"])
         model_name = model_id
         if isinstance(model_data, dict):
             arguments = model_data.get("arguments")
             if isinstance(arguments, dict) and isinstance(arguments.get("model"), str):
                 model_name = arguments["model"]
-        rows.append(f"- `{model_name}`")
+        rows.append(f"- P{priority_label}: `{model_name}`")
     return "\n".join([f"**Models in `{profile.value}`** (`{profile_filename}`):"] + rows)
 
 class MarkdownBuilder:
@@ -373,7 +376,8 @@ def initialize_browser_settings(browser_state, session_state: SessionState):
     session_state.llm_model = model
     session_state.speedvsdetail = speedvsdetail
     session_state.model_profile = model_profile
-    return openrouter_api_key, model, speedvsdetail, model_profile, _profile_models_markdown(model_profile), browser_state, session_state
+    profile_markdown = _profile_models_markdown(model_profile)
+    return openrouter_api_key, model, speedvsdetail, model_profile, profile_markdown, profile_markdown, browser_state, session_state
 
 def update_browser_settings_callback(openrouter_api_key, model, speedvsdetail, model_profile, browser_state, session_state: SessionState):
     try:
@@ -389,7 +393,8 @@ def update_browser_settings_callback(openrouter_api_key, model, speedvsdetail, m
     session_state.llm_model = model
     session_state.speedvsdetail = speedvsdetail
     session_state.model_profile = model_profile
-    return updated_browser_state, openrouter_api_key, model, speedvsdetail, model_profile, _profile_models_markdown(model_profile), session_state
+    profile_markdown = _profile_models_markdown(model_profile)
+    return updated_browser_state, openrouter_api_key, model, speedvsdetail, model_profile, profile_markdown, profile_markdown, session_state
 
 def run_planner(submit_or_retry_button, plan_prompt, browser_state, session_state: SessionState):
     """
@@ -733,6 +738,7 @@ def check_api_key(session_state: SessionState):
                     stop_btn = gr.Button("Stop")
                     retry_btn = gr.Button("Retry")
                     open_dir_btn = gr.Button("Open Output Dir", visible=OPEN_DIR_BUTTON_INITIAL_VISIBILITY)
+                active_config_markdown = gr.Markdown(_profile_models_markdown(ModelProfileEnum.BASELINE.value))
 
                 output_markdown = gr.Markdown("Output will appear here...")
                 status_markdown = gr.Markdown("Status messages will appear here...")
@@ -879,7 +885,7 @@ def check_api_key(session_state: SessionState):
     openrouter_api_key_text.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, active_config_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -889,7 +895,7 @@ def check_api_key(session_state: SessionState):
     model_radio.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, active_config_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -899,7 +905,7 @@ def check_api_key(session_state: SessionState):
     speedvsdetail_radio.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, active_config_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -909,7 +915,7 @@ def check_api_key(session_state: SessionState):
     model_profile_radio.change(
         fn=update_browser_settings_callback,
         inputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, browser_state, session_state],
-        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, session_state]
+        outputs=[browser_state, openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, active_config_markdown, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],
@@ -926,7 +932,7 @@ def check_api_key(session_state: SessionState):
     demo_text2plan.load(
         fn=initialize_browser_settings,
         inputs=[browser_state, session_state],
-        outputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, browser_state, session_state]
+        outputs=[openrouter_api_key_text, model_radio, speedvsdetail_radio, model_profile_radio, profile_models_markdown, active_config_markdown, browser_state, session_state]
     ).then(
         fn=check_api_key,
         inputs=[session_state],

From d7049ec9cbe9383a682be78ee2f118a1b2c5be9f Mon Sep 17 00:00:00 2001
From: Mark Barney <82deutschmark@gmail.com>
Date: Wed, 18 Feb 2026 16:59:36 -0500
Subject: [PATCH 5/8] config(llm): increase max_tokens to 32k and adjust model
 priorities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Increase max_tokens from 8192 to 32000 across all OpenRouter models in custom and premium profiles. Reorder custom profile priorities (qwen3-coder-next 3→4, nemotron 4→5, gemini-3-flash comment updated to "last resort"). Swap premium profile priorities (glm-4-7-flash 1→6, qwen3-5-397b-a17b 4→1). Remove local model entries (ollama-llama3.1, docker-ollama-llama3.1, ollama-qwen2.5-coder, lmstudio variants) from premium profile.
---
 llm_config.custom.json  | 16 +++++-----
 llm_config.premium.json | 70 ++++-------------------------------------
 2 files changed, 14 insertions(+), 72 deletions(-)

diff --git a/llm_config.custom.json b/llm_config.custom.json
index 9fac1344c..f81425169 100644
--- a/llm_config.custom.json
+++ b/llm_config.custom.json
@@ -11,12 +11,12 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         }
     },
     "openrouter-paid-gemini-3-flash-preview": {
-        "comment": "Secondary fallback. Paid OpenRouter access to Gemini 3 Flash Preview for balanced speed and context.",
+        "comment": "last resort fallback. Paid OpenRouter access to Gemini 3 Flash Preview for balanced speed and context.",
         "priority": 2,
         "luigi_workers": 4,
         "class": "OpenRouter",
@@ -27,7 +27,7 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         }
     },
@@ -43,13 +43,13 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         }
     },
     "openrouter-paid-qwen3-coder-next": {
         "comment": "Code-focused fallback sharing priority tier 3, targeting Qwen3 Coder Next via OpenRouter.",
-        "priority": 3,
+        "priority": 4,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -59,13 +59,13 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         }
     },
     "openrouter-paid-nvidia-nemotron-3-nano-30b-a3b": {
         "comment": "Last-resort fallback. Paid OpenRouter access to NVIDIA Nemotron 3 Nano 30B A3B for extended context needs.",
-        "priority": 4,
+        "priority": 5,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -75,7 +75,7 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         }
     }
diff --git a/llm_config.premium.json b/llm_config.premium.json
index 34025bf77..79d69cea1 100644
--- a/llm_config.premium.json
+++ b/llm_config.premium.json
@@ -10,10 +10,10 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         },
-        "priority": 1
+        "priority": 6
     },
     "openrouter-paid-stepfun-step-3-5-flash": {
         "comment": "This is very fast. It's paid, so check the pricing before use. Created Jan 29, 2026. 256,000 context. $0.10/M input tokens. $0.30/M output tokens.",
@@ -26,7 +26,7 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         },
         "priority": 2
@@ -42,7 +42,7 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         },
         "priority": 3
@@ -58,10 +58,10 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 8192,
+            "max_tokens": 32000,
             "max_retries": 5
         },
-        "priority": 4
+        "priority": 1
     },
     "openai-paid-gpt-5-mini": {
         "comment": "This is slow. It's paid, and requires that you have a OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan. Last priority fallback.",
@@ -79,63 +79,5 @@
             "max_retries": 5
         },
         "priority": 5
-    },
-    "ollama-llama3.1": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "llama3.1:latest",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "docker-ollama-llama3.1": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in a Docker container, and ollama is installed on the host the computer.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "llama3.1:latest",
-            "base_url": "http://host.docker.internal:11434",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "ollama-qwen2.5-coder": {
-        "comment": "This runs on your own computer. It's free. Requires Ollama to be installed. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "Ollama",
-        "arguments": {
-            "model": "qwen2.5-coder:latest",
-            "temperature": 0.5,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "lmstudio-qwen2.5-7b-instruct-1m": {
-        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in .venv on the host computer. No use of docker.",
-        "luigi_workers": 1,
-        "class": "LMStudio",
-        "arguments": {
-            "model_name": "qwen2.5-7b-instruct-1m",
-            "base_url": "http://localhost:1234/v1",
-            "temperature": 0.2,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
-    },
-    "docker-lmstudio-qwen2.5-7b-instruct-1m": {
-        "comment": "This runs on your own computer. It's free. Requires LM Studio to be installed. Great for inspecting the request/response. PlanExe runs in a Docker container, and ollama is installed on the host the computer.",
-        "luigi_workers": 1,
-        "class": "LMStudio",
-        "arguments": {
-            "model_name": "qwen2.5-7b-instruct-1m",
-            "base_url": "http://host.docker.internal:1234/v1",
-            "temperature": 0.2,
-            "request_timeout": 120.0,
-            "is_function_calling_model": false
-        }
     }
 }

From 0ff64a7164ac166b433c4e36cb7cee81d42ade26 Mon Sep 17 00:00:00 2001
From: Mark Barney <82deutschmark@gmail.com>
Date: Wed, 18 Feb 2026 17:03:33 -0500
Subject: [PATCH 6/8] config(llm): standardize priority field placement and
 adjust model priorities

Move priority field before luigi_workers in the frontier and premium config files so all profiles use the same field order. In custom profile, demote gemini-3-flash from priority 2 to 12 and promote qwen3-coder-next from priority 4 to 2. In frontier profile, reduce max_tokens from 60000 to 32000 across all models. In premium profile, only the position of the priority field changes; the existing values (qwen3-5-397b-a17b at priority 1, glm-4-7-flash at priority 6) are kept as set in the previous commit.
---
 llm_config.custom.json   |  4 ++--
 llm_config.frontier.json | 24 ++++++++++++------------
 llm_config.premium.json  | 20 ++++++++++----------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/llm_config.custom.json b/llm_config.custom.json
index f81425169..b25bd56db 100644
--- a/llm_config.custom.json
+++ b/llm_config.custom.json
@@ -17,7 +17,7 @@
     },
     "openrouter-paid-gemini-3-flash-preview": {
         "comment": "last resort fallback. Paid OpenRouter access to Gemini 3 Flash Preview for balanced speed and context.",
-        "priority": 2,
+        "priority": 12,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -49,7 +49,7 @@
     },
     "openrouter-paid-qwen3-coder-next": {
         "comment": "Code-focused fallback sharing priority tier 3, targeting Qwen3 Coder Next via OpenRouter.",
-        "priority": 4,
+        "priority": 2,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
diff --git a/llm_config.frontier.json b/llm_config.frontier.json
index 8ac9b9b05..a7d19683d 100644
--- a/llm_config.frontier.json
+++ b/llm_config.frontier.json
@@ -1,6 +1,7 @@
 {
     "openrouter-paid-z-ai-glm-5": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 11, 2026. 204,800 context. $0.30/M input tokens. $2.55/M output tokens.",
+        "priority": 1,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -10,13 +11,13 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 60000,
+            "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 1
+        }
     },
     "openrouter-paid-moonshotai-kimi-k2-5": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Jan 27, 2026. 262,144 context. $0.23/M input tokens. $3.00/M output tokens.",
+        "priority": 2,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -26,13 +27,13 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 60000,
+            "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 2
+        }
     },
     "openrouter-paid-qwen-qwen3-5-397b-a17b": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 16, 2026. 262,144 context. Pricing: $0.60/M input tokens. $3.60/M output tokens.",
+        "priority": 3,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -42,13 +43,13 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 60000,
+            "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 3
+        }
     },
     "openrouter-paid-google-gemini-2-5-flash-lite": {
         "comment": "This is very fast. It's paid, so check the pricing before use. Fallback option. Available on OpenRouter.",
+        "priority": 4,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -58,9 +59,8 @@
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
-            "max_tokens": 60000,
+            "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 4
+        }
     }
 }
diff --git a/llm_config.premium.json b/llm_config.premium.json
index 79d69cea1..cc8e45e17 100644
--- a/llm_config.premium.json
+++ b/llm_config.premium.json
@@ -1,6 +1,7 @@
 {
     "openrouter-paid-z-ai-glm-4-7-flash": {
         "comment": "This is very fast. It's paid, so check the pricing before use. Created Jan 19, 2026. 202,752 context. $0.06/M input tokens. $0.40/M output tokens.",
+        "priority": 6,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -12,11 +13,11 @@
             "is_chat_model": true,
             "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 6
+        }
     },
     "openrouter-paid-stepfun-step-3-5-flash": {
         "comment": "This is very fast. It's paid, so check the pricing before use. Created Jan 29, 2026. 256,000 context. $0.10/M input tokens. $0.30/M output tokens.",
+        "priority": 2,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -28,11 +29,11 @@
             "is_chat_model": true,
             "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 2
+        }
     },
     "openrouter-paid-z-ai-glm-5": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 11, 2026. 204,800 context. $0.30/M input tokens. $2.55/M output tokens.",
+        "priority": 3,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -44,11 +45,11 @@
             "is_chat_model": true,
             "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 3
+        }
     },
     "openrouter-paid-qwen-qwen3-5-397b-a17b": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 16, 2026. 262,144 context. Pricing varies by context length: starting at $0.15/M input, $1.00/M output; highest tier $0.60/M input, $3.60/M output (when input > 128k tokens).",
+        "priority": 1,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -60,11 +61,11 @@
             "is_chat_model": true,
             "max_tokens": 32000,
             "max_retries": 5
-        },
-        "priority": 1
+        }
     },
     "openai-paid-gpt-5-mini": {
         "comment": "This is slow. It's paid, and requires that you have a OPENAI_API_KEY in the .env file. This is a reasoning model, so it takes MUCH longer to create a plan. Last priority fallback.",
+        "priority": 5,
         "luigi_workers": 4,
         "class": "OpenAI",
         "arguments": {
@@ -77,7 +78,6 @@
             "is_chat_model": true,
             "max_tokens": 128000,
             "max_retries": 5
-        },
-        "priority": 5
+        }
     }
 }

From eac00f8a917fbb82cfd15c98e2d084fdc28dbf13 Mon Sep 17 00:00:00 2001
From: Mark Barney <82deutschmark@gmail.com>
Date: Wed, 18 Feb 2026 18:00:10 -0500
Subject: [PATCH 7/8] config(llm): increase temperature from 0.1 to 1.0 across
 all profiles and update custom profile models

Raise temperature parameter from 0.1 to 1.0 for all models in custom, frontier, and premium profiles to increase response creativity. In custom profile, replace minimax-01 with minimax-m2.5 and swap qwq-32b-preview with qwen3.5-397b-a17b.
---
 llm_config.custom.json   | 14 +++++++-------
 llm_config.frontier.json |  8 ++++----
 llm_config.premium.json  |  8 ++++----
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/llm_config.custom.json b/llm_config.custom.json
index b25bd56db..58080308b 100644
--- a/llm_config.custom.json
+++ b/llm_config.custom.json
@@ -7,7 +7,7 @@
         "arguments": {
             "model": "openai/gpt-oss-120b",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -23,7 +23,7 @@
         "arguments": {
             "model": "google/gemini-3-flash-preview",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -37,9 +37,9 @@
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "minimax/minimax-01",
+            "model": "minimax/minimax-m2.5",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -53,9 +53,9 @@
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "qwen/qwq-32b-preview",
+            "model": "qwen/qwen3.5-397b-a17b",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -71,7 +71,7 @@
         "arguments": {
             "model": "nvidia/nemotron-3-nano-30b-a3b",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
diff --git a/llm_config.frontier.json b/llm_config.frontier.json
index a7d19683d..a46d4f4e4 100644
--- a/llm_config.frontier.json
+++ b/llm_config.frontier.json
@@ -7,7 +7,7 @@
         "arguments": {
             "model": "z-ai/glm-5",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -23,7 +23,7 @@
         "arguments": {
             "model": "moonshotai/kimi-k2.5",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -39,7 +39,7 @@
         "arguments": {
             "model": "qwen/qwen3.5-397b-a17b",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -55,7 +55,7 @@
         "arguments": {
             "model": "google/gemini-2.5-flash-lite",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
diff --git a/llm_config.premium.json b/llm_config.premium.json
index cc8e45e17..4c6ed5f68 100644
--- a/llm_config.premium.json
+++ b/llm_config.premium.json
@@ -7,7 +7,7 @@
         "arguments": {
             "model": "z-ai/glm-4.7-flash",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -23,7 +23,7 @@
         "arguments": {
             "model": "stepfun/step-3.5-flash",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -39,7 +39,7 @@
         "arguments": {
             "model": "z-ai/glm-5",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,
@@ -55,7 +55,7 @@
         "arguments": {
             "model": "qwen/qwen3.5-397b-a17b",
             "api_key": "${OPENROUTER_API_KEY}",
-            "temperature": 0.1,
+            "temperature": 1,
             "timeout": 60.0,
             "is_function_calling_model": false,
             "is_chat_model": true,

From 2bfdf6aa1d59a7185589e3d53898776b9387cb89 Mon Sep 17 00:00:00 2001
From: Mark Barney <82deutschmark@gmail.com>
Date: Wed, 18 Feb 2026 21:53:32 -0500
Subject: [PATCH 8/8] config(llm): reprioritize models and switch to
 gemini-2.5-flash-lite across profiles

Demote gpt-oss-120b from priority 1 to 11 and promote qwen3-coder-next from priority 2 to 1 in custom profile. Replace gemini-3-flash-preview with gemini-2.5-flash-lite-preview-09-2025 at priority 12. In frontier profile, demote glm-5 from priority 1 to 11, demote kimi-k2-5 from priority 2 to 111, and promote qwen3-5-397b-a17b from priority 3 to 1. In premium profile, swap qwen3-5-397b-a17b to priority 2, replace glm-5 with gemini-2.5-flash-lite-preview-09-2025 and promote it from priority 3 to 1, and demote step-3-5-flash from priority 2 to 21.
---
 llm_config.custom.json   |  8 ++++----
 llm_config.frontier.json |  6 +++---
 llm_config.premium.json  | 10 +++++-----
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/llm_config.custom.json b/llm_config.custom.json
index 58080308b..363e838ba 100644
--- a/llm_config.custom.json
+++ b/llm_config.custom.json
@@ -1,7 +1,7 @@
 {
     "openrouter-paid-openai-gpt-oss-120b": {
         "comment": "Strict-priority primary model. Paid OpenRouter route to OpenAI GPT-OSS-120B. Created Jul 18, 2024. 128,000 context. $0.15/M input tokens. $0.60/M output tokens.",
-        "priority": 1,
+        "priority": 11,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -15,13 +15,13 @@
             "max_retries": 5
         }
     },
-    "openrouter-paid-gemini-3-flash-preview": {
+    "openrouter-paid-google/gemini-2.5-flash-lite-preview-09-2025": {
         "comment": "last resort fallback. Paid OpenRouter access to Gemini 3 Flash Preview for balanced speed and context.",
         "priority": 12,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "google/gemini-3-flash-preview",
+            "model": "google/gemini-2.5-flash-lite-preview-09-2025",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 1,
             "timeout": 60.0,
@@ -49,7 +49,7 @@
     },
     "openrouter-paid-qwen3-coder-next": {
         "comment": "Code-focused fallback sharing priority tier 3, targeting Qwen3 Coder Next via OpenRouter.",
-        "priority": 2,
+        "priority": 1,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
diff --git a/llm_config.frontier.json b/llm_config.frontier.json
index a46d4f4e4..e0551e1b8 100644
--- a/llm_config.frontier.json
+++ b/llm_config.frontier.json
@@ -1,7 +1,7 @@
 {
     "openrouter-paid-z-ai-glm-5": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 11, 2026. 204,800 context. $0.30/M input tokens. $2.55/M output tokens.",
-        "priority": 1,
+        "priority": 11,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -17,7 +17,7 @@
     },
     "openrouter-paid-moonshotai-kimi-k2-5": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Jan 27, 2026. 262,144 context. $0.23/M input tokens. $3.00/M output tokens.",
-        "priority": 2,
+        "priority": 111,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -33,7 +33,7 @@
     },
     "openrouter-paid-qwen-qwen3-5-397b-a17b": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 16, 2026. 262,144 context. Pricing: $0.60/M input tokens. $3.60/M output tokens.",
-        "priority": 3,
+        "priority": 1,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
diff --git a/llm_config.premium.json b/llm_config.premium.json
index 4c6ed5f68..a5ded5cb9 100644
--- a/llm_config.premium.json
+++ b/llm_config.premium.json
@@ -17,7 +17,7 @@
     },
     "openrouter-paid-stepfun-step-3-5-flash": {
         "comment": "This is very fast. It's paid, so check the pricing before use. Created Jan 29, 2026. 256,000 context. $0.10/M input tokens. $0.30/M output tokens.",
-        "priority": 2,
+        "priority": 21,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
@@ -31,13 +31,13 @@
             "max_retries": 5
         }
     },
-    "openrouter-paid-z-ai-glm-5": {
+    "openrouter-google/gemini-2.5-flash-lite-preview-09-2025": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 11, 2026. 204,800 context. $0.30/M input tokens. $2.55/M output tokens.",
-        "priority": 3,
+        "priority": 1,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {
-            "model": "z-ai/glm-5",
+            "model": "google/gemini-2.5-flash-lite-preview-09-2025",
             "api_key": "${OPENROUTER_API_KEY}",
             "temperature": 1,
             "timeout": 60.0,
@@ -49,7 +49,7 @@
     },
     "openrouter-paid-qwen-qwen3-5-397b-a17b": {
         "comment": "This is fast. It's paid, so check the pricing before use. Created Feb 16, 2026. 262,144 context. Pricing varies by context length: starting at $0.15/M input, $1.00/M output; highest tier $0.60/M input, $3.60/M output (when input > 128k tokens).",
-        "priority": 1,
+        "priority": 2,
         "luigi_workers": 4,
         "class": "OpenRouter",
         "arguments": {