From ed8583f8dc994a2ffb2b25337c4e8fa3f293c36e Mon Sep 17 00:00:00 2001 From: Larry the Laptop Lobster Date: Wed, 18 Feb 2026 14:49:44 -0500 Subject: [PATCH 1/8] feat: add multi-profile llm config switching across frontend/api/worker --- docs/llm_config.md | 108 ++++++----- frontend_multi_user/src/app.py | 13 +- frontend_multi_user/templates/demo_run.html | 3 +- frontend_multi_user/templates/index.html | 7 + frontend_single_user/app.py | 55 ++++-- llm_config.custom.json | 170 ++++++++++++++++++ llm_config.frontier.json | 170 ++++++++++++++++++ llm_config.premium.json | 170 ++++++++++++++++++ mcp_cloud/app.py | 17 +- mcp_cloud/http_server.py | 6 + mcp_cloud/tests/test_speed_vs_detail.py | 19 +- mcp_cloud/tool_models.py | 4 + worker_plan/Dockerfile | 2 +- worker_plan/README.md | 2 + worker_plan/app.py | 20 ++- worker_plan/worker_plan_api/model_profile.py | 126 +++++++++++++ worker_plan/worker_plan_api/planexe_config.py | 159 ++++++++-------- .../tests/test_model_profile.py | 37 ++++ .../worker_plan_internal/llm_factory.py | 34 +++- .../plan/pipeline_environment.py | 5 +- .../plan/run_plan_pipeline.py | 36 +++- .../utils/planexe_llmconfig.py | 12 +- worker_plan_database/Dockerfile | 2 +- worker_plan_database/app.py | 77 ++++++-- worker_plan_database/model_profile.py | 9 + worker_plan_database/railway.toml | 2 +- .../tests/test_model_profile.py | 22 +++ 27 files changed, 1111 insertions(+), 176 deletions(-) create mode 100644 llm_config.custom.json create mode 100644 llm_config.frontier.json create mode 100644 llm_config.premium.json create mode 100644 worker_plan/worker_plan_api/model_profile.py create mode 100644 worker_plan/worker_plan_api/tests/test_model_profile.py create mode 100644 worker_plan_database/model_profile.py create mode 100644 worker_plan_database/tests/test_model_profile.py diff --git a/docs/llm_config.md b/docs/llm_config.md index 076a3fae5..3a03e98d3 100644 --- a/docs/llm_config.md +++ b/docs/llm_config.md @@ -1,16 +1,72 @@ --- -title: LLM config 
(llm_config.json) +title: LLM config profiles --- -# LLM config (llm_config.json) +# LLM config profiles -This file defines which LLM providers and models PlanExe can use. Each top‑level key is a model id used in the UI and pipeline. +PlanExe supports **4 model profiles**: -`llm_config.json` lives in the PlanExe repo root and is read at runtime. Environment variables are substituted from `.env`. +- `baseline` +- `premium` +- `frontier` +- `custom` + +Each profile maps to a separate config file: + +- `baseline` → `llm_config.json` +- `premium` → `llm_config.premium.json` +- `frontier` → `llm_config.frontier.json` +- `custom` → `llm_config.custom.json` (or `PLANEXE_LLM_CONFIG_CUSTOM_FILENAME`) + +If the selected profile file is missing or invalid, PlanExe safely falls back to `llm_config.json`. + +--- + +## How profile selection works + +### Runtime env var + +Set: + +- `PLANEXE_MODEL_PROFILE=baseline|premium|frontier|custom` + +The selected profile is passed end-to-end in worker execution paths (frontend/API/task parameters → worker pipeline). + +### Request/task parameter + +Task producers (web frontend, MCP) can include: + +- `model_profile` + +Invalid values are normalized to `baseline`. + +--- + +## Strict filename validation + +Config filenames are strictly validated: + +- must be a **filename only** (no `/` or `\`, and no absolute paths) +- must match: `llm_config*.json` + +This prevents path traversal and unsafe file selection. + +Legacy override `PLANEXE_LLM_CONFIG_NAME` is still supported for backward compatibility, but profile-based selection is preferred. + +--- + +## Provider-priority ordering per profile + +Within each profile config file, priority is defined per model entry: + +- lower `priority` value = tried first +- higher `priority` value = tried later, as a fallback + +`auto` mode uses this profile-specific priority ordering. 
--- -## File structure +## File format (same for all profile files) ```json { @@ -24,8 +80,6 @@ This file defines which LLM providers and models PlanExe can use. Each top‑lev "api_key": "${OPENROUTER_API_KEY}", "temperature": 0.1, "timeout": 60.0, - "is_function_calling_model": false, - "is_chat_model": true, "max_tokens": 8192, "max_retries": 5 } @@ -35,41 +89,11 @@ This file defines which LLM providers and models PlanExe can use. Each top‑lev --- -## Top-level fields +## Backward compatibility -- **comment**: Plain‑text description for humans. Optional. -- **priority**: Lower number = higher priority when `auto` is selected. Optional. -- **luigi_workers**: Number of Luigi workers used for this model. Use `1` for local models (Ollama/LM Studio). -- **class**: Provider class name (e.g., `OpenRouter`, `OpenAI`, `Ollama`, `LMStudio`, `OpenAILike`). -- **arguments**: Provider‑specific settings passed to the LLM client. - ---- - -## Common arguments - -These keys are common across most providers: - -- **model** / **model_name**: Provider model identifier. -- **api_key**: API key reference (usually `${ENV_VAR}`). -- **base_url** / **api_base**: Override the provider base URL. -- **temperature**: Controls randomness. Lower is more deterministic. -- **timeout** / **request_timeout**: Max time per request in seconds. -- **max_tokens** / **max_completion_tokens**: Output token limit (provider specific). -- **max_retries**: Retry count on transient errors. -- **is_function_calling_model**: Whether the model supports structured/tool output. -- **is_chat_model**: Whether the model uses chat format. - ---- - -## Choosing values - -- Use **luigi_workers = 1** for local models (Ollama / LM Studio). -- Use **luigi_workers > 1** for cloud models if you want parallel tasks. -- Keep **timeout** higher for slower models. 
- ---- +When no profile is provided, PlanExe defaults to: -## Notes +- `baseline` +- `llm_config.json` -- If `llm_config.json` is missing, PlanExe logs a warning and proceeds with defaults. -- Changes to `llm_config.json` require a container restart (or rebuild if baked into the image). +So existing deployments continue to work without changes. diff --git a/frontend_multi_user/src/app.py b/frontend_multi_user/src/app.py index 17e8b415c..dde0063f7 100644 --- a/frontend_multi_user/src/app.py +++ b/frontend_multi_user/src/app.py @@ -53,6 +53,7 @@ from worker_plan_api.planexe_dotenv import DotEnvKeyEnum, PlanExeDotEnv from worker_plan_api.planexe_config import PlanExeConfig +from worker_plan_api.model_profile import normalize_model_profile RUN_DIR = "run" @@ -2401,6 +2402,12 @@ def run(): if len(parameters) == 0: parameters = None + # Normalize model profile to a known value with backward-compatible baseline default. + if not isinstance(parameters, dict): + parameters = {} + raw_profile = parameters.get("model_profile") + parameters["model_profile"] = normalize_model_profile(raw_profile).value + # Get length of prompt_param in bytes and in characters prompt_param_bytes = len(prompt_param.encode('utf-8')) prompt_param_characters = len(prompt_param) @@ -2502,8 +2509,10 @@ def create_plan(): parameters.pop('user_id', None) parameters.pop('nonce', None) parameters.pop('redirect_to_plan', None) - if len(parameters) == 0: - parameters = None + + # Normalize model profile to a known value with backward-compatible baseline default. 
+ raw_profile = parameters.get("model_profile") + parameters["model_profile"] = normalize_model_profile(raw_profile).value prompt_param_bytes = len(prompt_param.encode('utf-8')) prompt_param_characters = len(prompt_param) diff --git a/frontend_multi_user/templates/demo_run.html b/frontend_multi_user/templates/demo_run.html index c588c2c9d..3b7a86d96 100644 --- a/frontend_multi_user/templates/demo_run.html +++ b/frontend_multi_user/templates/demo_run.html @@ -211,6 +211,7 @@

Demo Run

+ @@ -282,7 +283,7 @@

Demo Run

if (methodSelect.value === 'GET') { // GET method: build URL with query parameters - let url = `/run?prompt=${encodeURIComponent(promptValue)}&user_id={{ user_id }}&nonce={{ nonce }}&speed_vs_detail=${encodeURIComponent(speedVsDetailValue)}`; + let url = `/run?prompt=${encodeURIComponent(promptValue)}&user_id={{ user_id }}&nonce={{ nonce }}&speed_vs_detail=${encodeURIComponent(speedVsDetailValue)}&model_profile=baseline`; if (developerChecked) { url += '&developer'; } diff --git a/frontend_multi_user/templates/index.html b/frontend_multi_user/templates/index.html index 83e6cb8ad..1da3eb207 100644 --- a/frontend_multi_user/templates/index.html +++ b/frontend_multi_user/templates/index.html @@ -468,6 +468,13 @@

Start a New Plan

+ +
0 characters