From ab7ce3f2a1aae4e06b9158f8589164e58ab3aefc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= Date: Wed, 6 May 2026 15:11:07 +0100 Subject: [PATCH] Fix litellm forgetting api keys on --- .../embed/infrastructure/base/templates/llm.yaml | 11 ++++++----- internal/stack/stack.go | 12 +++++++++--- internal/stack/stack_test.go | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index 5ee34beb..db9ee90b 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -96,9 +96,12 @@ metadata: data: {} --- -# Secret for LiteLLM master key and cloud provider API keys. -# Master key is unique per cluster ({{CLUSTER_ID}} resolved at init). -# Provider keys start empty; patched via `obol model setup`. +# Secret for LiteLLM master key. Provider API keys (ANTHROPIC_API_KEY, +# OPENAI_API_KEY, ...) are patched in by `obol model setup` and the +# autoConfigureLLM hook on first up. We deliberately do NOT declare empty +# placeholders here — re-applying this manifest on every `obol stack up` +# would overwrite any keys the user has set, leaving `obol model status` +# in the misleading state of `enabled: true, api_key: false`. apiVersion: v1 kind: Secret metadata: @@ -107,8 +110,6 @@ metadata: type: Opaque stringData: LITELLM_MASTER_KEY: "sk-obol-{{CLUSTER_ID}}" - ANTHROPIC_API_KEY: "" - OPENAI_API_KEY: "" --- apiVersion: apps/v1 diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 2965a5a3..f4cfd974 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -630,9 +630,15 @@ func autoDetectCloudProvider(cfg *config.Config, u *ui.UI) string { return "" } - // Already configured — skip. - if model.HasProviderConfigured(cfg, provider) { - return "" + // Skip only when both the model entry AND the API key are present. + // A re-up that lost the Secret's API key (or any drift between + // ConfigMap and Secret) heals here by re-patching from the env var, + // instead of leaving `obol model status` reporting + // `enabled: true, api_key: false` as it did before this check. + if statuses, err := model.GetProviderStatus(cfg); err == nil { + if st, ok := statuses[provider]; ok && st.Enabled && st.HasAPIKey { + return "" + } } // Resolve API key: try primary + alt env vars, then .env in dev mode. diff --git a/internal/stack/stack_test.go b/internal/stack/stack_test.go index e1413997..1cb8b2f5 100644 --- a/internal/stack/stack_test.go +++ b/internal/stack/stack_test.go @@ -496,6 +496,20 @@ func TestLLMTemplate_IncludesPaidRouteAndBuyerSidecar(t *testing.T) { if strings.Contains(out, "custom_provider_map") { t.Fatalf("llm template should not require a custom provider:\n%s", out) } + + // Regression guard: provider API keys must not be pre-declared as + // empty placeholders in the bootstrap Secret. If they are, every + // `obol stack up` re-applies the manifest and overwrites whatever + // `obol model setup` (or autoConfigureLLM) patched in, leaving the + // user with `obol model status` reporting `enabled: true, api_key: false`. + for _, forbidden := range []string{ + `ANTHROPIC_API_KEY: ""`, + `OPENAI_API_KEY: ""`, + } { + if strings.Contains(out, forbidden) { + t.Fatalf("llm template must not pre-declare empty provider API keys (found %q) — these get clobbered on every `obol stack up`", forbidden) + } + } } func TestMergeLiteLLMConfigPreservesChartDefaultsAndPreviousModels(t *testing.T) {