From 360d27510f7eeb31c7ae3bd90eef6e80e5128971 Mon Sep 17 00:00:00 2001 From: Sam Xu Date: Fri, 15 May 2026 00:13:23 -0700 Subject: [PATCH] chore(litellm): retire env-var codex auth path (ADR-014 Phase A) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pod-side device-auth via the codex-cli sidecar (ADR-014) is the live auth source for ChatGPT/Codex. The env-var-fed path (laptop-device-auth tokens uploaded to GCP SM → secret → env var) is dead-on-arrival under cluster-IP-bound OAuth — those tokens 401 token_invalidated on first cluster use regardless of JWT exp. - codex-auth-rotator: drop env-var fallback branch in get_candidates; pod-side /chatgpt-auth/auth-N.json is the only source. Drop unused OPENAI_CODEX_*_{1,2,3} env vars (only CLIENT_ID remains for OAuth refresh). - codex-auth-seed init: replace 150-line env-var seeder with a 40-line pod-side seeder so LiteLLM has auth.json ready before boot (avoids startup race with rotator sidecar). Drops all OPENAI_CODEX_* env vars from init container too. - LiteLLM main container: drop unused OPENAI_CODEX_ACCESS_TOKEN[_2|_3] env vars; litellm-config no longer references them. Net: 212 lines of legacy auth code gone. Phase B follow-ups (#371-#373) cover clawdbot env vars, the daily refresh job, and the presets.ts audit. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../templates/agents/litellm-deployment.yaml | 258 ++++-------------- 1 file changed, 46 insertions(+), 212 deletions(-) diff --git a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml index d99cf918..ef35a030 100644 --- a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml +++ b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml @@ -174,29 +174,11 @@ spec: name: api-keys key: anthropic-api-key optional: true - # chatgpt/ provider reads auth.json from CHATGPT_TOKEN_DIR (written by init container). - # Account-1 token is written to auth.json; accounts 2 & 3 use api_key in litellm_params. + # chatgpt/ provider reads auth.json from CHATGPT_TOKEN_DIR (written by the + # codex-auth-rotator sidecar; source files seeded by operator via the codex-cli + # sidecar). See ADR-014. - name: CHATGPT_TOKEN_DIR value: /chatgpt-auth - # Codex account tokens — account-1 goes to auth.json; accounts 2 & 3 use api_key. - - name: OPENAI_CODEX_ACCESS_TOKEN - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-access-token - optional: true - - name: OPENAI_CODEX_ACCESS_TOKEN_2 - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-access-token-2 - optional: true - - name: OPENAI_CODEX_ACCESS_TOKEN_3 - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-access-token-3 - optional: true # Force prompt/response storage in spend logs regardless of runtime general_settings. # The config-file setting (store_prompts_in_spend_logs: true) is sometimes shadowed by # the in-memory general_settings dict at startup; the env var is the reliable fallback. @@ -364,15 +346,10 @@ spec: def get_candidates(): """Build the rotation candidate list. - PREFERRED: pod-side device-auth'd files at /chatgpt-auth/auth-{1,2,3}.json. - Operator created these via `kubectl exec` into the codex-cli sidecar + - `codex login --device-auth`. Tokens are cluster-IP-bound, so ChatGPT - doesn't invalidate them on cluster usage (the inverse of the env-var - path below where laptop-bound sessions die on first cluster call). - - FALLBACK: env-var-fed tokens from GCP SM. Kept for backward compat with - older operator flows; flagged stale by `--mode envvar` in logs so - it's clear when we're on the dead path. + Pod-side device-auth'd files at /chatgpt-auth/auth-{1,2,3}.json. + Operator created these via `kubectl exec` into the codex-cli sidecar + + `codex login --device-auth`. Tokens are cluster-IP-bound, so ChatGPT + doesn't invalidate them on cluster usage. See ADR-014. """ pod_files = [ ('1', '/chatgpt-auth/auth-1.json'), @@ -385,20 +362,6 @@ spec: if rec: access, refresh, id_tok = rec out.append((label, access, refresh, id_tok)) - if out: - return out - # Fallback: env-var path (legacy, pre-cluster-bound) - specs = [ - ('1', 'OPENAI_CODEX_ACCESS_TOKEN', 'OPENAI_CODEX_REFRESH_TOKEN', 'OPENAI_CODEX_ID_TOKEN'), - ('2', 'OPENAI_CODEX_ACCESS_TOKEN_2', 'OPENAI_CODEX_REFRESH_TOKEN_2', ''), - ('3', 'OPENAI_CODEX_ACCESS_TOKEN_3', 'OPENAI_CODEX_REFRESH_TOKEN_3', 'OPENAI_CODEX_ID_TOKEN_3'), - ] - for label, a_env, r_env, i_env in specs: - access = os.environ.get(a_env, '') - refresh = os.environ.get(r_env, '') - id_tok = os.environ.get(i_env, '') if i_env else '' - if access or refresh: - out.append((label, access, refresh, id_tok)) return out def write_auth(label, access, refresh, id_tok, exp): @@ -466,30 +429,6 @@ spec: # Can be overridden per-deploy via Helm if you have one dedicated # cluster-only account and don't need rotation at all. value: "1800" - - name: OPENAI_CODEX_ACCESS_TOKEN - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-access-token, optional: true} - - name: OPENAI_CODEX_REFRESH_TOKEN - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-refresh-token, optional: true} - - name: OPENAI_CODEX_ID_TOKEN - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-id-token, optional: true} - - name: OPENAI_CODEX_ACCESS_TOKEN_2 - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-access-token-2, optional: true} - - name: OPENAI_CODEX_REFRESH_TOKEN_2 - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-refresh-token-2, optional: true} - - name: OPENAI_CODEX_ACCESS_TOKEN_3 - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-access-token-3, optional: true} - - name: OPENAI_CODEX_REFRESH_TOKEN_3 - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-refresh-token-3, optional: true} - - name: OPENAI_CODEX_ID_TOKEN_3 - valueFrom: - secretKeyRef: {name: api-keys, key: openai-codex-id-token-3, optional: true} - name: OPENAI_CODEX_CLIENT_ID valueFrom: secretKeyRef: {name: api-keys, key: openai-codex-client-id, optional: true} @@ -569,21 +508,22 @@ spec: cpu: 200m memory: 256Mi initContainers: - # Write the best available (non-expired) Codex token to auth.json for LiteLLM's chatgpt/ provider. - # Account-1 entries in litellm-config have no api_key — the chatgpt/ provider reads - # CHATGPT_TOKEN_DIR/auth.json at startup. Accounts 2 & 3 use api_key in litellm_params directly. - # Priority: account-3 first (quota available), then account-1 as fallback, else empty (disables chatgpt/ provider). - # NOTE: chatgpt/ provider ignores api_key in litellm_params — all accounts share this single auth.json. - # There is no per-deployment rotation; whichever account is written here is used for ALL chatgpt/ calls. + # Seed /chatgpt-auth/auth.json from a pod-side device-auth'd file before LiteLLM + # starts, so the chatgpt/ provider has a token to read on first boot. This avoids + # a startup race with codex-auth-rotator (which runs as a sidecar and only writes + # auth.json on its first scheduled tick). If no pod-side auth-N.json exists yet + # (fresh cluster — operator hasn't run auth-login.sh), writes empty auth.json + # and the chatgpt/ provider stays disabled until the operator seeds an account. + # + # All OAuth refresh + multi-account rotation logic lives in the rotator sidecar. + # See ADR-014. - name: codex-auth-seed image: python:3-alpine command: - python3 - -c - | - import os, sys, base64, json, datetime, urllib.request, urllib.parse, urllib.error - - OAUTH_URL = 'https://auth.openai.com/oauth/token' + import json, os, sys, base64 def token_exp(tok): if not tok: @@ -595,150 +535,44 @@ spec: except Exception: return 0 - def refresh_token(refresh_tok, client_id, label): - """Exchange refresh_token for fresh access_token. Returns (access, refresh, id_token, exp) or None.""" - if not refresh_tok or not client_id: - return None - body = urllib.parse.urlencode({ - 'grant_type': 'refresh_token', - 'refresh_token': refresh_tok, - 'client_id': client_id, - 'scope': 'openid profile email', - }).encode('utf-8') - req = urllib.request.Request( - OAUTH_URL, - data=body, - headers={'Content-Type': 'application/x-www-form-urlencoded'}, - ) + def read_pod_auth(path): try: - with urllib.request.urlopen(req, timeout=15) as resp: - payload = json.loads(resp.read().decode('utf-8')) - except urllib.error.HTTPError as e: - err_body = e.read().decode('utf-8', errors='replace')[:200] - print(f' [{label}] refresh failed: HTTP {e.code} {err_body}') - return None - except Exception as e: - print(f' [{label}] refresh failed: {e}') + with open(path) as f: + d = json.load(f) + except Exception: return None - new_access = payload.get('access_token', '') - new_id = payload.get('id_token', '') - new_refresh = payload.get('refresh_token', refresh_tok) - exp = token_exp(new_access) or (int(datetime.datetime.now(datetime.timezone.utc).timestamp()) + int(payload.get('expires_in', 0) or 0)) - if not new_access: - print(f' [{label}] refresh returned no access_token') + tokens = d.get('tokens') if isinstance(d.get('tokens'), dict) else d + access = tokens.get('access_token', '') or d.get('access_token', '') + refresh = tokens.get('refresh_token', '') or d.get('refresh_token', '') + id_tok = tokens.get('id_token', '') or d.get('id_token', '') + if not (access or refresh): return None - return (new_access, new_refresh, new_id, exp) - - now_ts = int(datetime.datetime.now(datetime.timezone.utc).timestamp()) - client_id = os.environ.get('OPENAI_CODEX_CLIENT_ID', '') - - # All three accounts in preferred order. Account-1 is primary; 3 and 2 are - # fallbacks only used when account-1's tokens are expired AND unrefreshable. - candidates = [ - ('', os.environ.get('OPENAI_CODEX_ACCESS_TOKEN', ''), os.environ.get('OPENAI_CODEX_REFRESH_TOKEN', ''), os.environ.get('OPENAI_CODEX_ID_TOKEN', '')), - ('3', os.environ.get('OPENAI_CODEX_ACCESS_TOKEN_3', ''), os.environ.get('OPENAI_CODEX_REFRESH_TOKEN_3', ''), os.environ.get('OPENAI_CODEX_ID_TOKEN_3', '')), - ('2', os.environ.get('OPENAI_CODEX_ACCESS_TOKEN_2', ''), os.environ.get('OPENAI_CODEX_REFRESH_TOKEN_2', ''), ''), - ] + return access, refresh, id_tok chosen = None - for (suffix, access, refresh, id_tok) in candidates: - label = f'account-{"1" if suffix == "" else suffix}' - if not access and not refresh: - print(f'{label}: no tokens configured, skipping') + for label in ('1', '2', '3'): + rec = read_pod_auth(f'/chatgpt-auth/auth-{label}.json') + if not rec: continue - exp = token_exp(access) if access else 0 - # Prefer valid access token (with 60s buffer to avoid edge-race expiry) - if access and exp > now_ts + 60: - chosen = (suffix, access, refresh, id_tok, exp) - print(f'{label}: access_token valid (expires {exp}, +{exp - now_ts}s)') - break - # Otherwise try refresh - if refresh and client_id: - print(f'{label}: access_token expired/missing, refreshing…') - refreshed = refresh_token(refresh, client_id, label) - if refreshed: - new_access, new_refresh, new_id, new_exp = refreshed - print(f'{label}: refreshed successfully (expires {new_exp}, +{new_exp - now_ts}s)') - chosen = (suffix, new_access, new_refresh, new_id or id_tok, new_exp) - break - else: - print(f'{label}: no refresh_token or client_id, skipping') - - if not chosen: - with open('/chatgpt-auth/auth.json', 'w') as f: - json.dump({}, f) - print('No valid Codex token found after checking all accounts — wrote empty auth.json') - sys.exit(0) - - suffix, access, refresh, id_tok, exp = chosen - out = {'access_token': access, 'expires_at': exp} - if refresh: - out['refresh_token'] = refresh - if id_tok: - out['id_token'] = id_tok + access, refresh, id_tok = rec + chosen = (label, access, refresh, id_tok, token_exp(access)) + print(f'seed: using pod-side auth-{label}.json (exp={chosen[4]})') + break + + out = {} + if chosen: + label, access, refresh, id_tok, exp = chosen + out = {'access_token': access, 'expires_at': exp} + if refresh: + out['refresh_token'] = refresh + if id_tok: + out['id_token'] = id_tok + else: + print('seed: no pod-side auth-N.json found — writing empty auth.json. ' + 'Run `/scripts/auth-login.sh ` from the codex-cli sidecar to seed.') with open('/chatgpt-auth/auth.json', 'w') as f: json.dump(out, f) - - exp_str = datetime.datetime.fromtimestamp(exp, datetime.timezone.utc).isoformat() - print(f'auth.json written — account-{"1" if suffix == "" else suffix} expires_at={exp} ({exp_str})') - env: - - name: OPENAI_CODEX_ACCESS_TOKEN - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-access-token - optional: true - - name: OPENAI_CODEX_REFRESH_TOKEN - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-refresh-token - optional: true - - name: OPENAI_CODEX_ID_TOKEN - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-id-token - optional: true - - name: OPENAI_CODEX_ACCESS_TOKEN_3 - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-access-token-3 - optional: true - - name: OPENAI_CODEX_REFRESH_TOKEN_3 - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-refresh-token-3 - optional: true - - name: OPENAI_CODEX_ID_TOKEN_3 - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-id-token-3 - optional: true - # Account-2: used when accounts 1 and 3 are exhausted/expired - - name: OPENAI_CODEX_ACCESS_TOKEN_2 - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-access-token-2 - optional: true - - name: OPENAI_CODEX_REFRESH_TOKEN_2 - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-refresh-token-2 - optional: true - # Client ID for OAuth refresh flow - - name: OPENAI_CODEX_CLIENT_ID - valueFrom: - secretKeyRef: - name: api-keys - key: openai-codex-client-id - optional: true volumeMounts: - name: chatgpt-auth mountPath: /chatgpt-auth