From 4aec1e217650a2712b095199983c742b2d35045d Mon Sep 17 00:00:00 2001 From: Sam Xu Date: Thu, 14 May 2026 18:36:21 -0700 Subject: [PATCH 1/2] feat(litellm): in-pod codex device-auth for cluster-IP-bound sessions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ChatGPT binds OAuth sessions to the IP/device that completed device-auth. Laptop-device-auth'd tokens uploaded to the cluster get token_invalidated on first use (confirmed via direct probe today). The cloud-codex-cody pod already proved the fix: device-auth FROM inside the cluster produces sessions ChatGPT keeps alive across cluster usage. This brings that fix one layer up so Nova/Pixel and any future codex agent share the same auth surface (LiteLLM), rather than each agent needing its own pod with its own codex login. What changes: 1. New `codex-cli` sidecar on the LiteLLM pod. Installs codex CLI on first boot, idles. Operator runs: kubectl exec -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1 Completes device-auth in browser; resulting auth.json lands on the shared chatgpt-auth volume as /chatgpt-auth/auth-1.json. Repeat for accounts 2 and 3. 2. codex-auth-rotator now PREFERS pod-side /chatgpt-auth/auth-N.json files when present, and only falls back to env-var-fed tokens (laptop-bound, dead) when no pod-side files exist. Keeps the existing rotation cadence + 429 signal handling unchanged. 3. chatgpt-auth volume can be a PVC (values: litellm.chatgptAuth.persistence.enabled). Required for the cluster-bound flow — emptyDir loses tokens on every pod restart. Dev opts in; defaults stay off so OSS deployments aren't surprised. 4. Adds `strategy.type: Recreate` to the LiteLLM Deployment when the PVC is enabled — RWO single-writer can't hand off cleanly with RollingUpdate. 
After this lands + operator does device-auth × N from inside the codex-cli sidecar, all dev LLM traffic (openclaw moltbot via LiteLLM chatgpt/ bridge, and any future codex CLI agents pointed at LiteLLM) uses cluster-bound sessions. Nova/Pixel come back to life without another laptop device-auth round. Follow-up: switch cloud-codex-cody to point codex CLI at LiteLLM (model_provider override + virtual key) so Cody routes through the same auth surface instead of needing her own /state/.codex/auth.json. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../templates/agents/litellm-deployment.yaml | 144 +++++++++++++++++- k8s/helm/commonly/values-dev.yaml | 9 ++ k8s/helm/commonly/values.yaml | 16 ++ 3 files changed, 168 insertions(+), 1 deletion(-) diff --git a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml index b8d7b21d..2701dff3 100644 --- a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml +++ b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml @@ -1,4 +1,23 @@ {{- if .Values.litellm.enabled }} +{{- if .Values.litellm.chatgptAuth.persistence.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: litellm-chatgpt-auth + namespace: {{ include "commonly.namespace" . }} + labels: + {{- include "commonly.labels" . | nindent 4 }} + app: litellm +spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: {{ .Values.litellm.chatgptAuth.persistence.size | default "1Gi" }} + {{- with .Values.litellm.chatgptAuth.persistence.storageClass }} + storageClassName: {{ . }} + {{- end }} +--- +{{- end }} apiVersion: apps/v1 kind: Deployment metadata: @@ -9,6 +28,13 @@ metadata: app: litellm spec: replicas: 1 + {{- if .Values.litellm.chatgptAuth.persistence.enabled }} + # PVC is RWO single-writer; Recreate ensures the old pod releases before + # the new one tries to attach. 
Without this, helm-upgrade can hang for
+  # 5+ minutes on the new pod waiting for the volume.
+  strategy:
+    type: Recreate
+  {{- end }}
   selector:
     matchLabels:
       {{- include "commonly.selectorLabels" . | nindent 6 }}
@@ -315,13 +341,75 @@ spec:
                   except Exception as e:
                       print(f'[rotator] save_state failed: {e}', flush=True)
 
+              def _read_pod_auth_file(path):
+                  """Read a codex auth.json from disk.
+
+                  Accepts the flat layout (access_token / refresh_token / id_token at
+                  the top level) and the nested layout where the same keys sit under a
+                  top-level 'tokens' object, which is the layout codex CLI's own
+                  auth.json is expected to use (verify against the pinned CLI version).
+                  auth-login.sh copies the CLI file verbatim, so reading only the flat
+                  keys would skip every pod-side file written in the nested layout.
+
+                  Returns (access, refresh, id_token), or None when the file is missing,
+                  unreadable, or holds neither an access nor a refresh token.
+                  """
+                  try:
+                      with open(path, encoding='utf-8') as f:
+                          d = json.load(f)
+                  except FileNotFoundError:
+                      # Expected until the operator has device-auth'd this slot.
+                      return None
+                  except (OSError, ValueError) as e:
+                      # Unreadable or corrupt file: skip it, but log so the fallback to
+                      # the env-var path is not silent.
+                      print(f'[rotator] ignoring {path}: {e}', flush=True)
+                      return None
+                  if not isinstance(d, dict):
+                      print(f'[rotator] ignoring {path}: not a JSON object', flush=True)
+                      return None
+                  # Nested layout first, then flat.
+                  for src in (d.get('tokens'), d):
+                      if not isinstance(src, dict):
+                          continue
+                      access = src.get('access_token') or ''
+                      refresh = src.get('refresh_token') or ''
+                      id_tok = src.get('id_token') or ''
+                      if access or refresh:
+                          return access, refresh, id_tok
+                  return None
+
               def get_candidates():
+                  """Build the rotation candidate list.
+
+                  PREFERRED: pod-side device-auth'd files at /chatgpt-auth/auth-{1,2,3}.json.
+                  Operator created these via `kubectl exec` into the codex-cli sidecar +
+                  `codex login --device-auth`. Tokens are cluster-IP-bound, so ChatGPT
+                  doesn't invalidate them on cluster usage (the inverse of the env-var
+                  path below where laptop-bound sessions die on first cluster call).
+
+                  FALLBACK: env-var-fed tokens from GCP SM. Kept for backward compat with
+                  older operator flows; flagged stale by `--mode envvar` in logs so
+                  it's clear when we're on the dead path.
+ """ + pod_files = [ + ('1', '/chatgpt-auth/auth-1.json'), + ('2', '/chatgpt-auth/auth-2.json'), + ('3', '/chatgpt-auth/auth-3.json'), + ] + out = [] + for label, path in pod_files: + rec = _read_pod_auth_file(path) + if rec: + access, refresh, id_tok = rec + out.append((label, access, refresh, id_tok)) + if out: + return out + # Fallback: env-var path (legacy, pre-cluster-bound) specs = [ ('1', 'OPENAI_CODEX_ACCESS_TOKEN', 'OPENAI_CODEX_REFRESH_TOKEN', 'OPENAI_CODEX_ID_TOKEN'), ('2', 'OPENAI_CODEX_ACCESS_TOKEN_2', 'OPENAI_CODEX_REFRESH_TOKEN_2', ''), ('3', 'OPENAI_CODEX_ACCESS_TOKEN_3', 'OPENAI_CODEX_REFRESH_TOKEN_3', 'OPENAI_CODEX_ID_TOKEN_3'), ] - out = [] for label, a_env, r_env, i_env in specs: access = os.environ.get(a_env, '') refresh = os.environ.get(r_env, '') @@ -600,12 +666,88 @@ spec: volumeMounts: - name: chatgpt-auth mountPath: /chatgpt-auth + # codex-cli sidecar: provides the codex binary inside the LiteLLM pod + # so an operator can run `codex login --device-auth` from within the + # cluster. The resulting auth.json lands on the shared chatgpt-auth + # PVC (as /chatgpt-auth/auth-N.json), and the codex-auth-rotator + # above prefers those pod-side files over the env-var-fed tokens. + # + # ChatGPT binds OAuth sessions to the IP/device that completed + # device-auth. Doing device-auth on a laptop and uploading tokens + # to the cluster invalidates the session on first cluster use. + # Doing device-auth from inside this pod produces a cluster-IP- + # bound session that survives. See PR #362 / cloud-codex-deployment + # for the per-agent precedent. + # + # Operator flow (one-time per account, after pod is up): + # kubectl exec -n {{ include "commonly.namespace" . }} -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1 + # Sign in to ChatGPT in browser with account #N. Upon success the + # script copies ~/.codex/auth.json to /chatgpt-auth/auth-1.json. + # Repeat for accounts 2 and 3. 
+ - name: codex-cli + image: node:22-bookworm-slim + command: + - /bin/sh + - -c + - | + # Install codex CLI + ca-certs on first boot, then idle so the + # operator can exec into us. The sleep loop is intentional — + # there's no continuous workload here; this sidecar exists + # purely to provide `codex` inside the same pod that holds the + # chatgpt-auth PVC. + if [ ! -x /usr/local/bin/codex ]; then + apt-get update >/dev/null 2>&1 || true + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates >/dev/null 2>&1 || true + update-ca-certificates >/dev/null 2>&1 || true + npm install --global --no-audit --no-fund "@openai/codex@{{ .Values.litellm.codexCli.version | default "0.125.0" }}" >/dev/null 2>&1 || true + fi + mkdir -p /scripts + cat > /scripts/auth-login.sh <<'SCRIPT' + #!/bin/sh + # auth-login.sh + # Runs codex login --device-auth interactively. Operator follows the URL+code + # printed to stdout, completes in browser, and on success the resulting + # auth.json is copied to /chatgpt-auth/auth-N.json for the rotator to pick up. + set -e + N="${1:-}" + if [ -z "$N" ]; then echo "usage: $0 "; exit 1; fi + HOMEDIR="/tmp/codex-login-$N" + rm -rf "$HOMEDIR" + mkdir -p "$HOMEDIR/.codex" + HOME="$HOMEDIR" codex login --device-auth + if [ ! -s "$HOMEDIR/.codex/auth.json" ]; then + echo "auth.json was not written — login did not complete" + exit 1 + fi + cp "$HOMEDIR/.codex/auth.json" "/chatgpt-auth/auth-$N.json" + chmod 600 "/chatgpt-auth/auth-$N.json" + echo "wrote /chatgpt-auth/auth-$N.json — rotator will pick it up on next tick" + SCRIPT + chmod +x /scripts/auth-login.sh + echo "[codex-cli] ready. 
Run device-auth via:" + echo "[codex-cli] kubectl exec -it -c codex-cli -- /scripts/auth-login.sh <1|2|3>" + while true; do sleep 3600; done + volumeMounts: + - name: chatgpt-auth + mountPath: /chatgpt-auth + resources: + requests: + cpu: 20m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi volumes: - name: litellm-config configMap: name: litellm-config - name: chatgpt-auth + {{- if .Values.litellm.chatgptAuth.persistence.enabled }} + persistentVolumeClaim: + claimName: litellm-chatgpt-auth + {{- else }} emptyDir: {} + {{- end }} {{- with .Values.litellm.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/k8s/helm/commonly/values-dev.yaml b/k8s/helm/commonly/values-dev.yaml index 4e203849..ea962289 100644 --- a/k8s/helm/commonly/values-dev.yaml +++ b/k8s/helm/commonly/values-dev.yaml @@ -241,6 +241,15 @@ litellm: # rotation gives real multi-account benefit when one account exhausts. codexAuthRotator: enabled: true + # Persist chatgpt-auth across pod restarts so pod-side device-auth'd + # auth-N.json files survive litellm rollouts. Without this every + # helm-upgrade nukes the cluster-bound tokens and Nova/Pixel/Cody + # go silent until the operator re-device-auths. + chatgptAuth: + persistence: + enabled: true + size: 1Gi + storageClass: standard-rwo nodeSelector: pool: dev tolerations: diff --git a/k8s/helm/commonly/values.yaml b/k8s/helm/commonly/values.yaml index 10bbf108..26434c20 100644 --- a/k8s/helm/commonly/values.yaml +++ b/k8s/helm/commonly/values.yaml @@ -369,6 +369,22 @@ litellm: # tokens). Re-enable once the upstream LiteLLM bug is fixed. codexAuthRotator: enabled: true + # Codex CLI sidecar: provides the `codex` binary inside the LiteLLM + # pod so an operator can run device-auth FROM the cluster (not from + # a laptop). The resulting auth.json lands on the chatgpt-auth PVC + # and the codex-auth-rotator picks it up preferentially over env-var + # tokens. See litellm-deployment.yaml for the operator flow. 
+ codexCli: + version: "0.125.0" + # chatgpt-auth volume backs the codex-auth-rotator's auth.json plus + # any pod-side device-auth'd auth-N.json files. Persistence is + # REQUIRED for the cluster-bound auth flow — emptyDir loses tokens + # on every pod restart, forcing re-device-auth every helm-upgrade. + chatgptAuth: + persistence: + enabled: false + size: 1Gi + # storageClass defaults to the cluster's default StorageClass. image: repository: ghcr.io/berriai/litellm tag: v1.82.3-stable From 05ff08af99fdc217fc300c78a3d5934c2bd62036 Mon Sep 17 00:00:00 2001 From: Sam Xu Date: Thu, 14 May 2026 19:10:09 -0700 Subject: [PATCH 2/2] fix(litellm): codex-cli is a sidecar (containers:), not an init container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In PR #365 the codex-cli block landed in the initContainers list by mistake, which made the pod stuck Init:1/2 — codex-cli's sleep loop never exits, so the pod never progressed to Running, and helm-upgrade hit the 10m timeout. Move codex-cli into containers: (sidecar position, after the codex-auth-rotator). LiteLLM main container can now reach Ready while codex-cli idles in parallel waiting for operator exec. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../templates/agents/litellm-deployment.yaml | 142 +++++++++--------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml index 2701dff3..c810790f 100644 --- a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml +++ b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml @@ -492,6 +492,77 @@ spec: - name: chatgpt-auth mountPath: /chatgpt-auth {{- end }} + # codex-cli sidecar: provides the codex binary inside the LiteLLM pod + # so an operator can run `codex login --device-auth` from within the + # cluster. 
The resulting auth.json lands on the shared chatgpt-auth + # PVC (as /chatgpt-auth/auth-N.json), and the codex-auth-rotator + # above prefers those pod-side files over the env-var-fed tokens. + # + # ChatGPT binds OAuth sessions to the IP/device that completed + # device-auth. Doing device-auth on a laptop and uploading tokens + # to the cluster invalidates the session on first cluster use. + # Doing device-auth from inside this pod produces a cluster-IP- + # bound session that survives. See PR #362 / cloud-codex-deployment + # for the per-agent precedent. + # + # Operator flow (one-time per account, after pod is up): + # kubectl exec -n {{ include "commonly.namespace" . }} -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1 + # Sign in to ChatGPT in browser with account #N. Upon success the + # script copies ~/.codex/auth.json to /chatgpt-auth/auth-1.json. + # Repeat for accounts 2 and 3. + - name: codex-cli + image: node:22-bookworm-slim + command: + - /bin/sh + - -c + - | + # Install codex CLI + ca-certs on first boot, then idle so the + # operator can exec into us. The sleep loop is intentional — + # there's no continuous workload here; this sidecar exists + # purely to provide `codex` inside the same pod that holds the + # chatgpt-auth PVC. + if [ ! -x /usr/local/bin/codex ]; then + apt-get update >/dev/null 2>&1 || true + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates >/dev/null 2>&1 || true + update-ca-certificates >/dev/null 2>&1 || true + npm install --global --no-audit --no-fund "@openai/codex@{{ .Values.litellm.codexCli.version | default "0.125.0" }}" >/dev/null 2>&1 || true + fi + mkdir -p /scripts + cat > /scripts/auth-login.sh <<'SCRIPT' + #!/bin/sh + # auth-login.sh + # Runs codex login --device-auth interactively. Operator follows the URL+code + # printed to stdout, completes in browser, and on success the resulting + # auth.json is copied to /chatgpt-auth/auth-N.json for the rotator to pick up. 
+ set -e + N="${1:-}" + if [ -z "$N" ]; then echo "usage: $0 "; exit 1; fi + HOMEDIR="/tmp/codex-login-$N" + rm -rf "$HOMEDIR" + mkdir -p "$HOMEDIR/.codex" + HOME="$HOMEDIR" codex login --device-auth + if [ ! -s "$HOMEDIR/.codex/auth.json" ]; then + echo "auth.json was not written — login did not complete" + exit 1 + fi + cp "$HOMEDIR/.codex/auth.json" "/chatgpt-auth/auth-$N.json" + chmod 600 "/chatgpt-auth/auth-$N.json" + echo "wrote /chatgpt-auth/auth-$N.json — rotator will pick it up on next tick" + SCRIPT + chmod +x /scripts/auth-login.sh + echo "[codex-cli] ready. Run device-auth via:" + echo "[codex-cli] kubectl exec -it -c codex-cli -- /scripts/auth-login.sh <1|2|3>" + while true; do sleep 3600; done + volumeMounts: + - name: chatgpt-auth + mountPath: /chatgpt-auth + resources: + requests: + cpu: 20m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi initContainers: # Write the best available (non-expired) Codex token to auth.json for LiteLLM's chatgpt/ provider. # Account-1 entries in litellm-config have no api_key — the chatgpt/ provider reads @@ -666,77 +737,6 @@ spec: volumeMounts: - name: chatgpt-auth mountPath: /chatgpt-auth - # codex-cli sidecar: provides the codex binary inside the LiteLLM pod - # so an operator can run `codex login --device-auth` from within the - # cluster. The resulting auth.json lands on the shared chatgpt-auth - # PVC (as /chatgpt-auth/auth-N.json), and the codex-auth-rotator - # above prefers those pod-side files over the env-var-fed tokens. - # - # ChatGPT binds OAuth sessions to the IP/device that completed - # device-auth. Doing device-auth on a laptop and uploading tokens - # to the cluster invalidates the session on first cluster use. - # Doing device-auth from inside this pod produces a cluster-IP- - # bound session that survives. See PR #362 / cloud-codex-deployment - # for the per-agent precedent. - # - # Operator flow (one-time per account, after pod is up): - # kubectl exec -n {{ include "commonly.namespace" . 
}} -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1 - # Sign in to ChatGPT in browser with account #N. Upon success the - # script copies ~/.codex/auth.json to /chatgpt-auth/auth-1.json. - # Repeat for accounts 2 and 3. - - name: codex-cli - image: node:22-bookworm-slim - command: - - /bin/sh - - -c - - | - # Install codex CLI + ca-certs on first boot, then idle so the - # operator can exec into us. The sleep loop is intentional — - # there's no continuous workload here; this sidecar exists - # purely to provide `codex` inside the same pod that holds the - # chatgpt-auth PVC. - if [ ! -x /usr/local/bin/codex ]; then - apt-get update >/dev/null 2>&1 || true - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates >/dev/null 2>&1 || true - update-ca-certificates >/dev/null 2>&1 || true - npm install --global --no-audit --no-fund "@openai/codex@{{ .Values.litellm.codexCli.version | default "0.125.0" }}" >/dev/null 2>&1 || true - fi - mkdir -p /scripts - cat > /scripts/auth-login.sh <<'SCRIPT' - #!/bin/sh - # auth-login.sh - # Runs codex login --device-auth interactively. Operator follows the URL+code - # printed to stdout, completes in browser, and on success the resulting - # auth.json is copied to /chatgpt-auth/auth-N.json for the rotator to pick up. - set -e - N="${1:-}" - if [ -z "$N" ]; then echo "usage: $0 "; exit 1; fi - HOMEDIR="/tmp/codex-login-$N" - rm -rf "$HOMEDIR" - mkdir -p "$HOMEDIR/.codex" - HOME="$HOMEDIR" codex login --device-auth - if [ ! -s "$HOMEDIR/.codex/auth.json" ]; then - echo "auth.json was not written — login did not complete" - exit 1 - fi - cp "$HOMEDIR/.codex/auth.json" "/chatgpt-auth/auth-$N.json" - chmod 600 "/chatgpt-auth/auth-$N.json" - echo "wrote /chatgpt-auth/auth-$N.json — rotator will pick it up on next tick" - SCRIPT - chmod +x /scripts/auth-login.sh - echo "[codex-cli] ready. 
Run device-auth via:" - echo "[codex-cli] kubectl exec -it -c codex-cli -- /scripts/auth-login.sh <1|2|3>" - while true; do sleep 3600; done - volumeMounts: - - name: chatgpt-auth - mountPath: /chatgpt-auth - resources: - requests: - cpu: 20m - memory: 64Mi - limits: - cpu: 200m - memory: 256Mi volumes: - name: litellm-config configMap: