From 4aec1e217650a2712b095199983c742b2d35045d Mon Sep 17 00:00:00 2001
From: Sam Xu <xcjsam@g.ucla.edu>
Date: Thu, 14 May 2026 18:36:21 -0700
Subject: [PATCH 1/2] feat(litellm): in-pod codex device-auth for
 cluster-IP-bound sessions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ChatGPT binds OAuth sessions to the IP/device that completed device-auth.
Laptop-device-auth'd tokens uploaded to the cluster get token_invalidated
on first use (confirmed via direct probe today). The cloud-codex-cody pod
already proved the fix: device-auth FROM inside the cluster produces
sessions ChatGPT keeps alive across cluster usage.

This brings that fix one layer up so Nova/Pixel and any future codex
agent share the same auth surface (LiteLLM), rather than each agent
needing its own pod with its own codex login.

What changes:

1. New `codex-cli` sidecar on the LiteLLM pod. Installs codex CLI on
   first boot, idles. Operator runs:
     kubectl exec -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1
   Completes device-auth in browser; resulting auth.json lands on the
   shared chatgpt-auth volume as /chatgpt-auth/auth-1.json. Repeat for
   accounts 2 and 3.

2. codex-auth-rotator now PREFERS pod-side /chatgpt-auth/auth-N.json
   files when present, and only falls back to env-var-fed tokens
   (laptop-bound, dead) when no pod-side files exist. Keeps the existing
   rotation cadence + 429 signal handling unchanged.

3. chatgpt-auth volume can be a PVC (values key:
   litellm.chatgptAuth.persistence.enabled). Required for the
   cluster-bound flow — emptyDir loses tokens on every pod restart.
   Dev opts in; defaults stay off so OSS deployments aren't surprised.

4. Adds `strategy.type: Recreate` to the LiteLLM Deployment when the
   PVC is enabled — RWO single-writer can't hand off cleanly with
   RollingUpdate.

After this lands + operator does device-auth × N from inside the
codex-cli sidecar, all dev LLM traffic (openclaw moltbot via LiteLLM
chatgpt/ bridge, and any future codex CLI agents pointed at LiteLLM)
uses cluster-bound sessions. Nova/Pixel come back to life without
another laptop device-auth round.

Follow-up: switch cloud-codex-cody to point codex CLI at LiteLLM
(model_provider override + virtual key) so Cody routes through the
same auth surface instead of needing her own /state/.codex/auth.json.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../templates/agents/litellm-deployment.yaml  | 144 +++++++++++++++++-
 k8s/helm/commonly/values-dev.yaml             |   9 ++
 k8s/helm/commonly/values.yaml                 |  16 ++
 3 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml
index b8d7b21d..2701dff3 100644
--- a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml
+++ b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml
@@ -1,4 +1,23 @@
 {{- if .Values.litellm.enabled }}
+{{- if .Values.litellm.chatgptAuth.persistence.enabled }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: litellm-chatgpt-auth
+  namespace: {{ include "commonly.namespace" . }}
+  labels:
+    {{- include "commonly.labels" . | nindent 4 }}
+    app: litellm
+spec:
+  accessModes: ["ReadWriteOnce"]  # single writer: the Deployment below runs replicas: 1
+  resources:
+    requests:
+      storage: {{ .Values.litellm.chatgptAuth.persistence.size | default "1Gi" | quote }}
+  {{- with .Values.litellm.chatgptAuth.persistence.storageClass }}
+  storageClassName: {{ . | quote }}  # key is omitted entirely when storageClass is unset
+  {{- end }}
+---
+{{- end }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -9,6 +28,13 @@ metadata:
     app: litellm
 spec:
   replicas: 1
+  {{- if .Values.litellm.chatgptAuth.persistence.enabled }}
+  # PVC is RWO single-writer; Recreate ensures the old pod releases before
+  # the new one tries to attach. Without this, helm-upgrade can hang for
+  # 5+ minutes on the new pod waiting for the volume.
+  strategy:
+    type: Recreate
+  {{- end }}
   selector:
     matchLabels:
       {{- include "commonly.selectorLabels" . | nindent 6 }}
@@ -315,13 +341,53 @@ spec:
               except Exception as e:
                   print(f'[rotator] save_state failed: {e}', flush=True)
 
+          def _read_pod_auth_file(path):
+              """Return (access, refresh, id_token) from the codex auth.json at `path`, or None if unusable."""
+              try:
+                  with open(path) as f:
+                      d = json.load(f)
+                  # `codex login` nests its tokens under a "tokens" object, while the
+                  # rotator-written file is flat — accept either shape.
+                  toks = d.get('tokens') if isinstance(d.get('tokens'), dict) else d
+                  access, refresh, id_tok = (toks.get(k, '') for k in ('access_token', 'refresh_token', 'id_token'))
+                  if access or refresh:
+                      return access, refresh, id_tok
+              except Exception:
+                  pass  # best-effort: a missing or malformed file simply yields no candidate
+              return None
+
           def get_candidates():
+              """Build the rotation candidate list.
+
+              PREFERRED: pod-side device-auth'd files at /chatgpt-auth/auth-{1,2,3}.json.
+                  Operator created these via `kubectl exec` into the codex-cli sidecar +
+                  `codex login --device-auth`. Tokens are cluster-IP-bound, so ChatGPT
+                  doesn't invalidate them on cluster usage (the inverse of the env-var
+                  path below where laptop-bound sessions die on first cluster call).
+
+              FALLBACK: env-var-fed tokens from GCP SM. Kept for backward compat with
+                  older operator flows; flagged stale by `--mode envvar` in logs so
+                  it's clear when we're on the dead path.
+              """
+              pod_files = [
+                  ('1', '/chatgpt-auth/auth-1.json'),
+                  ('2', '/chatgpt-auth/auth-2.json'),
+                  ('3', '/chatgpt-auth/auth-3.json'),
+              ]
+              out = []
+              for label, path in pod_files:
+                  rec = _read_pod_auth_file(path)
+                  if rec:
+                      access, refresh, id_tok = rec
+                      out.append((label, access, refresh, id_tok))
+              if out:
+                  return out
+              # Fallback: env-var path (legacy, pre-cluster-bound)
               specs = [
                   ('1', 'OPENAI_CODEX_ACCESS_TOKEN', 'OPENAI_CODEX_REFRESH_TOKEN', 'OPENAI_CODEX_ID_TOKEN'),
                   ('2', 'OPENAI_CODEX_ACCESS_TOKEN_2', 'OPENAI_CODEX_REFRESH_TOKEN_2', ''),
                   ('3', 'OPENAI_CODEX_ACCESS_TOKEN_3', 'OPENAI_CODEX_REFRESH_TOKEN_3', 'OPENAI_CODEX_ID_TOKEN_3'),
               ]
-              out = []
               for label, a_env, r_env, i_env in specs:
                   access = os.environ.get(a_env, '')
                   refresh = os.environ.get(r_env, '')
@@ -600,12 +666,88 @@ spec:
         volumeMounts:
         - name: chatgpt-auth
           mountPath: /chatgpt-auth
+      # codex-cli sidecar: provides the codex binary inside the LiteLLM pod
+      # so an operator can run `codex login --device-auth` from within the
+      # cluster. The resulting auth.json lands on the shared chatgpt-auth
+      # PVC (as /chatgpt-auth/auth-N.json), and the codex-auth-rotator
+      # above prefers those pod-side files over the env-var-fed tokens.
+      #
+      # ChatGPT binds OAuth sessions to the IP/device that completed
+      # device-auth. Doing device-auth on a laptop and uploading tokens
+      # to the cluster invalidates the session on first cluster use.
+      # Doing device-auth from inside this pod produces a cluster-IP-
+      # bound session that survives. See PR #362 / cloud-codex-deployment
+      # for the per-agent precedent.
+      #
+      # Operator flow (one-time per account, after pod is up):
+      #   kubectl exec -n {{ include "commonly.namespace" . }} -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1
+      # Sign in to ChatGPT in browser with account #N. Upon success the
+      # script copies ~/.codex/auth.json to /chatgpt-auth/auth-1.json.
+      # Repeat for accounts 2 and 3.
+      - name: codex-cli
+        image: node:22-bookworm-slim
+        command:
+        - /bin/sh
+        - -c
+        - |
+          # Install codex CLI + ca-certs on first boot, then idle so the
+          # operator can exec into us. The sleep loop is intentional —
+          # there's no continuous workload here; this sidecar exists
+          # purely to provide `codex` inside the same pod that holds the
+          # chatgpt-auth PVC.
+          if [ ! -x /usr/local/bin/codex ]; then
+            apt-get update >/dev/null 2>&1 || true
+            DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates >/dev/null 2>&1 || true
+            update-ca-certificates >/dev/null 2>&1 || true
+            npm install --global --no-audit --no-fund "@openai/codex@{{ .Values.litellm.codexCli.version | default "0.125.0" }}" >/dev/null 2>&1 || true
+          fi
+          mkdir -p /scripts
+          cat > /scripts/auth-login.sh <<'SCRIPT'
+          #!/bin/sh
+          # auth-login.sh <account-number>
+          # Runs codex login --device-auth interactively. Operator follows the URL+code
+          # printed to stdout, completes in browser, and on success the resulting
+          # auth.json is copied to /chatgpt-auth/auth-N.json for the rotator to pick up.
+          set -e
+          N="${1:-}"
+          if [ -z "$N" ]; then echo "usage: $0 <account-number>"; exit 1; fi
+          HOMEDIR="/tmp/codex-login-$N"
+          rm -rf "$HOMEDIR"
+          mkdir -p "$HOMEDIR/.codex"
+          HOME="$HOMEDIR" codex login --device-auth
+          if [ ! -s "$HOMEDIR/.codex/auth.json" ]; then
+            echo "auth.json was not written — login did not complete"
+            exit 1
+          fi
+          cp "$HOMEDIR/.codex/auth.json" "/chatgpt-auth/auth-$N.json"
+          chmod 600 "/chatgpt-auth/auth-$N.json"
+          echo "wrote /chatgpt-auth/auth-$N.json — rotator will pick it up on next tick"
+          SCRIPT
+          chmod +x /scripts/auth-login.sh
+          echo "[codex-cli] ready. Run device-auth via:"
+          echo "[codex-cli]   kubectl exec -it <litellm-pod> -c codex-cli -- /scripts/auth-login.sh <1|2|3>"
+          while true; do sleep 3600; done
+        volumeMounts:
+        - name: chatgpt-auth
+          mountPath: /chatgpt-auth
+        resources:
+          requests:
+            cpu: 20m
+            memory: 64Mi
+          limits:
+            cpu: 200m
+            memory: 256Mi
       volumes:
       - name: litellm-config
         configMap:
           name: litellm-config
       - name: chatgpt-auth
+        {{- if .Values.litellm.chatgptAuth.persistence.enabled }}
+        persistentVolumeClaim:
+          claimName: litellm-chatgpt-auth
+        {{- else }}
         emptyDir: {}
+        {{- end }}
       {{- with .Values.litellm.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/k8s/helm/commonly/values-dev.yaml b/k8s/helm/commonly/values-dev.yaml
index 4e203849..ea962289 100644
--- a/k8s/helm/commonly/values-dev.yaml
+++ b/k8s/helm/commonly/values-dev.yaml
@@ -241,6 +241,15 @@ litellm:
   # rotation gives real multi-account benefit when one account exhausts.
   codexAuthRotator:
     enabled: true
+  # Persist chatgpt-auth across pod restarts so pod-side device-auth'd
+  # auth-N.json files survive litellm rollouts. Without this every
+  # helm-upgrade nukes the cluster-bound tokens and Nova/Pixel/Cody
+  # go silent until the operator re-device-auths.
+  chatgptAuth:
+    persistence:
+      enabled: true
+      size: 1Gi
+      storageClass: standard-rwo
   nodeSelector:
     pool: dev
   tolerations:
diff --git a/k8s/helm/commonly/values.yaml b/k8s/helm/commonly/values.yaml
index 10bbf108..26434c20 100644
--- a/k8s/helm/commonly/values.yaml
+++ b/k8s/helm/commonly/values.yaml
@@ -369,6 +369,22 @@ litellm:
   # tokens). Re-enable once the upstream LiteLLM bug is fixed.
   codexAuthRotator:
     enabled: true
+  # Codex CLI sidecar: provides the `codex` binary inside the LiteLLM
+  # pod so an operator can run device-auth FROM the cluster (not from
+  # a laptop). The resulting auth.json lands on the chatgpt-auth PVC
+  # and the codex-auth-rotator picks it up preferentially over env-var
+  # tokens. See litellm-deployment.yaml for the operator flow.
+  codexCli:
+    version: "0.125.0"
+  # chatgpt-auth volume backs the codex-auth-rotator's auth.json plus
+  # any pod-side device-auth'd auth-N.json files. Persistence is
+  # REQUIRED for the cluster-bound auth flow — emptyDir loses tokens
+  # on every pod restart, forcing re-device-auth every helm-upgrade.
+  chatgptAuth:
+    persistence:
+      enabled: false
+      size: 1Gi
+      # storageClass defaults to the cluster's default StorageClass.
   image:
     repository: ghcr.io/berriai/litellm
     tag: v1.82.3-stable

From 05ff08af99fdc217fc300c78a3d5934c2bd62036 Mon Sep 17 00:00:00 2001
From: Sam Xu <xcjsam@g.ucla.edu>
Date: Thu, 14 May 2026 19:10:09 -0700
Subject: [PATCH 2/2] fix(litellm): codex-cli is a sidecar (containers:), not
 an init container
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In PR #365 the codex-cli block landed in the initContainers list by
mistake, which made the pod stuck Init:1/2 — codex-cli's sleep loop
never exits, so the pod never progressed to Running, and helm-upgrade
hit the 10m timeout.

Move codex-cli into containers: (sidecar position, after the
codex-auth-rotator). LiteLLM main container can now reach Ready
while codex-cli idles in parallel waiting for operator exec.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../templates/agents/litellm-deployment.yaml  | 142 +++++++++---------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml
index 2701dff3..c810790f 100644
--- a/k8s/helm/commonly/templates/agents/litellm-deployment.yaml
+++ b/k8s/helm/commonly/templates/agents/litellm-deployment.yaml
@@ -492,6 +492,77 @@ spec:
         - name: chatgpt-auth
           mountPath: /chatgpt-auth
       {{- end }}
+      # codex-cli sidecar: provides the codex binary inside the LiteLLM pod
+      # so an operator can run `codex login --device-auth` from within the
+      # cluster. The resulting auth.json lands on the shared chatgpt-auth
+      # PVC (as /chatgpt-auth/auth-N.json), and the codex-auth-rotator
+      # above prefers those pod-side files over the env-var-fed tokens.
+      #
+      # ChatGPT binds OAuth sessions to the IP/device that completed
+      # device-auth. Doing device-auth on a laptop and uploading tokens
+      # to the cluster invalidates the session on first cluster use.
+      # Doing device-auth from inside this pod produces a cluster-IP-
+      # bound session that survives. See PR #362 / cloud-codex-deployment
+      # for the per-agent precedent.
+      #
+      # Operator flow (one-time per account, after pod is up):
+      #   kubectl exec -n {{ include "commonly.namespace" . }} -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1
+      # Sign in to ChatGPT in the browser as account #1. On success the script copies
+      # /tmp/codex-login-1/.codex/auth.json (not ~/.codex) to /chatgpt-auth/auth-1.json.
+      # Repeat for accounts 2 and 3.
+      - name: codex-cli
+        image: node:22-bookworm-slim
+        command:
+        - /bin/sh
+        - -c
+        - |
+          # Best-effort install of the codex CLI + ca-certs at container start,
+          # then idle so the operator can exec in. A failed install is logged
+          # instead of being fatal, so this sidecar keeps running either way.
+          # There is no continuous workload here; the container exists purely
+          # to provide `codex` in the pod that mounts the chatgpt-auth volume.
+          if [ ! -x /usr/local/bin/codex ]; then
+            apt-get update >/dev/null 2>&1 || true
+            DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates >/dev/null 2>&1 || true
+            update-ca-certificates >/dev/null 2>&1 || true
+            npm install --global --no-audit --no-fund "@openai/codex@{{ .Values.litellm.codexCli.version | default "0.125.0" }}" >/dev/null 2>&1 || echo "[codex-cli] WARNING: codex install failed; auth-login.sh will not work until the container is restarted" >&2
+          fi
+          mkdir -p /scripts
+          cat > /scripts/auth-login.sh <<'SCRIPT'
+          #!/bin/sh
+          # auth-login.sh <1|2|3>
+          # Runs codex login --device-auth interactively under a scratch $HOME. The
+          # operator follows the printed URL+code in a browser; on success auth.json
+          # is published as /chatgpt-auth/auth-N.json for the rotator to pick up.
+          set -e
+          N="${1:-}"
+          case "$N" in 1|2|3) ;; *) echo "usage: $0 <1|2|3>"; exit 1 ;; esac  # rotator only reads auth-{1,2,3}.json
+          HOMEDIR="/tmp/codex-login-$N"
+          rm -rf "$HOMEDIR"
+          mkdir -p "$HOMEDIR/.codex"
+          HOME="$HOMEDIR" codex login --device-auth
+          if [ ! -s "$HOMEDIR/.codex/auth.json" ]; then
+            echo "auth.json was not written — login did not complete"
+            exit 1
+          fi
+          install -m 600 "$HOMEDIR/.codex/auth.json" "/chatgpt-auth/.auth-$N.json.tmp"  # created 0600 from the start
+          mv -f "/chatgpt-auth/.auth-$N.json.tmp" "/chatgpt-auth/auth-$N.json"  # rename: readers never see a partial file
+          echo "wrote /chatgpt-auth/auth-$N.json — rotator will pick it up on next tick"
+          SCRIPT
+          chmod +x /scripts/auth-login.sh
+          echo "[codex-cli] ready. Run device-auth via:"
+          echo "[codex-cli]   kubectl exec -it <litellm-pod> -c codex-cli -- /scripts/auth-login.sh <1|2|3>"
+          while true; do sleep 3600; done
+        volumeMounts:
+        - name: chatgpt-auth
+          mountPath: /chatgpt-auth
+        resources:
+          requests:
+            cpu: 20m
+            memory: 64Mi
+          limits:
+            cpu: 200m
+            memory: 256Mi
       initContainers:
       # Write the best available (non-expired) Codex token to auth.json for LiteLLM's chatgpt/ provider.
       # Account-1 entries in litellm-config have no api_key — the chatgpt/ provider reads
@@ -666,77 +737,6 @@ spec:
         volumeMounts:
         - name: chatgpt-auth
           mountPath: /chatgpt-auth
-      # codex-cli sidecar: provides the codex binary inside the LiteLLM pod
-      # so an operator can run `codex login --device-auth` from within the
-      # cluster. The resulting auth.json lands on the shared chatgpt-auth
-      # PVC (as /chatgpt-auth/auth-N.json), and the codex-auth-rotator
-      # above prefers those pod-side files over the env-var-fed tokens.
-      #
-      # ChatGPT binds OAuth sessions to the IP/device that completed
-      # device-auth. Doing device-auth on a laptop and uploading tokens
-      # to the cluster invalidates the session on first cluster use.
-      # Doing device-auth from inside this pod produces a cluster-IP-
-      # bound session that survives. See PR #362 / cloud-codex-deployment
-      # for the per-agent precedent.
-      #
-      # Operator flow (one-time per account, after pod is up):
-      #   kubectl exec -n {{ include "commonly.namespace" . }} -it deploy/litellm -c codex-cli -- /scripts/auth-login.sh 1
-      # Sign in to ChatGPT in browser with account #N. Upon success the
-      # script copies ~/.codex/auth.json to /chatgpt-auth/auth-1.json.
-      # Repeat for accounts 2 and 3.
-      - name: codex-cli
-        image: node:22-bookworm-slim
-        command:
-        - /bin/sh
-        - -c
-        - |
-          # Install codex CLI + ca-certs on first boot, then idle so the
-          # operator can exec into us. The sleep loop is intentional —
-          # there's no continuous workload here; this sidecar exists
-          # purely to provide `codex` inside the same pod that holds the
-          # chatgpt-auth PVC.
-          if [ ! -x /usr/local/bin/codex ]; then
-            apt-get update >/dev/null 2>&1 || true
-            DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates >/dev/null 2>&1 || true
-            update-ca-certificates >/dev/null 2>&1 || true
-            npm install --global --no-audit --no-fund "@openai/codex@{{ .Values.litellm.codexCli.version | default "0.125.0" }}" >/dev/null 2>&1 || true
-          fi
-          mkdir -p /scripts
-          cat > /scripts/auth-login.sh <<'SCRIPT'
-          #!/bin/sh
-          # auth-login.sh <account-number>
-          # Runs codex login --device-auth interactively. Operator follows the URL+code
-          # printed to stdout, completes in browser, and on success the resulting
-          # auth.json is copied to /chatgpt-auth/auth-N.json for the rotator to pick up.
-          set -e
-          N="${1:-}"
-          if [ -z "$N" ]; then echo "usage: $0 <account-number>"; exit 1; fi
-          HOMEDIR="/tmp/codex-login-$N"
-          rm -rf "$HOMEDIR"
-          mkdir -p "$HOMEDIR/.codex"
-          HOME="$HOMEDIR" codex login --device-auth
-          if [ ! -s "$HOMEDIR/.codex/auth.json" ]; then
-            echo "auth.json was not written — login did not complete"
-            exit 1
-          fi
-          cp "$HOMEDIR/.codex/auth.json" "/chatgpt-auth/auth-$N.json"
-          chmod 600 "/chatgpt-auth/auth-$N.json"
-          echo "wrote /chatgpt-auth/auth-$N.json — rotator will pick it up on next tick"
-          SCRIPT
-          chmod +x /scripts/auth-login.sh
-          echo "[codex-cli] ready. Run device-auth via:"
-          echo "[codex-cli]   kubectl exec -it <litellm-pod> -c codex-cli -- /scripts/auth-login.sh <1|2|3>"
-          while true; do sleep 3600; done
-        volumeMounts:
-        - name: chatgpt-auth
-          mountPath: /chatgpt-auth
-        resources:
-          requests:
-            cpu: 20m
-            memory: 64Mi
-          limits:
-            cpu: 200m
-            memory: 256Mi
       volumes:
       - name: litellm-config
         configMap: