From 64a4a3b0c7ec1943ad265cf6430a176d43f7f3ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Sun, 3 May 2026 21:56:46 +0100
Subject: [PATCH 1/9] Allow hermes messaging

---
 internal/hermes/hermes.go | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go
index 108fdfd2..cd9ff6b0 100644
--- a/internal/hermes/hermes.go
+++ b/internal/hermes/hermes.go
@@ -844,15 +844,16 @@ func generateValues(namespace, hostname, dashboardHostname, agentBaseURL, token,
                   fi
                   cd "$install_dir"
                   # Reinstall when the venv is missing the hermes binary OR
-                  # when the dashboard's web extra (fastapi/uvicorn) is absent.
-                  # The upstream image installs ".[all]" (which pulls in
-                  # ".[web]"); we re-create the venv from a fresh clone, so
-                  # the extras must be re-requested explicitly here.
+                  # any probed extra is absent. The upstream image installs
+                  # ".[all]"; we re-create the venv from a fresh clone, so the
+                  # extras must be re-requested explicitly. The import check
+                  # probes one module per extra (two for web; none for acp) so
+                  # existing PVCs rebuild when a probed extra is added below.
                   if [ ! -x "$install_dir/venv/bin/hermes" ] || \
-                     ! "$install_dir/venv/bin/python3" -c "import fastapi, uvicorn" >/dev/null 2>&1; then
+                     ! "$install_dir/venv/bin/python3" -c "import fastapi, uvicorn, telegram, mcp, ptyprocess, simple_term_menu, googleapiclient" >/dev/null 2>&1; then
                     rm -rf "$install_dir/venv"
                     uv venv --python python3 --system-site-packages venv
-                    VIRTUAL_ENV="$install_dir/venv" uv pip install -e ".[web]"
+                    VIRTUAL_ENV="$install_dir/venv" uv pip install -e ".[web,messaging,mcp,pty,cli,acp,google]"
                   fi
                   if [ -f /data/.hermes/state.db ]; then
                     if ! python3 - <<'PY'

From 2bc578301ccfa2b82b8c929fea94b8c1fe13e5f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Sun, 3 May 2026 22:21:08 +0100
Subject: [PATCH 2/9] fix(store): being served old docker images and not
 detecting a new one

---
 .../workflows/docker-publish-storefront.yml   |  10 +-
 .github/workflows/docker-publish-x402.yml     |   5 +
 cmd/obol/sell.go                              |   3 +-
 internal/images/images.go                     |  54 ++++++++++
 internal/images/images_test.go                | 102 ++++++++++++++++++
 internal/tunnel/tunnel.go                     |   3 +-
 6 files changed, 174 insertions(+), 3 deletions(-)
 create mode 100644 internal/images/images.go
 create mode 100644 internal/images/images_test.go

diff --git a/.github/workflows/docker-publish-storefront.yml b/.github/workflows/docker-publish-storefront.yml
index 3b944668..f78721fc 100644
--- a/.github/workflows/docker-publish-storefront.yml
+++ b/.github/workflows/docker-publish-storefront.yml
@@ -51,7 +51,15 @@ jobs:
           tags: |
             type=semver,pattern={{version}}
             type=semver,pattern={{major}}.{{minor}}
-            type=sha,prefix=
+            # Long SHA: needed by the security-scan step which references
+            # ${{ github.sha }} (40-char). Without it Trivy fails with
+            # MANIFEST_UNKNOWN. Same bug the x402 workflow used to have.
+            type=sha,format=long,prefix=
+            # Short SHA: matches the obol binary's version.GitCommit (set via
+            # ldflags from `git rev-parse --short HEAD`). internal/images.Resolve
+            # uses it to commit-pin the storefront deployment so binary upgrades
+            # actually roll the pod.
+            type=sha,format=short,prefix=
             type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
           labels: |
             org.opencontainers.image.title=obol-stack-public-storefront
diff --git a/.github/workflows/docker-publish-x402.yml b/.github/workflows/docker-publish-x402.yml
index 4733c412..f66e7b46 100644
--- a/.github/workflows/docker-publish-x402.yml
+++ b/.github/workflows/docker-publish-x402.yml
@@ -91,6 +91,11 @@ jobs:
             # Previously `type=sha,prefix=` produced the 7-char short SHA,
             # causing Trivy to fail with MANIFEST_UNKNOWN on every run.
             type=sha,format=long,prefix=
+            # Also publish the 7-char short SHA as a separate tag. The obol
+            # binary embeds version.GitCommit (short SHA) via ldflags and uses
+            # it through internal/images.Resolve to commit-pin the deployments
+            # it manages. Without this, binary upgrades wouldn't roll the pods.
+            type=sha,format=short,prefix=
             type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/feat/secure-enclave-inference' }}
           labels: |
             org.opencontainers.image.title=${{ matrix.component }}
diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go
index f51a57be..17546275 100644
--- a/cmd/obol/sell.go
+++ b/cmd/obol/sell.go
@@ -26,6 +26,7 @@ import (
 	"github.com/ObolNetwork/obol-stack/internal/enclave"
 	"github.com/ObolNetwork/obol-stack/internal/erc8004"
 	"github.com/ObolNetwork/obol-stack/internal/hermes"
+	"github.com/ObolNetwork/obol-stack/internal/images"
 	"github.com/ObolNetwork/obol-stack/internal/inference"
 	"github.com/ObolNetwork/obol-stack/internal/kubectl"
 	"github.com/ObolNetwork/obol-stack/internal/monetizeapi"
@@ -1270,7 +1271,7 @@ func buildDemoResources(name string, spec demoSpec, paymentChain string) []map[s
 						"containers": []map[string]any{
 							{
 								"name":            "demo",
-								"image":           "ghcr.io/obolnetwork/demo-server:latest",
+								"image":           images.Resolve("ghcr.io/obolnetwork/demo-server"),
 								"imagePullPolicy": "IfNotPresent",
 								"env":             env,
 								"ports": []map[string]any{
diff --git a/internal/images/images.go b/internal/images/images.go
new file mode 100644
index 00000000..1c596eed
--- /dev/null
+++ b/internal/images/images.go
@@ -0,0 +1,54 @@
+// Package images centralises the policy for selecting Docker image tags in
+// embedded Kubernetes manifests.
+//
+// The problem this solves: when the obol binary is upgraded, the K8s
+// Deployments it creates must trigger a rolling update so old pods are
+// replaced with ones running the new image. With ":latest" tags the embedded
+// manifest is byte-identical across binary versions, so kubectl apply reports
+// "unchanged" and stale pods keep serving the old image forever.
+//
+// The fix: production binaries are built with version.GitCommit injected via
+// ldflags, and CI publishes images tagged with the same short commit SHA.
+// Resolve uses that SHA so upgrading the binary changes the image:tag in
+// every manifest, which is what triggers K8s to roll the pod.
+//
+// Dev mode (OBOL_DEVELOPMENT=true), unset/unknown GitCommit, and dirty repos
+// fall back to ":latest" — that path matches the buildAndImportLocalImages
+// flow that imports freshly-built images into k3d as ":latest".
+package images
+
+import (
+	"os"
+	"strings"
+
+	"github.com/ObolNetwork/obol-stack/internal/version"
+)
+
+// Resolve returns the fully-qualified image reference for the repository
+// `repo`. The commit tag is trimmed the same way useLatest inspects it.
+//
+//	images.Resolve("ghcr.io/obolnetwork/demo-server")
+//	// → "ghcr.io/obolnetwork/demo-server:abc1234"   (production)
+//	// → "ghcr.io/obolnetwork/demo-server:latest"    (dev / unknown commit)
+func Resolve(repo string) string {
+	if useLatest() {
+		return repo + ":latest"
+	}
+	return repo + ":" + strings.TrimSpace(version.GitCommit)
+}
+
+// useLatest reports whether the current binary should reach for the mutable
+// :latest tag rather than a commit-pinned tag.
+func useLatest() bool {
+	if strings.EqualFold(strings.TrimSpace(os.Getenv("OBOL_DEVELOPMENT")), "true") {
+		return true
+	}
+	commit := strings.TrimSpace(version.GitCommit)
+	if commit == "" || commit == "unknown" || commit == "dev" {
+		return true
+	}
+	if strings.EqualFold(strings.TrimSpace(version.GitDirty), "true") {
+		return true
+	}
+	return false
+}
diff --git a/internal/images/images_test.go b/internal/images/images_test.go
new file mode 100644
index 00000000..f7629aaa
--- /dev/null
+++ b/internal/images/images_test.go
@@ -0,0 +1,102 @@
+package images
+
+import (
+	"testing"
+
+	"github.com/ObolNetwork/obol-stack/internal/version"
+)
+
+// withVersion temporarily overrides the version package globals for one test.
+// Restores them in t.Cleanup so subsequent tests see the package's natural
+// state (whatever ldflags or defaults left behind).
+func withVersion(t *testing.T, commit, dirty string) {
+	t.Helper()
+	prevCommit := version.GitCommit
+	prevDirty := version.GitDirty
+	version.GitCommit = commit
+	version.GitDirty = dirty
+	t.Cleanup(func() {
+		version.GitCommit = prevCommit
+		version.GitDirty = prevDirty
+	})
+}
+
+func TestResolve_DevModeForcesLatest(t *testing.T) {
+	// Even when GitCommit is set to a real SHA, OBOL_DEVELOPMENT=true must win.
+	// The local-build path imports images into k3d as :latest, so the manifest
+	// must reference :latest to actually pick up the local image.
+	withVersion(t, "abc1234", "false")
+	t.Setenv("OBOL_DEVELOPMENT", "true")
+
+	got := Resolve("ghcr.io/obolnetwork/demo-server")
+	want := "ghcr.io/obolnetwork/demo-server:latest"
+	if got != want {
+		t.Errorf("Resolve = %q, want %q", got, want)
+	}
+}
+
+func TestResolve_UnknownCommitFallsBackToLatest(t *testing.T) {
+	// Binaries built without ldflags leave GitCommit at "unknown". There's no
+	// matching CI image tag, so :latest is the only safe choice.
+	withVersion(t, "unknown", "false")
+	t.Setenv("OBOL_DEVELOPMENT", "")
+
+	got := Resolve("ghcr.io/obolnetwork/demo-server")
+	want := "ghcr.io/obolnetwork/demo-server:latest"
+	if got != want {
+		t.Errorf("Resolve = %q, want %q", got, want)
+	}
+}
+
+func TestResolve_DirtyRepoFallsBackToLatest(t *testing.T) {
+	// A dirty build has no published image — its commit doesn't match anything
+	// on GHCR. Use :latest rather than producing a tag that 404s.
+	withVersion(t, "abc1234", "true")
+	t.Setenv("OBOL_DEVELOPMENT", "")
+
+	got := Resolve("ghcr.io/obolnetwork/demo-server")
+	want := "ghcr.io/obolnetwork/demo-server:latest"
+	if got != want {
+		t.Errorf("Resolve = %q, want %q", got, want)
+	}
+}
+
+func TestResolve_ProductionUsesCommitPin(t *testing.T) {
+	// Released binary: GitCommit is the short SHA, repo is clean, not in dev
+	// mode. Result must be the commit-pinned tag — this is what makes binary
+	// upgrades roll the K8s pods automatically.
+	withVersion(t, "abc1234", "false")
+	t.Setenv("OBOL_DEVELOPMENT", "")
+
+	got := Resolve("ghcr.io/obolnetwork/demo-server")
+	want := "ghcr.io/obolnetwork/demo-server:abc1234"
+	if got != want {
+		t.Errorf("Resolve = %q, want %q", got, want)
+	}
+}
+
+func TestResolve_EmptyCommitFallsBackToLatest(t *testing.T) {
+	withVersion(t, "", "false")
+	t.Setenv("OBOL_DEVELOPMENT", "")
+
+	got := Resolve("ghcr.io/obolnetwork/storefront")
+	want := "ghcr.io/obolnetwork/storefront:latest"
+	if got != want {
+		t.Errorf("Resolve = %q, want %q", got, want)
+	}
+}
+
+func TestResolve_DevModeCaseInsensitive(t *testing.T) {
+	// People sometimes set OBOL_DEVELOPMENT=True or =TRUE. Don't penalise
+	// them — the env var is binary-true, not a string match.
+	withVersion(t, "abc1234", "false")
+
+	for _, val := range []string{"true", "TRUE", "True", "tRuE"} {
+		t.Run(val, func(t *testing.T) {
+			t.Setenv("OBOL_DEVELOPMENT", val)
+			if got := Resolve("img"); got != "img:latest" {
+				t.Errorf("OBOL_DEVELOPMENT=%q: Resolve = %q, want img:latest", val, got)
+			}
+		})
+	}
+}
diff --git a/internal/tunnel/tunnel.go b/internal/tunnel/tunnel.go
index 76e83b53..ed56caf8 100644
--- a/internal/tunnel/tunnel.go
+++ b/internal/tunnel/tunnel.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/ObolNetwork/obol-stack/internal/agentruntime"
 	"github.com/ObolNetwork/obol-stack/internal/config"
+	"github.com/ObolNetwork/obol-stack/internal/images"
 	"github.com/ObolNetwork/obol-stack/internal/ui"
 )
 
@@ -532,7 +533,7 @@ func CreateStorefront(cfg *config.Config, tunnelURL string) error {
 						"containers": []map[string]any{
 								{
 									"name":            "storefront",
-									"image":           "ghcr.io/obolnetwork/obol-stack-public-storefront:latest",
+									"image":           images.Resolve("ghcr.io/obolnetwork/obol-stack-public-storefront"),
 									"imagePullPolicy": "IfNotPresent",
 									"ports": []map[string]any{
 										{"containerPort": 3000, "name": "http"},

From 543666c64243f97773ac0267c063b25fd9fc7235 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Sun, 3 May 2026 22:54:09 +0100
Subject: [PATCH 3/9] Lost conflicted fixes

---
 internal/hermes/hermes_test.go |  4 ++--
 internal/stack/stack.go        | 43 ++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/internal/hermes/hermes_test.go b/internal/hermes/hermes_test.go
index 734477a1..4e27c869 100644
--- a/internal/hermes/hermes_test.go
+++ b/internal/hermes/hermes_test.go
@@ -150,8 +150,8 @@ func TestGenerateValues_UsesHermesNativeNames(t *testing.T) {
 		`Timed out waiting for Hermes install lock`,
 		`git clone --depth 1 "$repo_url" "${install_dir}.tmp"`,
 		"uv venv --python python3 --system-site-packages venv",
-		`uv pip install -e ".[web]"`,
-		`import fastapi, uvicorn`,
+		`uv pip install -e ".[web,messaging,mcp,pty,cli,acp,google]"`,
+		`import fastapi, uvicorn, telegram, mcp, ptyprocess, simple_term_menu, googleapiclient`,
 		`PRAGMA quick_check`,
 		`state-db-corrupt-$ts`,
 		`- "/data/.hermes/hermes-agent/venv/bin/hermes"`,
diff --git a/internal/stack/stack.go b/internal/stack/stack.go
index 90700775..b6704e61 100644
--- a/internal/stack/stack.go
+++ b/internal/stack/stack.go
@@ -362,6 +362,18 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s
 		u.Warnf("Failed to preserve LiteLLM config across Helm sync: %v", err)
 	}
 
+	// Release runtime field ownership of litellm-config.data.config.yaml so the
+	// upcoming helm upgrade can reclaim it without an SSA conflict. Without
+	// this step, the second `obol stack up` after autoConfigureLLM/restore has
+	// claimed the field via SSA (manager=helm, op=Apply) fails with
+	// "conflict with helm using v1: .data.config.yaml" because helm registers
+	// a separate managedFields entry (manager=helm, op=Update) for the same
+	// field. The data is already snapshotted in previousLiteLLMConfig and gets
+	// re-applied by restoreLiteLLMConfig after helm runs.
+	if err := releaseLiteLLMConfigOwnership(cfg, kubeconfigPath); err != nil {
+		u.Warnf("Failed to release LiteLLM config field ownership: %v", err)
+	}
+
 	// Compatibility migration
 	if err := migrateDefaultsHTTPRouteHostnames(helmfilePath); err != nil {
 		u.Warnf("Failed to migrate defaults helmfile hostnames: %v", err)
@@ -907,6 +919,37 @@ func preserveLiteLLMConfigForHelm(cfg *config.Config, kubeconfigPath string) (st
 	return raw, nil
 }
 
+// releaseLiteLLMConfigOwnership strips managedFields from the litellm-config
+// ConfigMap so the next helm upgrade can claim ownership of every field
+// without an SSA conflict. Helm tracks release ownership via the
+// meta.helm.sh/release-name annotation, not managedFields, so clearing
+// managedFields does not detach the resource from its release.
+//
+// The single empty entry [{}] is the documented apiserver idiom for clearing
+// all field-ownership claims on a resource. See:
+// https://kubernetes.io/docs/reference/using-api/server-side-apply/#clearing-managedfields
+func releaseLiteLLMConfigOwnership(cfg *config.Config, kubeconfigPath string) error {
+	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
+
+	// Skip when the lookup fails for any reason; a missing configmap (first install) is the expected case.
+	if _, err := kubectl.Output(kubectlBinary, kubeconfigPath,
+		"get", "configmap", "litellm-config", "-n", "llm", "-o", "name"); err != nil {
+		return nil
+	}
+
+	cmd := exec.Command(kubectlBinary,
+		"patch", "configmap", "litellm-config",
+		"-n", "llm",
+		"--type=merge",
+		"--patch", `{"metadata":{"managedFields":[{}]}}`,
+	)
+	cmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath)
+	if out, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("kubectl patch managedFields: %w\n%s", err, string(out))
+	}
+	return nil
+}
+
 func restoreLiteLLMConfig(cfg *config.Config, kubeconfigPath, raw string) error {
 	if strings.TrimSpace(raw) == "" {
 		return nil

From 2df7f80490e76012f2df948edfbb7a5e9d91ce28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Mon, 4 May 2026 00:19:51 +0100
Subject: [PATCH 4/9] Make wallet backup work for hermes too

---
 cmd/obol/agent.go                          |  48 ++--
 internal/hermes/wallet_backup.go           | 242 +++++++++++++++++
 internal/openclaw/wallet_backup.go         | 296 ++++-----------------
 internal/walletbackup/walletbackup.go      | 246 +++++++++++++++++
 internal/walletbackup/walletbackup_test.go |  99 +++++++
 5 files changed, 666 insertions(+), 265 deletions(-)
 create mode 100644 internal/hermes/wallet_backup.go
 create mode 100644 internal/walletbackup/walletbackup.go
 create mode 100644 internal/walletbackup/walletbackup_test.go

diff --git a/cmd/obol/agent.go b/cmd/obol/agent.go
index eb687258..123fbfbe 100644
--- a/cmd/obol/agent.go
+++ b/cmd/obol/agent.go
@@ -227,14 +227,22 @@ func agentWalletCommand(cfg *config.Config) *cli.Command {
 					if err != nil {
 						return err
 					}
-					if target.Runtime != agentruntime.OpenClaw {
-						return errors.New("Hermes wallet backup needs a Hermes-native product decision; use OpenClaw backup only for OpenClaw instances")
+					switch target.Runtime {
+					case agentruntime.Hermes:
+						return hermes.BackupWalletCmd(cfg, target.ID, hermes.BackupWalletOptions{
+							Output:      cmd.String("output"),
+							Passphrase:  cmd.String("passphrase"),
+							HasPassFlag: cmd.IsSet("passphrase"),
+						}, getUI(cmd))
+					case agentruntime.OpenClaw:
+						return openclaw.BackupWalletCmd(cfg, target.ID, openclaw.BackupWalletOptions{
+							Output:      cmd.String("output"),
+							Passphrase:  cmd.String("passphrase"),
+							HasPassFlag: cmd.IsSet("passphrase"),
+						}, getUI(cmd))
+					default:
+						return fmt.Errorf("unsupported runtime %q", target.Runtime)
 					}
-					return openclaw.BackupWalletCmd(cfg, target.ID, openclaw.BackupWalletOptions{
-						Output:      cmd.String("output"),
-						Passphrase:  cmd.String("passphrase"),
-						HasPassFlag: cmd.IsSet("passphrase"),
-					}, getUI(cmd))
 				},
 			},
 			{
@@ -266,15 +274,25 @@ func agentWalletCommand(cfg *config.Config) *cli.Command {
 					if err != nil {
 						return err
 					}
-					if target.Runtime != agentruntime.OpenClaw {
-						return errors.New("Hermes wallet restore needs a Hermes-native product decision; use OpenClaw restore only for OpenClaw instances")
+					switch target.Runtime {
+					case agentruntime.Hermes:
+						return hermes.RestoreWalletCmd(cfg, target.ID, hermes.RestoreWalletOptions{
+							Input:        cmd.String("input"),
+							Passphrase:   cmd.String("passphrase"),
+							HasPassFlag:  cmd.IsSet("passphrase"),
+							Force:        cmd.Bool("force"),
+							ApplyCluster: true,
+						}, getUI(cmd))
+					case agentruntime.OpenClaw:
+						return openclaw.RestoreWalletCmd(cfg, target.ID, openclaw.RestoreWalletOptions{
+							Input:       cmd.String("input"),
+							Passphrase:  cmd.String("passphrase"),
+							HasPassFlag: cmd.IsSet("passphrase"),
+							Force:       cmd.Bool("force"),
+						}, getUI(cmd))
+					default:
+						return fmt.Errorf("unsupported runtime %q", target.Runtime)
 					}
-					return openclaw.RestoreWalletCmd(cfg, target.ID, openclaw.RestoreWalletOptions{
-						Input:       cmd.String("input"),
-						Passphrase:  cmd.String("passphrase"),
-						HasPassFlag: cmd.IsSet("passphrase"),
-						Force:       cmd.Bool("force"),
-					}, getUI(cmd))
 				},
 			},
 		},
diff --git a/internal/hermes/wallet_backup.go b/internal/hermes/wallet_backup.go
new file mode 100644
index 00000000..99ad3f0a
--- /dev/null
+++ b/internal/hermes/wallet_backup.go
@@ -0,0 +1,242 @@
+package hermes
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+
+	"github.com/ObolNetwork/obol-stack/internal/agentruntime"
+	"github.com/ObolNetwork/obol-stack/internal/config"
+	"github.com/ObolNetwork/obol-stack/internal/kubectl"
+	"github.com/ObolNetwork/obol-stack/internal/ui"
+	"github.com/ObolNetwork/obol-stack/internal/walletbackup"
+)
+
+// BackupWalletOptions holds options for `obol agent wallet backup`.
+type BackupWalletOptions struct {
+	Output      string
+	Passphrase  string
+	HasPassFlag bool
+}
+
+// RestoreWalletOptions holds options for `obol agent wallet restore`.
+type RestoreWalletOptions struct {
+	Input        string
+	Passphrase   string
+	HasPassFlag  bool
+	Force        bool
+	ApplyCluster bool
+}
+
+// BackupWalletCmd creates a backup of the Hermes instance's remote-signer
+// wallet. The on-disk format is identical to OpenClaw's, so a Hermes backup
+// can be restored into an OpenClaw instance and vice versa — the payload
+// records the source instance ID, but restore targets the ID it is given.
+func BackupWalletCmd(cfg *config.Config, id string, opts BackupWalletOptions, u *ui.UI) error {
+	deployDir := DeploymentPath(cfg, id)
+
+	wallet, err := ReadWalletMetadata(deployDir)
+	if err != nil {
+		return fmt.Errorf("no wallet found for instance %q: %w", id, err)
+	}
+
+	keystorePath := filepath.Join(agentruntime.KeystoreVolumePath(cfg, agentruntime.Hermes, id), wallet.KeystoreUUID+".json")
+	keystoreData, err := os.ReadFile(keystorePath)
+	if err != nil {
+		return fmt.Errorf("failed to read keystore file: %w", err)
+	}
+
+	password, err := walletbackup.ReadKeystorePassword(deployDir)
+	if err != nil {
+		return fmt.Errorf("failed to read keystore password: %w", err)
+	}
+
+	backup := &walletbackup.File{
+		Version:  walletbackup.Version,
+		Instance: id,
+		Wallets: []walletbackup.Wallet{{
+			Address:          wallet.Address,
+			PublicKey:        wallet.PublicKey,
+			KeystoreUUID:     wallet.KeystoreUUID,
+			CreatedAt:        wallet.CreatedAt,
+			Keystore:         json.RawMessage(keystoreData),
+			KeystorePassword: password,
+		}},
+	}
+
+	passphrase, err := walletbackup.PromptPassphrase(opts.Passphrase, opts.HasPassFlag, u)
+	if err != nil {
+		return err
+	}
+
+	payload, encrypted, err := walletbackup.Encode(backup, passphrase)
+	if err != nil {
+		return err
+	}
+
+	addrSuffix := wallet.Address
+	if len(addrSuffix) > 8 {
+		addrSuffix = addrSuffix[len(addrSuffix)-8:]
+	}
+	outputPath := opts.Output
+	if outputPath == "" {
+		ext := "json"
+		if encrypted {
+			ext = "enc"
+		}
+		outputPath = fmt.Sprintf("obol-wallet-backup-%s.%s", addrSuffix, ext)
+	}
+
+	if err := os.WriteFile(outputPath, payload, 0o600); err != nil {
+		return fmt.Errorf("failed to write backup: %w", err)
+	}
+
+	u.Success("Wallet backup created")
+	u.Detail("Address", wallet.Address)
+	u.Detail("Output", outputPath)
+	if encrypted {
+		u.Detail("Encrypted", "yes (AES-256-GCM)")
+	} else {
+		u.Detail("Encrypted", "no")
+		u.Warn("Backup contains unencrypted keystore password — store securely")
+	}
+	return nil
+}
+
+// RestoreWalletCmd restores a Hermes wallet from a backup file. Mirrors
+// openclaw.RestoreWalletCmd, sharing the wire format via walletbackup.
+func RestoreWalletCmd(cfg *config.Config, id string, opts RestoreWalletOptions, u *ui.UI) error {
+	raw, err := os.ReadFile(opts.Input)
+	if err != nil {
+		return fmt.Errorf("failed to read backup file: %w", err)
+	}
+
+	passphrase := opts.Passphrase
+	if walletbackup.IsEncrypted(raw) && !opts.HasPassFlag {
+		passphrase, err = u.SecretInput("Backup passphrase")
+		if err != nil {
+			return fmt.Errorf("failed to read passphrase: %w", err)
+		}
+	}
+
+	backup, err := walletbackup.Decode(raw, passphrase)
+	if err != nil {
+		return err
+	}
+
+	w := backup.Wallets[0] // NOTE(review): panics on an empty Wallets slice; assumes walletbackup.Decode rejects that — confirm
+
+	deployDir := DeploymentPath(cfg, id)
+	if _, err := os.Stat(deployDir); os.IsNotExist(err) {
+		return fmt.Errorf("instance %q not found — run 'obol agent new --runtime hermes --id %s' first", id, id)
+	}
+
+	existingWallet, _ := ReadWalletMetadata(deployDir) // error ignored; only a non-nil result counts as an existing wallet
+	if existingWallet != nil && !opts.Force {
+		return fmt.Errorf("instance %q already has a wallet (address: %s)\nUse --force to overwrite", id, existingWallet.Address)
+	}
+
+	keystoreDir := agentruntime.KeystoreVolumePath(cfg, agentruntime.Hermes, id)
+	ensureVolumeWritableFn(cfg, keystoreDir, u)
+	if err := os.MkdirAll(keystoreDir, 0o700); err != nil {
+		return fmt.Errorf("failed to create keystore directory: %w", err)
+	}
+
+	keystorePath := filepath.Join(keystoreDir, w.KeystoreUUID+".json")
+	if err := os.WriteFile(keystorePath, []byte(w.Keystore), 0o600); err != nil {
+		return fmt.Errorf("failed to write keystore: %w", err)
+	}
+	fixRuntimeVolumeOwnershipFn(cfg, keystoreDir, u)
+
+	walletInfo := &WalletInfo{
+		Address:      w.Address,
+		PublicKey:    w.PublicKey,
+		KeystoreUUID: w.KeystoreUUID,
+		KeystorePath: keystorePath,
+		CreatedAt:    w.CreatedAt,
+		Password:     w.KeystorePassword,
+	}
+
+	rsValues := generateRemoteSignerValues(walletInfo)
+	if err := walletbackup.WriteValuesRemoteSigner(deployDir, rsValues); err != nil {
+		return fmt.Errorf("failed to write values-remote-signer.yaml: %w", err)
+	}
+	if err := WriteWalletMetadata(deployDir, walletInfo); err != nil {
+		return fmt.Errorf("failed to write wallet metadata: %w", err)
+	}
+	if err := archiveReplacedHermesKeystore(cfg, id, existingWallet, w.KeystoreUUID, u); err != nil {
+		return fmt.Errorf("failed to archive replaced keystore: %w", err)
+	}
+
+	if opts.ApplyCluster {
+		applyHermesKeystorePasswordSecret(cfg, id, w.KeystorePassword, u)
+		restartHermesRemoteSignerFn(cfg, id, u)
+	}
+
+	u.Success("Wallet restored")
+	u.Detail("Address", w.Address)
+	u.Detail("Instance", id)
+	return nil
+}
+
+// FindInstancesWithWallets returns Hermes instance IDs that have wallet
+// metadata on disk. Used by purge prompts.
+func FindInstancesWithWallets(cfg *config.Config) []string {
+	ids, err := agentruntime.ListInstanceIDs(cfg, agentruntime.Hermes)
+	if err != nil {
+		return nil
+	}
+	var out []string
+	for _, id := range ids {
+		if _, err := ReadWalletMetadata(DeploymentPath(cfg, id)); err == nil {
+			out = append(out, id)
+		}
+	}
+	return out
+}
+
+// applyHermesKeystorePasswordSecret applies the remote-signer keystore
+// password Secret in the instance namespace. Best-effort; if the cluster is
+// down the caller is expected to re-sync later.
+func applyHermesKeystorePasswordSecret(cfg *config.Config, id, password string, u *ui.UI) {
+	if password == "" {
+		return
+	}
+	namespace := agentruntime.Namespace(agentruntime.Hermes, id)
+	manifest := map[string]any{
+		"apiVersion": "v1",
+		"kind":       "Secret",
+		"metadata": map[string]any{
+			"name":      "remote-signer-keystore-password",
+			"namespace": namespace,
+			"labels": map[string]string{
+				"app.kubernetes.io/component":  "remote-signer",
+				"app.kubernetes.io/managed-by": "obol",
+			},
+		},
+		"type": "Opaque",
+		"stringData": map[string]string{
+			"password": password,
+		},
+	}
+	raw, err := json.Marshal(manifest)
+	if err != nil {
+		u.Warnf("Could not marshal remote-signer password Secret: %v", err)
+		return
+	}
+
+	kubectlBin, kubeconfig := kubectl.Paths(cfg)
+	cmd := exec.Command(kubectlBin, "apply", "-f", "-")
+	cmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfig)
+	cmd.Stdin = bytes.NewReader(raw)
+	var stderr bytes.Buffer
+	cmd.Stderr = &stderr // NOTE(review): captured but never read below, so kubectl's error text is not surfaced
+	if err := cmd.Run(); err != nil {
+		u.Blank()
+		u.Warnf("Could not update remote-signer password Secret (cluster may not be running)")
+		u.Printf("Run 'obol agent sync %s' to apply changes to the cluster.", id)
+	}
+}
diff --git a/internal/openclaw/wallet_backup.go b/internal/openclaw/wallet_backup.go
index 2412724b..6975c8f0 100644
--- a/internal/openclaw/wallet_backup.go
+++ b/internal/openclaw/wallet_backup.go
@@ -2,9 +2,6 @@ package openclaw
 
 import (
 	"bytes"
-	"crypto/aes"
-	"crypto/cipher"
-	"crypto/rand"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -17,31 +14,16 @@ import (
 	"github.com/ObolNetwork/obol-stack/internal/config"
 	"github.com/ObolNetwork/obol-stack/internal/kubectl"
 	"github.com/ObolNetwork/obol-stack/internal/ui"
-	"golang.org/x/crypto/scrypt"
-	"gopkg.in/yaml.v3"
+	"github.com/ObolNetwork/obol-stack/internal/walletbackup"
 )
 
-// backupMagic is the first 4 bytes of an encrypted backup file.
-var backupMagic = []byte("OBOL")
+// BackupFile is re-exported from walletbackup for backwards-compatibility
+// with existing OpenClaw callers. Both runtimes share the same on-disk shape.
+type BackupFile = walletbackup.File
 
-const backupVersion byte = 1
-
-// BackupFile is the JSON structure of a wallet backup.
-type BackupFile struct {
-	Version  int            `json:"version"`
-	Instance string         `json:"instance"`
-	Wallets  []BackupWallet `json:"wallets"`
-}
-
-// BackupWallet holds a single wallet's backup data.
-type BackupWallet struct {
-	Address          string          `json:"address"`
-	PublicKey        string          `json:"publicKey"`
-	KeystoreUUID     string          `json:"keystoreUUID"`
-	CreatedAt        string          `json:"createdAt"`
-	Keystore         json.RawMessage `json:"keystore"`
-	KeystorePassword string          `json:"keystorePassword"`
-}
+// BackupWallet is re-exported from walletbackup so the OpenClaw subcommand
+// surface stays unchanged.
+type BackupWallet = walletbackup.Wallet
 
 // BackupWalletOptions holds options for the backup command.
 type BackupWalletOptions struct {
@@ -90,76 +72,55 @@ func BackupWalletCmd(cfg *config.Config, id string, opts BackupWalletOptions, u
 		return fmt.Errorf("failed to read keystore password: %w", err)
 	}
 
-	// Build backup structure.
-	backup := BackupFile{
-		Version:  1,
+	backup := &walletbackup.File{
+		Version:  walletbackup.Version,
 		Instance: id,
-		Wallets: []BackupWallet{
-			{
-				Address:          wallet.Address,
-				PublicKey:        wallet.PublicKey,
-				KeystoreUUID:     wallet.KeystoreUUID,
-				CreatedAt:        wallet.CreatedAt,
-				Keystore:         json.RawMessage(keystoreData),
-				KeystorePassword: password,
-			},
-		},
+		Wallets: []walletbackup.Wallet{{
+			Address:          wallet.Address,
+			PublicKey:        wallet.PublicKey,
+			KeystoreUUID:     wallet.KeystoreUUID,
+			CreatedAt:        wallet.CreatedAt,
+			Keystore:         json.RawMessage(keystoreData),
+			KeystorePassword: password,
+		}},
 	}
 
-	backupJSON, err := json.MarshalIndent(backup, "", "  ")
+	passphrase, err := walletbackup.PromptPassphrase(opts.Passphrase, opts.HasPassFlag, u)
 	if err != nil {
-		return fmt.Errorf("failed to marshal backup: %w", err)
+		return err
 	}
 
-	// Determine passphrase.
-	passphrase, err := resolvePassphrase(opts.Passphrase, opts.HasPassFlag, u)
+	payload, encrypted, err := walletbackup.Encode(backup, passphrase)
 	if err != nil {
 		return err
 	}
 
-	// Determine output path and write.
 	addrSuffix := wallet.Address
 	if len(addrSuffix) > 8 {
 		addrSuffix = addrSuffix[len(addrSuffix)-8:]
 	}
-
 	outputPath := opts.Output
-	encrypted := passphrase != ""
-
 	if outputPath == "" {
+		ext := "json"
 		if encrypted {
-			outputPath = fmt.Sprintf("obol-wallet-backup-%s.enc", addrSuffix)
-		} else {
-			outputPath = fmt.Sprintf("obol-wallet-backup-%s.json", addrSuffix)
+			ext = "enc"
 		}
+		outputPath = fmt.Sprintf("obol-wallet-backup-%s.%s", addrSuffix, ext)
 	}
 
-	if encrypted {
-		ciphertext, err := encryptBackup(backupJSON, passphrase)
-		if err != nil {
-			return fmt.Errorf("encryption failed: %w", err)
-		}
-
-		if err := os.WriteFile(outputPath, ciphertext, 0o600); err != nil {
-			return fmt.Errorf("failed to write backup: %w", err)
-		}
-	} else {
-		if err := os.WriteFile(outputPath, backupJSON, 0o600); err != nil {
-			return fmt.Errorf("failed to write backup: %w", err)
-		}
+	if err := os.WriteFile(outputPath, payload, 0o600); err != nil {
+		return fmt.Errorf("failed to write backup: %w", err)
 	}
 
 	u.Success("Wallet backup created")
 	u.Detail("Address", wallet.Address)
 	u.Detail("Output", outputPath)
-
 	if encrypted {
 		u.Detail("Encrypted", "yes (AES-256-GCM)")
 	} else {
 		u.Detail("Encrypted", "no")
 		u.Warn("Backup contains unencrypted keystore password — store securely")
 	}
-
 	return nil
 }
 
@@ -204,53 +165,26 @@ func ImportPrivateKeyWalletCmd(cfg *config.Config, id string, opts ImportPrivate
 
 // RestoreWalletCmd restores a wallet from a backup file.
 func RestoreWalletCmd(cfg *config.Config, id string, opts RestoreWalletOptions, u *ui.UI) error {
-	// Read backup file.
 	raw, err := os.ReadFile(opts.Input)
 	if err != nil {
 		return fmt.Errorf("failed to read backup file: %w", err)
 	}
 
-	// Detect format and decrypt if needed.
-	var backupJSON []byte
-
-	if isEncryptedBackup(raw) {
-		passphrase := opts.Passphrase
-		if !opts.HasPassFlag {
-			passphrase, err = u.SecretInput("Backup passphrase")
-			if err != nil {
-				return fmt.Errorf("failed to read passphrase: %w", err)
-			}
-		}
-
-		if passphrase == "" {
-			return errors.New("passphrase required for encrypted backup")
-		}
-
-		backupJSON, err = decryptBackup(raw, passphrase)
+	passphrase := opts.Passphrase
+	if walletbackup.IsEncrypted(raw) && !opts.HasPassFlag {
+		passphrase, err = u.SecretInput("Backup passphrase")
 		if err != nil {
-			return fmt.Errorf("decryption failed (wrong passphrase?): %w", err)
+			return fmt.Errorf("failed to read passphrase: %w", err)
 		}
-	} else {
-		backupJSON = raw
 	}
 
-	// Parse backup.
-	var backup BackupFile
-	if err := json.Unmarshal(backupJSON, &backup); err != nil {
-		return fmt.Errorf("invalid backup file: %w", err)
-	}
-
-	if backup.Version != 1 {
-		return fmt.Errorf("unsupported backup version %d (expected 1)", backup.Version)
-	}
-
-	if len(backup.Wallets) == 0 {
-		return errors.New("backup contains no wallets")
+	backup, err := walletbackup.Decode(raw, passphrase)
+	if err != nil {
+		return err
 	}
 
 	w := backup.Wallets[0]
 
-	// Verify deployment dir exists.
 	deployDir := DeploymentPath(cfg, id)
 	if _, err := os.Stat(deployDir); os.IsNotExist(err) {
 		return fmt.Errorf("instance %q not found — run 'obol openclaw onboard --id %s' first", id, id)
@@ -477,172 +411,34 @@ func keystorePasswordSecretManifest(id, password string) ([]byte, error) {
 	return json.Marshal(manifest)
 }
 
-// resolvePassphrase determines the passphrase via flag or interactive prompt.
+// resolvePassphrase delegates to walletbackup.PromptPassphrase. Kept as a
+// package-private wrapper so existing OpenClaw call sites stay compact.
 func resolvePassphrase(flagValue string, hasFlag bool, u *ui.UI) (string, error) {
-	if hasFlag {
-		return flagValue, nil
-	}
-
-	passphrase, err := u.SecretInput("Backup passphrase (empty for no encryption)")
-	if err != nil {
-		return "", fmt.Errorf("failed to read passphrase: %w", err)
-	}
-
-	if passphrase != "" {
-		confirm, err := u.SecretInput("Confirm passphrase")
-		if err != nil {
-			return "", fmt.Errorf("failed to read confirmation: %w", err)
-		}
-
-		if passphrase != confirm {
-			return "", errors.New("passphrases do not match")
-		}
-	}
-
-	return passphrase, nil
+	return walletbackup.PromptPassphrase(flagValue, hasFlag, u)
 }
 
-// readKeystorePassword extracts the keystore password from values-remote-signer.yaml.
+// readKeystorePassword delegates to walletbackup.ReadKeystorePassword.
 func readKeystorePassword(deployDir string) (string, error) {
-	data, err := os.ReadFile(filepath.Join(deployDir, "values-remote-signer.yaml"))
-	if err != nil {
-		return "", err
-	}
-
-	var values struct {
-		KeystorePassword struct {
-			Value string `yaml:"value"`
-		} `yaml:"keystorePassword"`
-	}
-	if err := yaml.Unmarshal(data, &values); err != nil {
-		return "", fmt.Errorf("failed to parse values-remote-signer.yaml: %w", err)
-	}
-
-	if values.KeystorePassword.Value == "" {
-		return "", errors.New("keystorePassword.value not found in values-remote-signer.yaml")
-	}
-
-	return values.KeystorePassword.Value, nil
+	return walletbackup.ReadKeystorePassword(deployDir)
 }
 
-// writeKeystorePassword writes the remote-signer values YAML with the given password.
+// writeKeystorePassword renders the remote-signer values YAML for the given
+// password and writes it under deployDir.
 func writeKeystorePassword(deployDir, password string) error {
 	content := generateRemoteSignerValues(&WalletInfo{Password: password})
-	return os.WriteFile(filepath.Join(deployDir, "values-remote-signer.yaml"), []byte(content), 0o600)
+	return walletbackup.WriteValuesRemoteSigner(deployDir, content)
 }
 
-// encryptBackup encrypts plaintext using AES-256-GCM with a scrypt-derived key.
-// Format: magic(4) || version(1) || salt(32) || nonce(12) || ciphertext+tag
-func encryptBackup(plaintext []byte, passphrase string) ([]byte, error) {
-	salt := make([]byte, 32)
-	if _, err := rand.Read(salt); err != nil {
-		return nil, fmt.Errorf("salt generation: %w", err)
-	}
-
-	key, err := scrypt.Key([]byte(passphrase), salt, scryptN, scryptR, scryptP, scryptDKLen)
-	if err != nil {
-		return nil, fmt.Errorf("scrypt key derivation: %w", err)
-	}
-
-	block, err := aes.NewCipher(key)
-	if err != nil {
-		return nil, fmt.Errorf("aes cipher: %w", err)
-	}
-
-	gcm, err := cipher.NewGCM(block)
-	if err != nil {
-		return nil, fmt.Errorf("gcm: %w", err)
-	}
-
-	nonce := make([]byte, gcm.NonceSize())
-	if _, err := rand.Read(nonce); err != nil {
-		return nil, fmt.Errorf("nonce generation: %w", err)
-	}
-
-	ciphertext := gcm.Seal(nil, nonce, plaintext, nil)
+// Compat shims for tests in this package that exercise the crypto envelope
+// directly. New code should call walletbackup.Encrypt/Decrypt/IsEncrypted.
+func isEncryptedBackup(data []byte) bool { return walletbackup.IsEncrypted(data) }
 
-	// Assemble: magic || version || salt || nonce || ciphertext
-	result := make([]byte, 0, len(backupMagic)+1+len(salt)+len(nonce)+len(ciphertext))
-	result = append(result, backupMagic...)
-	result = append(result, backupVersion)
-	result = append(result, salt...)
-	result = append(result, nonce...)
-	result = append(result, ciphertext...)
-
-	return result, nil
+func encryptBackup(plaintext []byte, passphrase string) ([]byte, error) {
+	return walletbackup.Encrypt(plaintext, passphrase)
 }
 
-// decryptBackup decrypts an encrypted backup file.
 func decryptBackup(data []byte, passphrase string) ([]byte, error) {
-	minLen := len(backupMagic) + 1 + 32 + 12 // magic + version + salt + nonce
-	if len(data) < minLen {
-		return nil, errors.New("encrypted file too short")
-	}
-
-	offset := 0
-
-	// Verify magic.
-	if string(data[offset:offset+len(backupMagic)]) != string(backupMagic) {
-		return nil, errors.New("not an encrypted backup file")
-	}
-
-	offset += len(backupMagic)
-
-	// Check version.
-	version := data[offset]
-	offset++
-
-	if version != backupVersion {
-		return nil, fmt.Errorf("unsupported encryption version %d", version)
-	}
-
-	// Extract salt.
-	salt := data[offset : offset+32]
-	offset += 32
-
-	// Derive key.
-	key, err := scrypt.Key([]byte(passphrase), salt, scryptN, scryptR, scryptP, scryptDKLen)
-	if err != nil {
-		return nil, fmt.Errorf("scrypt key derivation: %w", err)
-	}
-
-	block, err := aes.NewCipher(key)
-	if err != nil {
-		return nil, fmt.Errorf("aes cipher: %w", err)
-	}
-
-	gcm, err := cipher.NewGCM(block)
-	if err != nil {
-		return nil, fmt.Errorf("gcm: %w", err)
-	}
-
-	// Extract nonce.
-	nonceSize := gcm.NonceSize()
-	if len(data) < offset+nonceSize {
-		return nil, errors.New("encrypted file too short for nonce")
-	}
-
-	nonce := data[offset : offset+nonceSize]
-	offset += nonceSize
-
-	// Decrypt.
-	ciphertext := data[offset:]
-
-	plaintext, err := gcm.Open(nil, nonce, ciphertext, nil)
-	if err != nil {
-		return nil, fmt.Errorf("decryption failed: %w", err)
-	}
-
-	return plaintext, nil
-}
-
-// isEncryptedBackup checks if data starts with the OBOL magic bytes.
-func isEncryptedBackup(data []byte) bool {
-	if len(data) < len(backupMagic) {
-		return false
-	}
-
-	return string(data[:len(backupMagic)]) == string(backupMagic)
+	return walletbackup.Decrypt(data, passphrase)
 }
 
 // walletAddressesForPurgeWarning returns addresses of wallets that would be lost.
diff --git a/internal/walletbackup/walletbackup.go b/internal/walletbackup/walletbackup.go
new file mode 100644
index 00000000..c527ee2b
--- /dev/null
+++ b/internal/walletbackup/walletbackup.go
@@ -0,0 +1,246 @@
+// Package walletbackup is the runtime-agnostic core of `obol agent wallet
+// backup` / `restore`. It owns the on-disk backup wire format, the
+// AES-256-GCM encryption envelope, and the helpers that read/write the
+// keystore password from values-remote-signer.yaml. Per-runtime callers
+// (internal/openclaw, internal/hermes) compose these primitives with their
+// own deployDir/keystoreDir conventions and namespace-specific cluster apply
+// steps. The on-disk format must round-trip across runtimes, so a backup
+// taken from an OpenClaw instance can restore into a Hermes one and vice
+// versa.
+package walletbackup
+
+import (
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/rand"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/ObolNetwork/obol-stack/internal/ui"
+	"golang.org/x/crypto/scrypt"
+	"gopkg.in/yaml.v3"
+)
+
+// Magic is the 4-byte prefix of an encrypted backup file.
+var Magic = []byte("OBOL")
+
+// Version is the backup-format version, used for both File.Version and the
+// envelope version byte. Decode and Decrypt accept only this exact value.
+const Version = 1
+
+const (
+	scryptN     = 262144
+	scryptR     = 8
+	scryptP     = 1
+	scryptDKLen = 32
+)
+
+// File is the JSON shape of a wallet backup. One backup may carry multiple
+// wallets; today both runtimes write a single-wallet file.
+type File struct {
+	Version  int      `json:"version"`
+	Instance string   `json:"instance"`
+	Wallets  []Wallet `json:"wallets"`
+}
+
+// Wallet holds a single wallet's backup data — enough to restore both the
+// keystore JSON on disk and the keystore password the remote-signer needs.
+type Wallet struct {
+	Address          string          `json:"address"`
+	PublicKey        string          `json:"publicKey"`
+	KeystoreUUID     string          `json:"keystoreUUID"`
+	CreatedAt        string          `json:"createdAt"`
+	Keystore         json.RawMessage `json:"keystore"`
+	KeystorePassword string          `json:"keystorePassword"`
+}
+
+// Encode marshals a backup to bytes. If passphrase is non-empty, it returns
+// an encrypted blob; otherwise it returns the indented JSON. The second
+// return value reports which form was emitted.
+func Encode(f *File, passphrase string) ([]byte, bool, error) {
+	plain, err := json.MarshalIndent(f, "", "  ")
+	if err != nil {
+		return nil, false, fmt.Errorf("marshal backup: %w", err)
+	}
+	if passphrase == "" {
+		return plain, false, nil
+	}
+	enc, err := Encrypt(plain, passphrase)
+	if err != nil {
+		return nil, false, err
+	}
+	return enc, true, nil
+}
+
+// Decode parses raw bytes into a File. If the input starts with the OBOL
+// magic, a non-empty passphrase is required; otherwise passphrase is ignored.
+func Decode(data []byte, passphrase string) (*File, error) {
+	var plain []byte
+	if IsEncrypted(data) {
+		if passphrase == "" {
+			return nil, errors.New("passphrase required for encrypted backup")
+		}
+		dec, err := Decrypt(data, passphrase)
+		if err != nil {
+			return nil, fmt.Errorf("decryption failed (wrong passphrase?): %w", err)
+		}
+		plain = dec
+	} else {
+		plain = data
+	}
+
+	var f File
+	if err := json.Unmarshal(plain, &f); err != nil {
+		return nil, fmt.Errorf("invalid backup file: %w", err)
+	}
+	if f.Version != Version {
+		return nil, fmt.Errorf("unsupported backup version %d (expected %d)", f.Version, Version)
+	}
+	if len(f.Wallets) == 0 {
+		return nil, errors.New("backup contains no wallets")
+	}
+	return &f, nil
+}
+
+// IsEncrypted reports whether data carries the OBOL magic prefix.
+func IsEncrypted(data []byte) bool {
+	if len(data) < len(Magic) {
+		return false
+	}
+	return string(data[:len(Magic)]) == string(Magic)
+}
+
+// PromptPassphrase resolves a passphrase for backup. If the caller already
+// passed --passphrase explicitly, hasFlag=true short-circuits the prompt
+// (even when flagValue is the empty string, which means "no encryption").
+func PromptPassphrase(flagValue string, hasFlag bool, u *ui.UI) (string, error) {
+	if hasFlag {
+		return flagValue, nil
+	}
+	pass, err := u.SecretInput("Backup passphrase (empty for no encryption)")
+	if err != nil {
+		return "", fmt.Errorf("failed to read passphrase: %w", err)
+	}
+	if pass == "" {
+		return "", nil
+	}
+	confirm, err := u.SecretInput("Confirm passphrase")
+	if err != nil {
+		return "", fmt.Errorf("failed to read confirmation: %w", err)
+	}
+	if pass != confirm {
+		return "", errors.New("passphrases do not match")
+	}
+	return pass, nil
+}
+
+// ReadKeystorePassword extracts keystorePassword.value from
+// values-remote-signer.yaml under deployDir. Both Hermes and OpenClaw write
+// the same shape, generated by their respective generateRemoteSignerValues.
+func ReadKeystorePassword(deployDir string) (string, error) {
+	data, err := os.ReadFile(filepath.Join(deployDir, "values-remote-signer.yaml"))
+	if err != nil {
+		return "", err
+	}
+	var values struct {
+		KeystorePassword struct {
+			Value string `yaml:"value"`
+		} `yaml:"keystorePassword"`
+	}
+	if err := yaml.Unmarshal(data, &values); err != nil {
+		return "", fmt.Errorf("failed to parse values-remote-signer.yaml: %w", err)
+	}
+	if values.KeystorePassword.Value == "" {
+		return "", errors.New("keystorePassword.value not found in values-remote-signer.yaml")
+	}
+	return values.KeystorePassword.Value, nil
+}
+
+// WriteValuesRemoteSigner writes the rendered values-remote-signer.yaml to
+// deployDir. Callers pass the runtime-specific rendered content (the YAML
+// shape is identical across runtimes, but the comment header differs).
+func WriteValuesRemoteSigner(deployDir, content string) error {
+	return os.WriteFile(filepath.Join(deployDir, "values-remote-signer.yaml"), []byte(content), 0o600)
+}
+
+// Encrypt wraps plaintext with AES-256-GCM under a scrypt-derived key.
+// Layout: magic(4) | version(1) | salt(32) | nonce(12) | ciphertext+tag.
+// Exported so callers (and crypto-only tests) can exercise the envelope
+// without going through Encode's JSON marshalling step.
+func Encrypt(plaintext []byte, passphrase string) ([]byte, error) {
+	salt := make([]byte, 32)
+	if _, err := rand.Read(salt); err != nil {
+		return nil, fmt.Errorf("salt generation: %w", err)
+	}
+	key, err := scrypt.Key([]byte(passphrase), salt, scryptN, scryptR, scryptP, scryptDKLen)
+	if err != nil {
+		return nil, fmt.Errorf("scrypt key derivation: %w", err)
+	}
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return nil, fmt.Errorf("aes cipher: %w", err)
+	}
+	gcm, err := cipher.NewGCM(block)
+	if err != nil {
+		return nil, fmt.Errorf("gcm: %w", err)
+	}
+	nonce := make([]byte, gcm.NonceSize())
+	if _, err := rand.Read(nonce); err != nil {
+		return nil, fmt.Errorf("nonce generation: %w", err)
+	}
+	ciphertext := gcm.Seal(nil, nonce, plaintext, nil)
+	out := make([]byte, 0, len(Magic)+1+len(salt)+len(nonce)+len(ciphertext))
+	out = append(out, Magic...)
+	out = append(out, byte(Version))
+	out = append(out, salt...)
+	out = append(out, nonce...)
+	out = append(out, ciphertext...)
+	return out, nil
+}
+
+// Decrypt reverses Encrypt for the same passphrase. It returns an error if
+// data is truncated or the magic, version, or AEAD tag fails to verify.
+func Decrypt(data []byte, passphrase string) ([]byte, error) {
+	minLen := len(Magic) + 1 + 32 + 12
+	if len(data) < minLen {
+		return nil, errors.New("encrypted file too short")
+	}
+	off := 0
+	if string(data[off:off+len(Magic)]) != string(Magic) {
+		return nil, errors.New("not an encrypted backup file")
+	}
+	off += len(Magic)
+	if data[off] != byte(Version) {
+		return nil, fmt.Errorf("unsupported encryption version %d", data[off])
+	}
+	off++
+	salt := data[off : off+32]
+	off += 32
+	key, err := scrypt.Key([]byte(passphrase), salt, scryptN, scryptR, scryptP, scryptDKLen)
+	if err != nil {
+		return nil, fmt.Errorf("scrypt key derivation: %w", err)
+	}
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return nil, fmt.Errorf("aes cipher: %w", err)
+	}
+	gcm, err := cipher.NewGCM(block)
+	if err != nil {
+		return nil, fmt.Errorf("gcm: %w", err)
+	}
+	nonceSize := gcm.NonceSize()
+	if len(data) < off+nonceSize {
+		return nil, errors.New("encrypted file too short for nonce")
+	}
+	nonce := data[off : off+nonceSize]
+	off += nonceSize
+	ciphertext := data[off:]
+	plain, err := gcm.Open(nil, nonce, ciphertext, nil)
+	if err != nil {
+		return nil, fmt.Errorf("decryption failed: %w", err)
+	}
+	return plain, nil
+}
diff --git a/internal/walletbackup/walletbackup_test.go b/internal/walletbackup/walletbackup_test.go
new file mode 100644
index 00000000..a587faa1
--- /dev/null
+++ b/internal/walletbackup/walletbackup_test.go
@@ -0,0 +1,99 @@
+package walletbackup
+
+import (
+	"bytes"
+	"encoding/json"
+	"testing"
+)
+
+func TestEncodeDecodePlain(t *testing.T) {
+	in := &File{
+		Version:  Version,
+		Instance: "demo",
+		Wallets: []Wallet{{
+			Address:          "0xabc",
+			PublicKey:        "0xpub",
+			KeystoreUUID:     "uuid-1",
+			CreatedAt:        "2026-01-01T00:00:00Z",
+			Keystore:         json.RawMessage(`{"version":3}`),
+			KeystorePassword: "hunter2",
+		}},
+	}
+
+	payload, encrypted, err := Encode(in, "")
+	if err != nil {
+		t.Fatalf("Encode: %v", err)
+	}
+	if encrypted {
+		t.Fatalf("expected plain payload")
+	}
+	if IsEncrypted(payload) {
+		t.Fatalf("plain payload reported as encrypted")
+	}
+
+	out, err := Decode(payload, "")
+	if err != nil {
+		t.Fatalf("Decode: %v", err)
+	}
+	if out.Instance != in.Instance || out.Wallets[0].KeystorePassword != in.Wallets[0].KeystorePassword {
+		t.Fatalf("round-trip mismatch: got %+v", out)
+	}
+}
+
+func TestEncodeDecodeEncrypted(t *testing.T) {
+	in := &File{
+		Version: Version,
+		Wallets: []Wallet{{Address: "0x1", KeystorePassword: "p"}},
+	}
+	payload, encrypted, err := Encode(in, "correct horse")
+	if err != nil {
+		t.Fatalf("Encode: %v", err)
+	}
+	if !encrypted {
+		t.Fatalf("expected encrypted payload")
+	}
+	if !IsEncrypted(payload) {
+		t.Fatalf("encrypted payload missing magic prefix")
+	}
+
+	if _, err := Decode(payload, "wrong"); err == nil {
+		t.Fatalf("Decode with wrong passphrase should fail")
+	}
+
+	out, err := Decode(payload, "correct horse")
+	if err != nil {
+		t.Fatalf("Decode: %v", err)
+	}
+	if out.Wallets[0].Address != "0x1" {
+		t.Fatalf("round-trip mismatch")
+	}
+}
+
+func TestEncryptDecryptRawBytes(t *testing.T) {
+	plain := []byte("hello world")
+	cipher, err := Encrypt(plain, "passphrase")
+	if err != nil {
+		t.Fatalf("Encrypt: %v", err)
+	}
+	if !IsEncrypted(cipher) {
+		t.Fatalf("ciphertext missing magic prefix")
+	}
+	out, err := Decrypt(cipher, "passphrase")
+	if err != nil {
+		t.Fatalf("Decrypt: %v", err)
+	}
+	if !bytes.Equal(plain, out) {
+		t.Fatalf("got %q want %q", out, plain)
+	}
+}
+
+func TestDecodeRejectsUnknownVersion(t *testing.T) {
+	in := &File{Version: 99, Wallets: []Wallet{{}}}
+	payload, _, err := Encode(in, "")
+	if err != nil {
+		t.Fatalf("Encode: %v", err)
+	}
+	if _, err := Decode(payload, ""); err == nil {
+		t.Fatalf("Decode should reject unknown version")
+	}
+}

From 4f84352ff056c58e9d52d4e1aeb09016adf37a3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Mon, 4 May 2026 00:55:07 +0100
Subject: [PATCH 5/9] Hermes publish address

---
 internal/hermes/hermes.go |  3 ++
 internal/hermes/wallet.go | 70 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go
index cd9ff6b0..98616cd6 100644
--- a/internal/hermes/hermes.go
+++ b/internal/hermes/hermes.go
@@ -243,6 +243,9 @@ func Sync(cfg *config.Config, id string, u *ui.UI) error {
 		return fmt.Errorf("helmfile sync failed: %w", err)
 	}
 
+	// Publish wallet-metadata ConfigMap for the frontend (namespace now exists).
+	applyWalletMetadataConfigMap(cfg, id, deploymentDir)
+
 	u.Blank()
 	u.Success("Hermes installed successfully!")
 	u.Detail("Namespace", agentruntime.Namespace(agentruntime.Hermes, id))
diff --git a/internal/hermes/wallet.go b/internal/hermes/wallet.go
index 936757a0..198bf1c0 100644
--- a/internal/hermes/wallet.go
+++ b/internal/hermes/wallet.go
@@ -1,6 +1,7 @@
 package hermes
 
 import (
+	"bytes"
 	"crypto/aes"
 	"crypto/cipher"
 	"crypto/rand"
@@ -9,6 +10,7 @@ import (
 	"fmt"
 	"math/big"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"strings"
 	"time"
@@ -272,6 +274,74 @@ func ReadWalletMetadata(deploymentDir string) (*WalletInfo, error) {
 	return &wallet, nil
 }
 
+// applyWalletMetadataConfigMap creates or updates a wallet-metadata ConfigMap
+// in the instance namespace; the frontend reads it to display wallet addresses
+// on the agent card. Mirrors the OpenClaw helper at internal/openclaw/wallet.go
+// so the frontend's getWalletMetadata works identically for either runtime.
+// Best-effort: errors only print a warning (unreadable metadata is skipped
+// silently). Must be called after helmfile sync (the namespace must exist).
+func applyWalletMetadataConfigMap(cfg *config.Config, id, deploymentDir string) {
+	wallet, err := ReadWalletMetadata(deploymentDir)
+	if err != nil {
+		return
+	}
+
+	namespace := agentruntime.Namespace(agentruntime.Hermes, id)
+	kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml")
+	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
+
+	addressesJSON := map[string]any{
+		"instanceId": id,
+		"addresses": []map[string]string{
+			{
+				"address":   wallet.Address,
+				"publicKey": wallet.PublicKey,
+				"createdAt": wallet.CreatedAt,
+				"label":     "hermes-" + id,
+			},
+		},
+		"count": 1,
+	}
+
+	addressesData, err := json.Marshal(addressesJSON)
+	if err != nil {
+		fmt.Printf("Warning: could not marshal wallet metadata: %v\n", err)
+		return
+	}
+
+	manifest := map[string]any{
+		"apiVersion": "v1",
+		"kind":       "ConfigMap",
+		"metadata": map[string]any{
+			"name":      "wallet-metadata",
+			"namespace": namespace,
+			"labels": map[string]string{
+				"app.kubernetes.io/component":  "remote-signer",
+				"app.kubernetes.io/managed-by": "obol",
+			},
+		},
+		"data": map[string]string{
+			"addresses.json": string(addressesData),
+		},
+	}
+
+	raw, err := json.Marshal(manifest)
+	if err != nil {
+		fmt.Printf("Warning: could not marshal ConfigMap: %v\n", err)
+		return
+	}
+
+	cmd := exec.Command(kubectlBinary, "apply", "-f", "-")
+	cmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath)
+	cmd.Stdin = bytes.NewReader(raw)
+
+	var stderr bytes.Buffer
+	cmd.Stderr = &stderr
+	if err := cmd.Run(); err != nil {
+		fmt.Printf("Warning: could not apply wallet-metadata ConfigMap: %v\n%s", err, stderr.String())
+	}
+}
+
 func ResolveWalletAddress(cfg *config.Config) (string, error) {
 	ids, err := agentruntime.ListInstanceIDs(cfg, agentruntime.Hermes)
 	if err != nil {

From dce9cd2a0f758929705e273d64e61ebfc3b9e58d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Mon, 4 May 2026 01:07:52 +0100
Subject: [PATCH 6/9] Unsure this is better

---
 internal/hermes/hermes.go      | 77 ++++++++--------------------------
 internal/hermes/hermes_test.go | 24 ++++++-----
 2 files changed, 30 insertions(+), 71 deletions(-)

diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go
index 98616cd6..1a2635a7 100644
--- a/internal/hermes/hermes.go
+++ b/internal/hermes/hermes.go
@@ -33,10 +33,13 @@ const (
 	// renovate: datasource=helm depName=raw registryUrl=https://bedag.github.io/helm-charts/
 	rawChartVersion = "2.0.2"
 
-	defaultImage     = "nousresearch/hermes-agent:v2026.4.23"
-	hermesInstallDir = "/data/.hermes/hermes-agent"
-	hermesRepoURL    = "https://github.com/NousResearch/hermes-agent.git"
-	hermesBinary     = hermesInstallDir + "/venv/bin/hermes"
+	defaultImage = "nousresearch/hermes-agent:v2026.4.23"
+	// hermesBinary points at the venv that the upstream image preinstalls at
+	// build time via `uv pip install -e ".[all]"`. Using the in-image venv
+	// avoids cloning the repo + rebuilding a venv on every cold start, and
+	// keeps the persistent PVC free of the read-only git pack files that
+	// poison subsequent local-path-provisioner chowns on macOS virtiofs.
+	hermesBinary = "/opt/hermes/.venv/bin/hermes"
 
 	containerUID  = 10000
 	containerGID  = 10000
@@ -798,68 +801,24 @@ func generateValues(namespace, hostname, dashboardHostname, agentBaseURL, token,
             runAsGroup: %d
             fsGroup: %d
           initContainers:
+            # Single init container that runs the same Hermes image used by
+            # the runtime. The upstream image already ships /opt/hermes/.venv
+            # with all dependencies preinstalled, so there is no clone or pip
+            # install at pod start. fsGroup on the pod makes the PVC mount
+            # group-writable to the hermes user (uid 10000), so we don't need
+            # a recursive chown — which is the operation that fails on macOS
+            # virtiofs once the volume has read-only files like git pack
+            # files left from any previous install.
             - name: init-hermes-data
-              image: busybox:1.36
-              command:
-                - sh
-                - -c
-                - mkdir -p /data/.hermes && chown -R %d:%d /data/.hermes
-              securityContext:
-                runAsUser: 0
-              volumeMounts:
-                - name: data
-                  mountPath: /data
-            - name: bootstrap-hermes-install
               image: %s
               imagePullPolicy: IfNotPresent
               command:
                 - sh
                 - -ec
                 - |
-                  install_dir=%s
-                  repo_url=%s
                   mkdir -p /data/.hermes/home /data/.hermes/workspace
-                  lock_dir="${install_dir}.lock"
-                  got_lock=0
-                  for _ in $(seq 1 120); do
-                    if mkdir "$lock_dir" 2>/dev/null; then
-                      got_lock=1
-                      break
-                    fi
-                    sleep 1
-                  done
-                  if [ "$got_lock" != 1 ]; then
-                    echo "Timed out waiting for Hermes install lock: $lock_dir" >&2
-                    exit 1
-                  fi
-                  cleanup_lock() {
-                    rmdir "$lock_dir" 2>/dev/null || true
-                  }
-                  trap cleanup_lock EXIT
-
-                  if [ ! -d "$install_dir/.git" ] || { [ ! -f "$install_dir/pyproject.toml" ] && [ ! -f "$install_dir/setup.py" ]; }; then
-                    rm -rf "${install_dir}.tmp"
-                    if [ -e "$install_dir" ]; then
-                      mv "$install_dir" "${install_dir}.backup.$(date +%%s)"
-                    fi
-                    git clone --depth 1 "$repo_url" "${install_dir}.tmp"
-                    mv "${install_dir}.tmp" "$install_dir"
-                  fi
-                  cd "$install_dir"
-                  # Reinstall when the venv is missing the hermes binary OR
-                  # any selected extra is absent. The upstream image installs
-                  # ".[all]"; we re-create the venv from a fresh clone, so the
-                  # extras must be re-requested explicitly. The import check
-                  # picks one module per extra so existing PVCs trigger a
-                  # rebuild when we add a new extra to the install line.
-                  if [ ! -x "$install_dir/venv/bin/hermes" ] || \
-                     ! "$install_dir/venv/bin/python3" -c "import fastapi, uvicorn, telegram, mcp, ptyprocess, simple_term_menu, googleapiclient" >/dev/null 2>&1; then
-                    rm -rf "$install_dir/venv"
-                    uv venv --python python3 --system-site-packages venv
-                    VIRTUAL_ENV="$install_dir/venv" uv pip install -e ".[web,messaging,mcp,pty,cli,acp,google]"
-                  fi
                   if [ -f /data/.hermes/state.db ]; then
-                    if ! python3 - <<'PY'
+                    if ! /opt/hermes/.venv/bin/python3 - <<'PY'
                   import sqlite3
                   conn = sqlite3.connect('/data/.hermes/state.db')
                   row = conn.execute('PRAGMA quick_check').fetchone()
@@ -875,8 +834,6 @@ func generateValues(namespace, hostname, dashboardHostname, agentBaseURL, token,
                       echo "Backed up malformed Hermes state DB to $backup_dir"
                     fi
                   fi
-                  cleanup_lock
-                  trap - EXIT
               volumeMounts:
                 - name: data
                   mountPath: /data
@@ -917,7 +874,7 @@ func generateValues(namespace, hostname, dashboardHostname, agentBaseURL, token,
                   value: %s
                 - name: OBOL_SKILLS_DIR
                   value: /data/.hermes/%s
-	`, desc.DataPVCName, namespace, desc.ServiceName, desc.ServiceName, namespace, desc.ServiceName, desc.ServiceName, desc.ServiceName, desc.ServiceName, containerUID, containerGID, containerGID, containerUID, containerGID, quoteYAML(image()), quoteYAML(hermesInstallDir), quoteYAML(hermesRepoURL), desc.ServiceName, quoteYAML(image()), quoteYAML(hermesBinary), desc.DefaultPort, desc.DefaultPort, quoteYAML(primary), quoteYAML(namespace), obolSkillsDirName)
+	`, desc.DataPVCName, namespace, desc.ServiceName, desc.ServiceName, namespace, desc.ServiceName, desc.ServiceName, desc.ServiceName, desc.ServiceName, containerUID, containerGID, containerGID, quoteYAML(image()), desc.ServiceName, quoteYAML(image()), quoteYAML(hermesBinary), desc.DefaultPort, desc.DefaultPort, quoteYAML(primary), quoteYAML(namespace), obolSkillsDirName)
 
 	if agentBaseURL != "" {
 		fmt.Fprintf(&b, "                - name: AGENT_BASE_URL\n                  value: %s\n", quoteYAML(agentBaseURL))
diff --git a/internal/hermes/hermes_test.go b/internal/hermes/hermes_test.go
index 4e27c869..9728c526 100644
--- a/internal/hermes/hermes_test.go
+++ b/internal/hermes/hermes_test.go
@@ -143,18 +143,9 @@ func TestGenerateValues_UsesHermesNativeNames(t *testing.T) {
 		"containerPort: 8642",
 		"containerPort: 9119",
 		"init-hermes-data",
-		"bootstrap-hermes-install",
-		`install_dir="/data/.hermes/hermes-agent"`,
-		`repo_url="https://github.com/NousResearch/hermes-agent.git"`,
-		`lock_dir="${install_dir}.lock"`,
-		`Timed out waiting for Hermes install lock`,
-		`git clone --depth 1 "$repo_url" "${install_dir}.tmp"`,
-		"uv venv --python python3 --system-site-packages venv",
-		`uv pip install -e ".[web,messaging,mcp,pty,cli,acp,google]"`,
-		`import fastapi, uvicorn, telegram, mcp, ptyprocess, simple_term_menu, googleapiclient`,
 		`PRAGMA quick_check`,
 		`state-db-corrupt-$ts`,
-		`- "/data/.hermes/hermes-agent/venv/bin/hermes"`,
+		`- "/opt/hermes/.venv/bin/hermes"`,
 		`- "hermes-obol-agent.obol.stack"`,
 		`- "obol-agent.obol.stack"`,
 		"name: hermes-dashboard",
@@ -165,6 +156,17 @@ func TestGenerateValues_UsesHermesNativeNames(t *testing.T) {
 		}
 	}
 
+	for _, banned := range []string{
+		"bootstrap-hermes-install",
+		"git clone",
+		"uv pip install",
+		"/data/.hermes/hermes-agent",
+	} {
+		if strings.Contains(values, banned) {
+			t.Fatalf("generateValues() should no longer reference %q (the in-pod git clone + venv build); use the upstream image's /opt/hermes/.venv instead:\n%s", banned, values)
+		}
+	}
+
 	var parsed any
 	if err := yaml.Unmarshal([]byte(values), &parsed); err != nil {
 		t.Fatalf("generateValues() produced invalid YAML: %v\n%s", err, values)
@@ -203,7 +205,7 @@ func TestHermesExecArgs_UsesNativeHermesBinary(t *testing.T) {
 		"-n", "hermes-obol-agent",
 		"deploy/hermes",
 		"--",
-		"/data/.hermes/hermes-agent/venv/bin/hermes",
+		"/opt/hermes/.venv/bin/hermes",
 		"skills",
 		"audit",
 	}

From 0042d0f01866af2e84fc5d0e8ba4d54e8f85d66b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Mon, 4 May 2026 01:39:43 +0100
Subject: [PATCH 7/9] Updates config map with recreate

---
 internal/stack/backend_k3d.go | 24 +++++++-----
 internal/stack/stack.go       | 71 +++++++++++++++++++++--------------
 2 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/internal/stack/backend_k3d.go b/internal/stack/backend_k3d.go
index f530f0e3..8bc15469 100644
--- a/internal/stack/backend_k3d.go
+++ b/internal/stack/backend_k3d.go
@@ -103,6 +103,21 @@ func (b *K3dBackend) Up(cfg *config.Config, u *ui.UI, stackID string) ([]byte, e
 		return nil, err
 	}
 
+	// Ensure the dev registry caches are started in BOTH branches. `k3d
+	// cluster start` does not auto-restart registry containers attached via
+	// `--registry-use` at create time — it only starts the cluster's own
+	// nodes. Without this call, every retry after a `cluster stop` falls
+	// back to direct upstream pulls and re-fetches every image, costing
+	// minutes per attempt.
+	if os.Getenv("OBOL_DEVELOPMENT") == "true" {
+		setup, setupErr := ensureDevRegistries(cfg, u)
+		if setupErr != nil {
+			u.Warnf("Dev registry cache unavailable, falling back to direct upstream pulls: %v", setupErr)
+		} else {
+			registrySetup = setup
+		}
+	}
+
 	if running {
 		u.Warn("Cluster already exists, starting it")
 
@@ -128,15 +143,6 @@ func (b *K3dBackend) Up(cfg *config.Config, u *ui.UI, stackID string) ([]byte, e
 		// 'obol stack init' wrote the k3d config.
 		ensureK3dPortsAvailable(k3dConfigPath, u)
 
-		if os.Getenv("OBOL_DEVELOPMENT") == "true" {
-			setup, setupErr := ensureDevRegistries(cfg, u)
-			if setupErr != nil {
-				u.Warnf("Dev registry cache unavailable, falling back to direct upstream pulls: %v", setupErr)
-			} else {
-				registrySetup = setup
-			}
-		}
-
 		createCmd := exec.Command(
 			filepath.Join(cfg.BinDir, "k3d"),
 			k3dCreateArgs(stackName, k3dConfigPath, registrySetup)...,
diff --git a/internal/stack/stack.go b/internal/stack/stack.go
index b6704e61..94395ee3 100644
--- a/internal/stack/stack.go
+++ b/internal/stack/stack.go
@@ -362,16 +362,14 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s
 		u.Warnf("Failed to preserve LiteLLM config across Helm sync: %v", err)
 	}
 
-	// Release runtime field ownership of litellm-config.data.config.yaml so the
-	// upcoming helm upgrade can reclaim it without an SSA conflict. Without
-	// this step, the second `obol stack up` after autoConfigureLLM/restore has
-	// claimed the field via SSA (manager=helm, op=Apply) fails with
-	// "conflict with helm using v1: .data.config.yaml" because helm registers
-	// a separate managedFields entry (manager=helm, op=Update) for the same
-	// field. The data is already snapshotted in previousLiteLLMConfig and gets
-	// re-applied by restoreLiteLLMConfig after helm runs.
-	if err := releaseLiteLLMConfigOwnership(cfg, kubeconfigPath); err != nil {
-		u.Warnf("Failed to release LiteLLM config field ownership: %v", err)
+	// Establish field manager "helm" as the SSA owner of
+	// litellm-config.data["config.yaml"] before helmfile sync runs, so the
+	// upcoming helm upgrade's SSA merges in place instead of conflicting
+	// with the synthesised "before-first-apply" or a previous "helm" Apply
+	// entry. The data is already snapshotted in previousLiteLLMConfig and
+	// gets re-applied by restoreLiteLLMConfig after helm runs.
+	if err := releaseLiteLLMConfigOwnership(cfg, kubeconfigPath, previousLiteLLMConfig); err != nil {
+		u.Warnf("Failed to claim LiteLLM config field ownership: %v", err)
 	}
 
 	// Compatibility migration
@@ -919,33 +917,50 @@ func preserveLiteLLMConfigForHelm(cfg *config.Config, kubeconfigPath string) (st
 	return raw, nil
 }
 
-// releaseLiteLLMConfigOwnership strips managedFields from the litellm-config
-// ConfigMap so the next helm upgrade can claim ownership of every field
-// without an SSA conflict. Helm tracks release ownership via the
-// meta.helm.sh/release-name annotation, not managedFields, so clearing
-// managedFields does not detach the resource from its release.
+// releaseLiteLLMConfigOwnership deletes the litellm-config ConfigMap (if it
+// exists) before the next helmfile sync runs, so helm's upgrade creates a
+// fresh ConfigMap and becomes the sole SSA owner of every field — no
+// pre-existing field-ownership entries, no synthesised "before-first-apply"
+// manager, no conflict on `.data.config.yaml`.
 //
-// The single empty entry [{}] is the documented apiserver idiom for clearing
-// all field-ownership claims on a resource. See:
-// https://kubernetes.io/docs/reference/using-api/server-side-apply/#clearing-managedfields
-func releaseLiteLLMConfigOwnership(cfg *config.Config, kubeconfigPath string) error {
-	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
-
-	// Skip if the configmap doesn't exist (first install).
-	if _, err := kubectl.Output(kubectlBinary, kubeconfigPath,
-		"get", "configmap", "litellm-config", "-n", "llm", "-o", "name"); err != nil {
+// Why deletion rather than re-applying with manager "helm" or clearing
+// managedFields:
+//   - clearing managedFields ([{}]) leaves data fields with no SSA owner;
+//     Kubernetes synthesises "before-first-apply" on the next SSA call to
+//     track them, and helm's apply then conflicts on `.data.config.yaml`
+//     against that synthesised manager.
+//   - re-applying with manager "helm" via SSA only claims the fields in our
+//     manifest. Adjacent fields (labels, annotations, other data keys) stay
+//     under their original Update manager, and synthesised
+//     "before-first-apply" still appears on helm's SSA call.
+//
+// Helm tracks release ownership via the meta.helm.sh/release-name annotation
+// on the resource — not via managedFields — but those annotations ride along
+// with the ConfigMap content. Helm reconstructs them from its release
+// secret on the next upgrade, so deleting the ConfigMap does not orphan it
+// from the release.
+//
+// The window where the ConfigMap is missing is bounded by helmfile sync
+// (seconds). Running LiteLLM pods are unaffected because volume projections
+// happen at pod start, not on ConfigMap mutation. The user data (custom
+// providers, paid model routes) is already snapshotted in
+// previousLiteLLMConfig and re-applied by restoreLiteLLMConfig after helm
+// finishes.
+func releaseLiteLLMConfigOwnership(cfg *config.Config, kubeconfigPath, snapshot string) error {
+	if strings.TrimSpace(snapshot) == "" {
+		// No existing ConfigMap to delete (first install or earlier failure).
 		return nil
 	}
 
+	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
 	cmd := exec.Command(kubectlBinary,
-		"patch", "configmap", "litellm-config",
+		"delete", "configmap", "litellm-config",
 		"-n", "llm",
-		"--type=merge",
-		"--patch", `{"metadata":{"managedFields":[{}]}}`,
+		"--ignore-not-found",
 	)
 	cmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath)
 	if out, err := cmd.CombinedOutput(); err != nil {
-		return fmt.Errorf("kubectl patch managedFields: %w\n%s", err, string(out))
+		return fmt.Errorf("kubectl delete configmap litellm-config: %w\n%s", err, string(out))
 	}
 	return nil
 }

From c2df08e7e5c812b6ac8ce8f387db2497d89c39cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Mon, 4 May 2026 12:37:19 +0100
Subject: [PATCH 8/9] Force SSA with --force-conflicts in stack and agent helmfiles

---
 internal/embed/infrastructure/helmfile.yaml | 11 ++++
 internal/hermes/hermes.go                   | 11 ++++
 internal/stack/stack.go                     | 58 ---------------------
 3 files changed, 22 insertions(+), 58 deletions(-)

diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml
index 05ff5f27..7f7ca348 100644
--- a/internal/embed/infrastructure/helmfile.yaml
+++ b/internal/embed/infrastructure/helmfile.yaml
@@ -2,6 +2,17 @@
 # Orchestrates core infrastructure components deployed with every stack
 # Uses Traefik with Gateway API for routing (replaces nginx-ingress)
 
+# Force helm to use SSA with --force-conflicts on every release so that
+# upgrades take ownership of fields written by other field managers
+# (kubectl-client-side-apply from helm's pre-3.13 default, "before-first-apply"
+# synthesised by the apiserver, or runtime writers like obol's auto-config
+# patches). Without this, every `obol stack down`/`up` cycle hits whack-a-mole
+# SSA conflicts on resources helm shares with other writers.
+helmDefaults:
+  args:
+    - "--server-side"
+    - "--force-conflicts"
+
 repositories:
   - name: traefik
     url: https://traefik.github.io/charts
diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go
index 1a2635a7..b72b5fdd 100644
--- a/internal/hermes/hermes.go
+++ b/internal/hermes/hermes.go
@@ -692,6 +692,17 @@ func writeDeploymentFiles(cfg *config.Config, id, deploymentDir, agentBaseURL st
 func generateHelmfile(namespace string) string {
 	return fmt.Sprintf(`# Managed by obol agent
 
+# --server-side --force-conflicts on every helm release so upgrades take
+# ownership of fields previously written by other managers (e.g. helm's
+# pre-3.13 client-side-apply default, the apiserver's synthesised
+# "before-first-apply", or runtime kubectl applies). Without this, every
+# subsequent `+"`obol agent sync`"+` after a fresh install hits whack-a-mole
+# SSA conflicts on the remote-signer Secret labels and similar shared fields.
+helmDefaults:
+  args:
+    - "--server-side"
+    - "--force-conflicts"
+
 repositories:
   - name: obol
     url: https://obolnetwork.github.io/helm-charts/
diff --git a/internal/stack/stack.go b/internal/stack/stack.go
index 94395ee3..90700775 100644
--- a/internal/stack/stack.go
+++ b/internal/stack/stack.go
@@ -362,16 +362,6 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s
 		u.Warnf("Failed to preserve LiteLLM config across Helm sync: %v", err)
 	}
 
-	// Establish field manager "helm" as the SSA owner of
-	// litellm-config.data["config.yaml"] before helmfile sync runs, so the
-	// upcoming helm upgrade's SSA merges in place instead of conflicting
-	// with the synthesised "before-first-apply" or a previous "helm" Apply
-	// entry. The data is already snapshotted in previousLiteLLMConfig and
-	// gets re-applied by restoreLiteLLMConfig after helm runs.
-	if err := releaseLiteLLMConfigOwnership(cfg, kubeconfigPath, previousLiteLLMConfig); err != nil {
-		u.Warnf("Failed to claim LiteLLM config field ownership: %v", err)
-	}
-
 	// Compatibility migration
 	if err := migrateDefaultsHTTPRouteHostnames(helmfilePath); err != nil {
 		u.Warnf("Failed to migrate defaults helmfile hostnames: %v", err)
@@ -917,54 +907,6 @@ func preserveLiteLLMConfigForHelm(cfg *config.Config, kubeconfigPath string) (st
 	return raw, nil
 }
 
-// releaseLiteLLMConfigOwnership deletes the litellm-config ConfigMap (if it
-// exists) before the next helmfile sync runs, so helm's upgrade creates a
-// fresh ConfigMap and becomes the sole SSA owner of every field — no
-// pre-existing field-ownership entries, no synthesised "before-first-apply"
-// manager, no conflict on `.data.config.yaml`.
-//
-// Why deletion rather than re-applying with manager "helm" or clearing
-// managedFields:
-//   - clearing managedFields ([{}]) leaves data fields with no SSA owner;
-//     Kubernetes synthesises "before-first-apply" on the next SSA call to
-//     track them, and helm's apply then conflicts on `.data.config.yaml`
-//     against that synthesised manager.
-//   - re-applying with manager "helm" via SSA only claims the fields in our
-//     manifest. Adjacent fields (labels, annotations, other data keys) stay
-//     under their original Update manager, and synthesised
-//     "before-first-apply" still appears on helm's SSA call.
-//
-// Helm tracks release ownership via the meta.helm.sh/release-name annotation
-// on the resource — not via managedFields — but those annotations ride along
-// with the ConfigMap content. Helm reconstructs them from its release
-// secret on the next upgrade, so deleting the ConfigMap does not orphan it
-// from the release.
-//
-// The window where the ConfigMap is missing is bounded by helmfile sync
-// (seconds). Running LiteLLM pods are unaffected because volume projections
-// happen at pod start, not on ConfigMap mutation. The user data (custom
-// providers, paid model routes) is already snapshotted in
-// previousLiteLLMConfig and re-applied by restoreLiteLLMConfig after helm
-// finishes.
-func releaseLiteLLMConfigOwnership(cfg *config.Config, kubeconfigPath, snapshot string) error {
-	if strings.TrimSpace(snapshot) == "" {
-		// No existing ConfigMap to delete (first install or earlier failure).
-		return nil
-	}
-
-	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
-	cmd := exec.Command(kubectlBinary,
-		"delete", "configmap", "litellm-config",
-		"-n", "llm",
-		"--ignore-not-found",
-	)
-	cmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath)
-	if out, err := cmd.CombinedOutput(); err != nil {
-		return fmt.Errorf("kubectl delete configmap litellm-config: %w\n%s", err, string(out))
-	}
-	return nil
-}
-
 func restoreLiteLLMConfig(cfg *config.Config, kubeconfigPath, raw string) error {
 	if strings.TrimSpace(raw) == "" {
 		return nil

From be8a0d5e71d2d68e4194a0604053d8c917941b17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= <oisin@obol.tech>
Date: Mon, 4 May 2026 13:11:50 +0100
Subject: [PATCH 9/9] Recreate dev registry when its network is gone; use --server-side=true

---
 internal/embed/infrastructure/helmfile.yaml |  5 ++++-
 internal/hermes/hermes.go                   |  5 ++++-
 internal/stack/dev_registry.go              | 15 ++++++++++++---
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml
index 7f7ca348..ea40be45 100644
--- a/internal/embed/infrastructure/helmfile.yaml
+++ b/internal/embed/infrastructure/helmfile.yaml
@@ -10,7 +10,10 @@
 # SSA conflicts on resources helm shares with other writers.
 helmDefaults:
   args:
-    - "--server-side"
+    # `=true` form is required: helm's `--server-side` takes a value
+    # (auto|true|false). Without `=true`, helm consumes the next arg as the
+    # value and rejects `--force-conflicts` as an unknown apply method.
+    - "--server-side=true"
     - "--force-conflicts"
 
 repositories:
diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go
index b72b5fdd..d321f296 100644
--- a/internal/hermes/hermes.go
+++ b/internal/hermes/hermes.go
@@ -700,7 +700,10 @@ func generateHelmfile(namespace string) string {
 # SSA conflicts on the remote-signer Secret labels and similar shared fields.
 helmDefaults:
   args:
-    - "--server-side"
+    # =true form is required: helm's --server-side takes a value
+    # (auto|true|false), so without =true helm consumes the next arg as the
+    # value and rejects --force-conflicts as an unknown apply method.
+    - "--server-side=true"
     - "--force-conflicts"
 
 repositories:
diff --git a/internal/stack/dev_registry.go b/internal/stack/dev_registry.go
index 91ed2f96..7633968a 100644
--- a/internal/stack/dev_registry.go
+++ b/internal/stack/dev_registry.go
@@ -79,11 +79,20 @@ func ensureDevRegistry(cfg *config.Config, k3dBinary string, mirror registryMirr
 			return nil
 		}
 
-		if err := runCommand(exec.Command("docker", "start", containerName)); err != nil {
-			return fmt.Errorf("start registry %s: %w", containerName, err)
+		// Container exists but is stopped. Try to start it.
+		if startErr := runCommand(exec.Command("docker", "start", containerName)); startErr == nil {
+			return nil
 		}
 
-		return nil
+		// Start failed — most commonly because the k3d-obol-stack-* Docker
+		// network the registry was attached to has been removed (cluster
+		// purge or reclaimLeakedDevK3dNetworks). The container's stored
+		// network reference is now a dangling ID and `docker start` aborts
+		// with "network ... not found". Force-remove the container and
+		// fall through to recreate it. The cache content lives on a host
+		// volume mount, so the recreated container picks it back up
+		// without re-downloading anything.
+		_ = runCommand(exec.Command("docker", "rm", "-f", containerName))
 	}
 
 	createCmd := exec.Command(