diff --git a/.agents/skills/create-spike/SKILL.md b/.agents/skills/create-spike/SKILL.md index 96dd8cf4..f19ab476 100644 --- a/.agents/skills/create-spike/SKILL.md +++ b/.agents/skills/create-spike/SKILL.md @@ -14,16 +14,6 @@ A **spike** is an exploratory investigation. The user has a vague idea — a fea - The `gh` CLI must be authenticated (`gh auth status`) - You must be in a git repository with a GitHub remote -## Agent Comment Marker - -All comments posted by this skill **must** begin with the following marker line: - -``` -> **🔬 spike-agent** -``` - -This marker distinguishes spike investigation comments from other skills (e.g., `🏗️ build-from-issue-agent`, `🔒 security-review-agent`) and from human comments. - ## Workflow Overview ``` @@ -39,9 +29,7 @@ User describes a problem │ ├─ Step 4: Create a GitHub issue with structured findings │ - ├─ Step 5: Post investigation detail comment with spike-agent marker - │ - └─ Step 6: Report to user with issue URL and next steps + └─ Step 5: Report to user with issue URL and next steps ``` ## Step 1: Gather the Problem Statement @@ -115,10 +103,7 @@ Include in the prompt to the reviewer: ### What to do with the results -The reviewer will return a detailed analysis. You'll use this to populate both the issue body (Step 4) and the investigation detail comment (Step 5). Split the content as follows: - -- **Issue body**: concise, stakeholder-readable summary -- **Spike comment**: full technical details with code references, for implementers +The reviewer will return a detailed analysis. You'll use this to populate the issue body (Step 4). The issue should contain both the stakeholder-readable summary and the full technical investigation — everything in one place. ## Step 3: Determine Labels @@ -137,7 +122,7 @@ Based on the investigation results, select appropriate labels: ## Step 4: Create the GitHub Issue -Create the issue with a structured body. The title should follow conventional commit format. 
+Create the issue with a structured body containing both the stakeholder-readable summary and the full technical investigation. The title should follow conventional commit format. ```bash gh issue create \ @@ -150,7 +135,7 @@ gh issue create \ ## Technical Context - + ## Affected Components @@ -159,49 +144,6 @@ gh issue create \ | | ``, `` | | | ... | ... | ... | -## Proposed Approach - - - -## Scope Assessment - -- **Complexity:** -- **Confidence:** -- **Estimated files to change:** -- **Issue type:** `` - -## Risks & Open Questions - -- -- -- ... - -## Test Considerations - -- -- -- - ---- -*Created by spike investigation. Use `build-from-issue` to plan and implement.* -EOF -)" -``` - -**Display the issue URL** so it's easily clickable: - -``` -Created issue [#](https://github.com/OWNER/REPO/issues/) -``` - -## Step 5: Post Investigation Detail Comment - -Post a comment on the newly created issue containing the full technical investigation. This comment is more detailed than the issue body — it's reference material for whoever implements the issue (likely `build-from-issue`). - -```bash -gh issue comment --body "$(cat <<'EOF' -> **🔬 spike-agent** - ## Technical Investigation ### Architecture Overview @@ -232,50 +174,72 @@ gh issue comment --body "$(cat <<'EOF' -### Test Coverage Notes +## Proposed Approach + + + +## Scope Assessment - +- **Complexity:** +- **Confidence:** +- **Estimated files to change:** +- **Issue type:** `` + +## Risks & Open Questions + +- +- +- ... + +## Test Considerations + +- +- +- +- --- -*This investigation provides context for implementation. Next step: review the issue, refine if needed, then use `build-from-issue` to create a plan and implement.* +*Created by spike investigation. Use `build-from-issue` to plan and implement.* EOF )" ``` -### Why the split? +**Do NOT post a follow-up comment on the issue.** All findings must be contained in the issue body itself. -- **Issue body** = concise, stakeholder-readable. 
Product managers, tech leads, and other engineers can scan it. -- **Spike comment** = deep technical context. When `build-from-issue` runs, its `principal-engineer-reviewer` reads issue comments — this gives it a head start so it doesn't have to redo the investigation. +**Display the issue URL** so it's easily clickable: + +``` +Created issue [#<number>](https://github.com/OWNER/REPO/issues/<number>) +``` -## Step 6: Report to User +## Step 5: Report to User -After creating the issue and posting the investigation comment, report: +After creating the issue, report: 1. The issue URL (as a clickable markdown link) 2. A 2-3 sentence summary of what was found 3. Key risks or decisions that need human attention 4. Next steps: -> Review the issue and the spike investigation comment. Refine the proposed approach if needed, then use `build-from-issue` on the issue to create an implementation plan and build it. +> Review the issue. Refine the proposed approach if needed, then use `build-from-issue` on the issue to create an implementation plan and build it. ## Design Principles -1. **The issue body is for stakeholders; the spike comment is for implementers.** Keep the issue body concise and the comment detailed. +1. **Everything goes in the issue body.** Do NOT post follow-up comments. The issue body should contain both the stakeholder-readable summary and the full technical investigation, all in one place. 2. **Do NOT create an implementation plan.** The spike identifies the problem space and proposes a direction. The implementation plan is `build-from-issue`'s responsibility, created after human review of the spike. 3. **One round of clarification max.** Don't turn this into an interrogation. If the user provides enough to identify the area of the codebase, start investigating. -4. **The spike comment should save `build-from-issue` work.** When `build-from-issue` runs, it reads issue comments as input context. 
The spike comment should contain enough detail that its `principal-engineer-reviewer` can build on the investigation rather than starting from scratch. +4. **The issue should save `build-from-issue` work.** When `build-from-issue` runs, it reads the issue body as input context. The technical investigation section should contain enough detail that its `principal-engineer-reviewer` can build on the investigation rather than starting from scratch. -5. **Cross-reference `build-from-issue`.** Mention it as the natural next step in the issue body footer and the spike comment footer. +5. **Cross-reference `build-from-issue`.** Mention it as the natural next step in the issue body footer. ## Useful Commands Reference | Command | Description | | --- | --- | | `gh issue create --title "..." --body "..." --label "..."` | Create a new issue | -| `gh issue comment --body "..."` | Post a comment on an issue | | `gh label list --limit 100` | List available labels in the repo | | `gh issue edit --add-label "..."` | Add labels to an issue | | `gh issue view --json number,title,body,state,labels` | Fetch issue metadata | @@ -296,9 +260,8 @@ User says: "Allow sandbox egress to private IP space via networking policy" - Identifies exact insertion points: policy field addition, SSRF check bypass path, OPA rule extension - Assesses: Medium complexity, High confidence, ~6 files 3. Fetch labels — select `feat`, `sandbox`, `proxy`, `policy`, `review-ready` -4. Create issue: `feat: allow sandbox egress to private IP space via networking policy` -5. Post spike comment with full investigation: code references, architecture context, alternative approaches (allowlist vs. blanket bypass vs. per-policy toggle) -6. Report: "Created issue #59. The investigation found that private IP blocking is enforced at the SSRF check layer in the proxy. The proposed approach adds a policy-level override. Review the issue and use `build-from-issue` when ready." +4. 
Create issue: `feat: allow sandbox egress to private IP space via networking policy` — body includes both the summary and full investigation (code references, architecture context, alternative approaches) +5. Report: "Created issue #59. The investigation found that private IP blocking is enforced at the SSRF check layer in the proxy. The proposed approach adds a policy-level override. Review the issue and use `build-from-issue` when ready." ### Bug investigation spike @@ -313,9 +276,8 @@ User says: "The proxy retry logic seems too aggressive — I'm seeing cascading - Identifies that retries happen without backoff jitter, causing thundering herd - Assesses: Low complexity, High confidence, ~2 files 3. Fetch labels — select `fix`, `proxy`, `review-ready` -4. Create issue: `fix: proxy retry logic causes cascading failures under load` -5. Post spike comment with retry code references, current behavior trace, and comparison to standard backoff patterns -6. Report: "Created issue #74. The proxy retries without jitter or circuit breaking, which amplifies failures under load. Straightforward fix. Review and use `build-from-issue` when ready." +4. Create issue: `fix: proxy retry logic causes cascading failures under load` — body includes both the summary and full investigation (retry code references, current behavior trace, comparison to standard backoff patterns) +5. Report: "Created issue #74. The proxy retries without jitter or circuit breaking, which amplifies failures under load. Straightforward fix. Review and use `build-from-issue` when ready." ### Performance/refactoring spike @@ -330,6 +292,5 @@ User says: "Policy evaluation is getting slow — can we cache compiled OPA poli - Identifies that policies are recompiled on every evaluation - Assesses: Medium complexity, Medium confidence (cache invalidation is a design decision), ~4 files 3. Fetch labels — select `perf`, `policy`, `review-ready` -4. 
Create issue: `perf: cache compiled OPA policies to reduce evaluation latency` -5. Post spike comment with compilation hot path, current per-request overhead, cache invalidation strategies considered (TTL vs. content-hash vs. explicit reload), and trade-offs -6. Report: "Created issue #81. Policies are recompiled per-request with no caching. The main design decision is the cache invalidation strategy — flagged as an open question. Review and use `build-from-issue` when ready." +4. Create issue: `perf: cache compiled OPA policies to reduce evaluation latency` — body includes both the summary and full investigation (compilation hot path, per-request overhead, cache invalidation strategies with trade-offs) +5. Report: "Created issue #81. Policies are recompiled per-request with no caching. The main design decision is the cache invalidation strategy — flagged as an open question. Review and use `build-from-issue` when ready." diff --git a/.agents/skills/debug-navigator-cluster/SKILL.md b/.agents/skills/debug-navigator-cluster/SKILL.md index 238c10cb..b9011933 100644 --- a/.agents/skills/debug-navigator-cluster/SKILL.md +++ b/.agents/skills/debug-navigator-cluster/SKILL.md @@ -1,17 +1,17 @@ --- name: debug-navigator-cluster -description: Debug why a nemoclaw cluster failed to start or is unhealthy. Use when the user has a failed `nemoclaw cluster admin deploy`, cluster health check failure, or wants to diagnose cluster infrastructure issues. Trigger keywords - debug cluster, cluster failing, cluster not starting, deploy failed, cluster troubleshoot, cluster health, cluster diagnose, why won't my cluster start, health check failed. +description: Debug why a nemoclaw cluster failed to start or is unhealthy. Use when the user has a failed `nemoclaw gateway start`, cluster health check failure, or wants to diagnose cluster infrastructure issues. 
Trigger keywords - debug cluster, cluster failing, cluster not starting, deploy failed, cluster troubleshoot, cluster health, cluster diagnose, why won't my cluster start, health check failed, gateway start failed, gateway not starting. --- # Debug NemoClaw Cluster -Diagnose why a nemoclaw cluster failed to start after `nemoclaw cluster admin deploy`. +Diagnose why a nemoclaw cluster failed to start after `nemoclaw gateway start`. ## Overview -`nemoclaw cluster admin deploy` creates a Docker container running k3s with the NemoClaw server and Envoy Gateway deployed via Helm. The deployment stages, in order, are: +`nemoclaw gateway start` creates a Docker container running k3s with the NemoClaw server and Envoy Gateway deployed via Helm. The deployment stages, in order, are: -1. **Pre-deploy check**: `nemoclaw cluster admin deploy` in interactive mode prompts to **reuse** (keep volume, clean stale nodes) or **recreate** (destroy everything, fresh start). `mise run cluster` always recreates before deploy. +1. **Pre-deploy check**: `nemoclaw gateway start` in interactive mode prompts to **reuse** (keep volume, clean stale nodes) or **recreate** (destroy everything, fresh start). `mise run cluster` always recreates before deploy. 2. Ensure cluster image is available (local build or remote pull) 3. Create Docker network (`navigator-cluster`) and volume (`navigator-cluster-{name}`) 4. Create and start a privileged Docker container (`navigator-cluster-{name}`) @@ -31,7 +31,7 @@ For local deploys, metadata endpoint selection now depends on Docker connectivit - default local Docker socket (`unix:///var/run/docker.sock`): `https://127.0.0.1:{port}` (default port 8080) - TCP Docker daemon (`DOCKER_HOST=tcp://:`): `https://:{port}` for non-loopback hosts -The host port is configurable via `--port` on `nemoclaw cluster admin deploy` (default 8080) and is stored in `ClusterMetadata.gateway_port`. 
+The host port is configurable via `--port` on `nemoclaw gateway start` (default 8080) and is stored in `ClusterMetadata.gateway_port`. The TCP host is also added as an extra gateway TLS SAN so mTLS hostname validation succeeds. @@ -302,7 +302,7 @@ If DNS is broken, all image pulls from the distribution registry will fail, as w | Helm install job failed | Chart values error or dependency issue | Check `helm-install-navigator` job logs in `kube-system` | | Architecture mismatch (remote) | Built on arm64, deploying to amd64 | Cross-build the image for the target architecture | | SSH connection failed (remote) | SSH key/host/Docker issues | Test `ssh docker ps` manually | -| Port conflict | Another service on 6443 or the configured gateway host port (default 8080) | Stop conflicting service or use `--port` to pick a different host port | +| Port conflict | Another service on 6443 or the configured gateway host port (default 8080) | Stop conflicting service or use `--port` on `nemoclaw gateway start` to pick a different host port | | gRPC connect refused to `127.0.0.1:443` in CI | Docker daemon is remote (`DOCKER_HOST=tcp://...`) but metadata still points to loopback | Verify metadata endpoint host matches `DOCKER_HOST` and includes non-loopback host | | DNS failures inside container | Entrypoint DNS detection failed | Check `/etc/rancher/k3s/resolv.conf` and container startup logs | | `metrics-server` errors in logs | Normal k3s noise, not the root cause | These errors are benign — look for the actual failing health check component | @@ -331,7 +331,7 @@ docker -H ssh:// logs navigator-cluster- **Setting up kubectl access** (requires tunnel): ```bash -nemoclaw cluster admin tunnel --name --remote +nemoclaw gateway tunnel --name --remote # Then in another terminal: export KUBECONFIG=~/.config/nemoclaw/clusters//kubeconfig kubectl get pods -A diff --git a/.agents/skills/generate-sandbox-policy/SKILL.md b/.agents/skills/generate-sandbox-policy/SKILL.md index 
bb9f5d43..6c6c33d9 100644 --- a/.agents/skills/generate-sandbox-policy/SKILL.md +++ b/.agents/skills/generate-sandbox-policy/SKILL.md @@ -365,7 +365,7 @@ The policy needs to go somewhere. Determine which mode applies: 1. **Read the existing file** to understand current state: - What policies already exist under `network_policies` - - What the `filesystem_policy`, `landlock`, `process`, and `inference` sections look like + - What the `filesystem_policy`, `landlock`, and `process` sections look like - Whether the file uses compact (`{ host: ..., port: ... }`) or expanded YAML style 2. **Check for conflicts**: @@ -377,7 +377,7 @@ The policy needs to go somewhere. Determine which mode applies: - **Modifying an existing policy**: Edit the specific policy in place — add/remove endpoints, change access presets, update rules, add binaries, etc. - **Removing a policy**: Delete the policy block if the user asks. -4. **Preserve everything else**: Do not modify `filesystem_policy`, `landlock`, `process`, `inference`, or other policies unless the user explicitly asks. +4. **Preserve everything else**: Do not modify `filesystem_policy`, `landlock`, `process`, or other policies unless the user explicitly asks. ### Mode B: Create a New Policy File @@ -410,13 +410,9 @@ process: network_policies: # - -inference: - allowed_routes: - - local ``` -The `filesystem_policy`, `landlock`, `process`, and `inference` sections above are sensible defaults. Tell the user these are defaults and may need adjustment for their environment. The generated `network_policies` block is the primary output. +The `filesystem_policy`, `landlock`, and `process` sections above are sensible defaults. Tell the user these are defaults and may need adjustment for their environment. Cluster inference is configured separately through `nemoclaw cluster inference set/get`. The generated `network_policies` block is the primary output. If the user provides a file path, write to it. 
Otherwise, suggest `deploy/docker/sandbox/dev-sandbox-policy.yaml` for local development or ask where to place it. diff --git a/.agents/skills/generate-sandbox-policy/examples.md b/.agents/skills/generate-sandbox-policy/examples.md index 7fda8191..66ce819e 100644 --- a/.agents/skills/generate-sandbox-policy/examples.md +++ b/.agents/skills/generate-sandbox-policy/examples.md @@ -754,7 +754,7 @@ An exact IP is treated as `/32` — only that specific address is permitted. - { path: /usr/bin/curl } ``` -The agent uses `StrReplace` to insert after the last existing policy in the `network_policies` block. All other sections (`filesystem_policy`, `landlock`, `process`, `inference`) are untouched. +The agent uses `StrReplace` to insert after the last existing policy in the `network_policies` block. All other sections (`filesystem_policy`, `landlock`, `process`) are untouched. --- @@ -866,13 +866,9 @@ network_policies: access: full binaries: - { path: /usr/local/bin/claude } - -inference: - allowed_routes: - - local ``` -The agent notes that `filesystem_policy`, `landlock`, `process`, and `inference` are sensible defaults that may need adjustment. +The agent notes that `filesystem_policy`, `landlock`, and `process` are sensible defaults that may need adjustment, and that cluster inference is configured separately via `nemoclaw cluster inference set/get` rather than an `inference` policy block. --- diff --git a/.agents/skills/nemoclaw-cli/SKILL.md b/.agents/skills/nemoclaw-cli/SKILL.md index eb64d8a1..19c02b73 100644 --- a/.agents/skills/nemoclaw-cli/SKILL.md +++ b/.agents/skills/nemoclaw-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: nemoclaw-cli -description: Guide agents through using the NemoClaw CLI (nemoclaw) for sandbox management, provider configuration, policy iteration, BYOC workflows, and inference routing. Covers basic through advanced multi-step workflows. 
Trigger keywords - nemoclaw, sandbox create, sandbox connect, sandbox logs, provider create, policy set, policy get, image push, port forward, BYOC, bring your own container, use nemoclaw, run nemoclaw, CLI usage, manage sandbox, manage provider. +description: Guide agents through using the NemoClaw CLI (nemoclaw) for sandbox management, provider configuration, policy iteration, BYOC workflows, and inference routing. Covers basic through advanced multi-step workflows. Trigger keywords - nemoclaw, sandbox create, sandbox connect, logs, provider create, policy set, policy get, image push, forward, port forward, BYOC, bring your own container, use nemoclaw, run nemoclaw, CLI usage, manage sandbox, manage provider, gateway start, gateway select. --- # NemoClaw CLI @@ -9,7 +9,7 @@ Guide agents through using the `nemoclaw` CLI for sandbox and platform managemen ## Overview -The NemoClaw CLI (`nemoclaw`) is the primary interface for managing sandboxes, providers, policies, inference routes, and clusters. This skill teaches agents how to orchestrate CLI commands for common and complex workflows. +The NemoClaw CLI (`nemoclaw`) is the primary interface for managing sandboxes, providers, policies, inference routes, and gateways. This skill teaches agents how to orchestrate CLI commands for common and complex workflows. **Companion skill**: For creating or modifying sandbox policy YAML content (network rules, L7 inspection, access presets), use the `generate-sandbox-policy` skill. This skill covers the CLI *commands* for the policy lifecycle; `generate-sandbox-policy` covers policy *content authoring*. @@ -26,7 +26,7 @@ This is your primary fallback. 
Use it freely -- the CLI's help output is authori ## Prerequisites - `nemoclaw` is on the PATH (install via `cargo install --path crates/navigator-cli`) -- Docker is running (required for cluster operations and BYOC) +- Docker is running (required for gateway operations and BYOC) - For remote clusters: SSH access to the target host ## Command Reference @@ -42,21 +42,21 @@ Use this workflow when no cluster exists yet and the user wants to get a sandbox ### Step 1: Bootstrap a cluster ```bash -nemoclaw cluster admin deploy +nemoclaw gateway start ``` -This provisions a local k3s cluster in Docker. The CLI will prompt interactively if a cluster already exists. The cluster is automatically set as the active cluster. +This provisions a local k3s cluster in Docker. The CLI will prompt interactively if a cluster already exists. The cluster is automatically set as the active gateway. For remote deployment: ```bash -nemoclaw cluster admin deploy --remote user@host --ssh-key ~/.ssh/id_rsa +nemoclaw gateway start --remote user@host --ssh-key ~/.ssh/id_rsa ``` ### Step 2: Verify the cluster ```bash -nemoclaw cluster status +nemoclaw status ``` Confirm the cluster is reachable and shows a version. @@ -139,14 +139,14 @@ nemoclaw sandbox create \ --provider my-github \ --provider my-claude \ --policy ./my-policy.yaml \ - --sync \ + --upload .:/sandbox \ -- claude ``` Key flags: - `--provider`: Attach one or more providers (repeatable) - `--policy`: Custom policy YAML (otherwise uses built-in default or `NEMOCLAW_SANDBOX_POLICY` env var) -- `--sync`: Push local git-tracked files to `/sandbox` in the container +- `--upload [:]`: Upload local files into the sandbox (default dest: `/sandbox`) - `--keep`: Keep sandbox alive after the command exits (useful for non-interactive commands) - `--forward `: Forward a local port (implies `--keep`) @@ -169,30 +169,30 @@ Opens an interactive SSH shell. 
To configure VS Code Remote-SSH: nemoclaw sandbox ssh-config my-sandbox >> ~/.ssh/config ``` -### Sync files +### Upload and download files ```bash -# Push local files to sandbox -nemoclaw sandbox sync my-sandbox --up ./src /sandbox/src +# Upload local files to sandbox +nemoclaw sandbox upload my-sandbox ./src /sandbox/src -# Pull files from sandbox -nemoclaw sandbox sync my-sandbox --down /sandbox/output ./local-output +# Download files from sandbox +nemoclaw sandbox download my-sandbox /sandbox/output ./local-output ``` ### View logs ```bash # Recent logs -nemoclaw sandbox logs my-sandbox +nemoclaw logs my-sandbox # Stream live logs -nemoclaw sandbox logs my-sandbox --tail +nemoclaw logs my-sandbox --tail # Filter by source and level -nemoclaw sandbox logs my-sandbox --tail --source sandbox --level warn +nemoclaw logs my-sandbox --tail --source sandbox --level warn # Logs from the last 5 minutes -nemoclaw sandbox logs my-sandbox --since 5m +nemoclaw logs my-sandbox --since 5m ``` ### Delete sandboxes @@ -246,7 +246,7 @@ Use `--keep` so the sandbox stays alive for iteration. The user can work in the In a separate terminal or as the agent: ```bash -nemoclaw sandbox logs dev --tail --source sandbox +nemoclaw logs dev --tail --source sandbox ``` Look for log lines with `action: deny` -- these indicate blocked network requests. The logs include: @@ -257,7 +257,7 @@ Look for log lines with `action: deny` -- these indicate blocked network request ### Step 3: Pull the current policy ```bash -nemoclaw sandbox policy get dev --full > current-policy.yaml +nemoclaw policy get dev --full > current-policy.yaml ``` The `--full` flag outputs valid YAML that can be directly re-submitted. This is the round-trip format. @@ -277,7 +277,7 @@ Only `network_policies` and `inference` sections can be modified at runtime. 
If ### Step 5: Push the updated policy ```bash -nemoclaw sandbox policy set dev --policy current-policy.yaml --wait +nemoclaw policy set dev --policy current-policy.yaml --wait ``` The `--wait` flag blocks until the sandbox confirms the policy is loaded (polls every second). Exit codes: @@ -288,7 +288,7 @@ The `--wait` flag blocks until the sandbox confirms the policy is loaded (polls ### Step 6: Verify the update ```bash -nemoclaw sandbox policy list dev +nemoclaw policy list dev ``` Check that the latest revision shows status `loaded`. If `failed`, check the error column for details. @@ -302,13 +302,13 @@ Return to Step 2. Continue monitoring logs and refining the policy until all req View all revisions to understand how the policy evolved: ```bash -nemoclaw sandbox policy list dev --limit 50 +nemoclaw policy list dev --limit 50 ``` Fetch a specific historical revision: ```bash -nemoclaw sandbox policy get dev --rev 3 --full +nemoclaw policy get dev --rev 3 --full ``` --- @@ -335,10 +335,10 @@ When `--from` is specified, the CLI: ```bash # Foreground (blocks) -nemoclaw sandbox forward start 8080 my-app +nemoclaw forward start 8080 my-app # Background (returns immediately) -nemoclaw sandbox forward start 8080 my-app -d +nemoclaw forward start 8080 my-app -d ``` The service is now reachable at `localhost:8080`. @@ -347,10 +347,10 @@ The service is now reachable at `localhost:8080`. ```bash # List active forwards -nemoclaw sandbox forward list +nemoclaw forward list # Stop a forward -nemoclaw sandbox forward stop 8080 my-app +nemoclaw forward stop 8080 my-app ``` ### Step 4: Iterate @@ -412,7 +412,7 @@ nemoclaw sandbox ssh-config work-session >> ~/.ssh/config While the user works, monitor the sandbox logs: ```bash -nemoclaw sandbox logs work-session --tail --source sandbox --level warn +nemoclaw logs work-session --tail --source sandbox --level warn ``` Watch for `deny` actions that indicate the user's work is being blocked by policy. 
@@ -421,10 +421,10 @@ Watch for `deny` actions that indicate the user's work is being blocked by polic When denied actions are observed: -1. Pull current policy: `nemoclaw sandbox policy get work-session --full > policy.yaml` +1. Pull current policy: `nemoclaw policy get work-session --full > policy.yaml` 2. Modify the policy to allow the blocked actions (use `generate-sandbox-policy` skill for content) -3. Push the update: `nemoclaw sandbox policy set work-session --policy policy.yaml --wait` -4. Verify: `nemoclaw sandbox policy list work-session` +3. Push the update: `nemoclaw policy set work-session --policy policy.yaml --wait` +4. Verify: `nemoclaw policy list work-session` The user does not need to disconnect -- policy updates are hot-reloaded within ~30 seconds (or immediately when using `--wait`, which polls for confirmation). @@ -436,79 +436,72 @@ nemoclaw sandbox delete work-session --- -## Workflow 7: Inference Routing +## Workflow 7: Cluster Inference -Configure inference routes so sandboxes can access LLM endpoints. +Configure the cluster's managed inference route for `inference.local`. -### Create an inference route +### Set cluster inference + +First ensure the provider record exists: ```bash -nemoclaw inference create \ - --routing-hint local \ - --base-url https://my-llm.example.com \ - --model-id my-model-v1 \ - --api-key sk-abc123 +nemoclaw provider list ``` -If `--protocol` is omitted, the CLI auto-detects by probing the endpoint. - -### List and manage routes +Then point cluster inference at that provider and model: ```bash -nemoclaw inference list -nemoclaw inference update my-route --routing-hint local --base-url https://new-url.example.com --model-id my-model-v2 -nemoclaw inference delete my-route +nemoclaw cluster inference set \ + --provider nvidia \ + --model nvidia/nemotron-3-nano-30b-a3b ``` -### Connect sandbox to inference +This updates the cluster-managed `inference.local` route. 
There is no per-route create/list/update/delete workflow for sandbox inference. -Ensure the sandbox policy allows the routing hint: +### Inspect current inference config -```yaml -# In the policy YAML -inference: - allowed_routes: - - local +```bash +nemoclaw cluster inference get ``` -Then create the sandbox with the policy: +### How sandboxes use it -```bash -nemoclaw sandbox create --policy ./policy-with-inference.yaml -- claude -``` +- Agents send HTTPS requests to `inference.local`. +- The sandbox intercepts those requests locally and routes them through the cluster inference config. +- Sandbox policy is separate from cluster inference configuration. --- -## Workflow 8: Cluster Management +## Workflow 8: Gateway Management -### List and switch clusters +### List and switch gateways ```bash -nemoclaw cluster list # See all clusters -nemoclaw cluster use my-cluster # Switch active cluster -nemoclaw cluster status # Verify connectivity +nemoclaw gateway select # See all gateways (no args shows list) +nemoclaw gateway select my-cluster # Switch active gateway +nemoclaw status # Verify connectivity ``` ### Lifecycle ```bash -nemoclaw cluster admin deploy # Start local cluster -nemoclaw cluster admin stop # Stop (preserves state) -nemoclaw cluster admin deploy # Restart (reuses state) -nemoclaw cluster admin destroy # Destroy permanently +nemoclaw gateway start # Start local cluster +nemoclaw gateway stop # Stop (preserves state) +nemoclaw gateway start # Restart (reuses state) +nemoclaw gateway destroy # Destroy permanently ``` ### Remote clusters ```bash # Deploy to remote host -nemoclaw cluster admin deploy --remote user@host --ssh-key ~/.ssh/id_rsa --name remote-cluster +nemoclaw gateway start --remote user@host --ssh-key ~/.ssh/id_rsa --name remote-cluster # Set up kubectl access -nemoclaw cluster admin tunnel --name remote-cluster +nemoclaw gateway tunnel --name remote-cluster # Get cluster info -nemoclaw cluster admin info --name remote-cluster +nemoclaw 
gateway info --name remote-cluster ``` --- @@ -527,10 +520,10 @@ The CLI help is always authoritative. If the help output contradicts this skill, ```bash $ nemoclaw sandbox --help -# Shows: create, get, list, delete, connect, sync, logs, ssh-config, forward, image, policy +# Shows: create, get, list, delete, connect, upload, download, ssh-config, image -$ nemoclaw sandbox sync --help -# Shows: --up, --down flags, positional arguments, usage examples +$ nemoclaw sandbox upload --help +# Shows: positional arguments (name, path, dest), usage examples ``` --- @@ -539,23 +532,27 @@ $ nemoclaw sandbox sync --help | Task | Command | |------|---------| -| Deploy local cluster | `nemoclaw cluster admin deploy` | -| Check cluster health | `nemoclaw cluster status` | +| Deploy local cluster | `nemoclaw gateway start` | +| Check cluster health | `nemoclaw status` | +| List/switch gateways | `nemoclaw gateway select [name]` | | Create sandbox (interactive) | `nemoclaw sandbox create` | | Create sandbox with tool | `nemoclaw sandbox create -- claude` | | Create with custom policy | `nemoclaw sandbox create --policy ./p.yaml --keep` | | Connect to sandbox | `nemoclaw sandbox connect ` | -| Stream live logs | `nemoclaw sandbox logs --tail` | -| Pull current policy | `nemoclaw sandbox policy get --full > p.yaml` | -| Push updated policy | `nemoclaw sandbox policy set --policy p.yaml --wait` | -| Policy revision history | `nemoclaw sandbox policy list ` | +| Stream live logs | `nemoclaw logs --tail` | +| Pull current policy | `nemoclaw policy get --full > p.yaml` | +| Push updated policy | `nemoclaw policy set --policy p.yaml --wait` | +| Policy revision history | `nemoclaw policy list ` | | Create sandbox from Dockerfile | `nemoclaw sandbox create --from ./Dockerfile --keep` | -| Forward a port | `nemoclaw sandbox forward start -d` | +| Forward a port | `nemoclaw forward start -d` | +| Upload files to sandbox | `nemoclaw sandbox upload ` | +| Download files from sandbox | `nemoclaw 
sandbox download ` | | Create provider | `nemoclaw provider create --name N --type T --from-existing` | | List providers | `nemoclaw provider list` | -| Create inference route | `nemoclaw inference create --routing-hint H --base-url U --model-id M` | +| Configure cluster inference | `nemoclaw cluster inference set --provider P --model M` | +| View cluster inference | `nemoclaw cluster inference get` | | Delete sandbox | `nemoclaw sandbox delete ` | -| Destroy cluster | `nemoclaw cluster admin destroy` | +| Destroy cluster | `nemoclaw gateway destroy` | | Self-teach any command | `nemoclaw --help` | ## Companion Skills @@ -564,4 +561,4 @@ $ nemoclaw sandbox sync --help |-------|------------| | `generate-sandbox-policy` | Creating or modifying policy YAML content (network rules, L7 inspection, access presets, endpoint configuration) | | `debug-navigator-cluster` | Diagnosing cluster startup or health failures | -| `tui-development` | Developing features for the Gator TUI (`nemoclaw gator`) | +| `tui-development` | Developing features for the NemoClaw TUI (`nemoclaw term`) | diff --git a/.agents/skills/nemoclaw-cli/cli-reference.md b/.agents/skills/nemoclaw-cli/cli-reference.md index c26f05ac..2ff02478 100644 --- a/.agents/skills/nemoclaw-cli/cli-reference.md +++ b/.agents/skills/nemoclaw-cli/cli-reference.md @@ -9,7 +9,7 @@ Quick-reference for the `nemoclaw` command-line interface. For workflow guidance | Flag | Description | |------|-------------| | `-v`, `--verbose` | Increase verbosity (`-v` = info, `-vv` = debug, `-vvv` = trace) | -| `-c`, `--cluster ` | Cluster to operate on. Also settable via `NEMOCLAW_CLUSTER` env var. Falls back to active cluster in `~/.config/nemoclaw/active_cluster`. | +| `-c`, `--gateway ` | Gateway to operate on. Also settable via `NEMOCLAW_CLUSTER` env var. Falls back to active gateway in `~/.config/nemoclaw/active_cluster`. | ## Environment Variables @@ -24,68 +24,54 @@ Quick-reference for the `nemoclaw` command-line interface. 
For workflow guidance ``` nemoclaw -├── cluster -│ ├── status -│ ├── use -│ ├── list -│ └── admin -│ ├── deploy [opts] -│ ├── stop [opts] -│ ├── destroy [opts] -│ ├── info [--name] -│ └── tunnel [opts] +├── gateway +│ ├── start [opts] +│ ├── stop [opts] +│ ├── destroy [opts] +│ ├── info [--name] +│ ├── tunnel [opts] +│ └── select [name] +├── status +├── cluster +│ └── inference +│ ├── set / update [--provider] [--model] +│ └── get ├── sandbox │ ├── create [opts] [-- CMD...] │ ├── get │ ├── list [opts] │ ├── delete ... │ ├── connect -│ ├── sync {--up|--down} [dest] -│ ├── logs [opts] +│ ├── upload [dest] +│ ├── download [dest] │ ├── ssh-config -│ ├── forward -│ │ ├── start [-d] -│ │ ├── stop -│ │ └── list -│ ├── image -│ │ └── push [opts] -│ └── policy -│ ├── set --policy [--wait] -│ ├── get [--full] -│ └── list +│ └── image +│ └── push [opts] +├── forward +│ ├── start [-d] +│ ├── stop +│ └── list +├── logs [opts] +├── policy +│ ├── set --policy [--wait] +│ ├── get [--full] +│ └── list ├── provider │ ├── create --name --type [opts] │ ├── get │ ├── list [opts] │ ├── update --type [opts] │ └── delete ... -├── inference -│ ├── create [opts] -│ ├── update [opts] -│ ├── delete ... -│ └── list [opts] -├── gator +├── term ├── completions └── ssh-proxy [opts] ``` --- -## Cluster Commands - -### `nemoclaw cluster status` - -Show server connectivity and version. - -### `nemoclaw cluster use ` +## Gateway Commands -Set the active cluster. Writes to `~/.config/nemoclaw/active_cluster`. - -### `nemoclaw cluster list` - -List all provisioned clusters. Active cluster marked with `*`. - -### `nemoclaw cluster admin deploy` +### `nemoclaw gateway start` Provision or start a cluster (local or remote). @@ -99,8 +85,9 @@ Provision or start a cluster (local or remote). 
| `--kube-port [PORT]` | none | Expose K8s control plane on host port | | `--update-kube-config` | false | Write kubeconfig into `~/.kube/config` | | `--get-kubeconfig` | false | Print kubeconfig to stdout | +| `--recreate` | false | Destroy and recreate from scratch if a gateway already exists (skips interactive prompt) | -### `nemoclaw cluster admin stop` +### `nemoclaw gateway stop` Stop a cluster (preserves state for later restart). @@ -110,11 +97,11 @@ Stop a cluster (preserves state for later restart). | `--remote ` | SSH destination | | `--ssh-key ` | SSH private key | -### `nemoclaw cluster admin destroy` +### `nemoclaw gateway destroy` Destroy a cluster and all its state. Same flags as `stop`. -### `nemoclaw cluster admin info` +### `nemoclaw gateway info` Show deployment details: endpoint, kubeconfig path, kube port, remote host. @@ -122,7 +109,7 @@ Show deployment details: endpoint, kubeconfig path, kube port, remote host. |------|-------------| | `--name ` | Cluster name (defaults to active) | -### `nemoclaw cluster admin tunnel` +### `nemoclaw gateway tunnel` Print or start an SSH tunnel for kubectl access to a remote cluster. @@ -133,6 +120,18 @@ Print or start an SSH tunnel for kubectl access to a remote cluster. | `--ssh-key ` | SSH private key | | `--print-command` | Only print the SSH command, don't execute | +### `nemoclaw gateway select [name]` + +Set the active gateway. Writes to `~/.config/nemoclaw/active_cluster`. When called without arguments, lists all provisioned gateways with the active one marked with `*`. + +--- + +## Status Command + +### `nemoclaw status` + +Show server connectivity and version for the active gateway. 
+ --- ## Sandbox Commands @@ -145,7 +144,7 @@ Create a sandbox, wait for readiness, then connect or execute the trailing comma |------|-------------| | `--name ` | Sandbox name (auto-generated if omitted) | | `--from ` | Sandbox source: community name, Dockerfile path, directory, or image reference (BYOC) | -| `--sync` | Sync local git-tracked files into sandbox at `/sandbox` | +| `--upload [:]` | Upload local files into sandbox (default dest: `/sandbox`) | | `--keep` | Keep sandbox alive after non-interactive commands finish | | `--provider ` | Provider to attach (repeatable) | | `--policy ` | Path to custom policy YAML | @@ -154,6 +153,10 @@ Create a sandbox, wait for readiness, then connect or execute the trailing comma | `--ssh-key ` | SSH private key for auto-bootstrap | | `--tty` | Force pseudo-terminal allocation | | `--no-tty` | Disable pseudo-terminal allocation | +| `--bootstrap` | Auto-bootstrap a gateway if none is available (skips interactive prompt) | +| `--no-bootstrap` | Never auto-bootstrap; error immediately if no gateway is available | +| `--auto-providers` | Auto-create missing providers from local credentials (skips interactive prompt) | +| `--no-auto-providers` | Never auto-create providers; skip missing providers silently | | `[-- COMMAND...]` | Command to execute (defaults to interactive shell) | ### `nemoclaw sandbox get ` @@ -179,27 +182,25 @@ Delete one or more sandboxes by name. Stops any background port forwards. Open an interactive SSH shell to a sandbox. -### `nemoclaw sandbox sync {--up | --down } [dest]` +### `nemoclaw sandbox upload [dest]` -Sync files to/from a sandbox using tar-over-SSH. +Upload local files to a sandbox using tar-over-SSH. 
-| Flag | Description | -|------|-------------| -| `--up ` | Push local files to sandbox | -| `--down ` | Pull sandbox files to local | -| `[DEST]` | Destination path (default: `/sandbox` for up, `.` for down) | +| Argument | Default | Description | +|----------|---------|-------------| +| `` | -- | Sandbox name (required) | +| `` | -- | Local path to upload (required) | +| `[dest]` | `/sandbox` | Destination path in sandbox | -### `nemoclaw sandbox logs ` +### `nemoclaw sandbox download [dest]` -View sandbox logs. Supports one-shot and streaming. +Download files from a sandbox using tar-over-SSH. -| Flag | Default | Description | -|------|---------|-------------| -| `-n ` | 200 | Number of log lines | -| `--tail` | false | Stream live logs | -| `--since ` | none | Only show logs from this duration ago (e.g., `5m`, `1h`) | -| `--source ` | `all` | Filter: `gateway`, `sandbox`, or `all` (repeatable) | -| `--level ` | none | Minimum level: `error`, `warn`, `info`, `debug`, `trace` | +| Argument | Default | Description | +|----------|---------|-------------| +| `` | -- | Sandbox name (required) | +| `` | -- | Sandbox path to download (required) | +| `[dest]` | `.` | Local destination path | ### `nemoclaw sandbox ssh-config ` @@ -209,7 +210,7 @@ Print an SSH config `Host` block for a sandbox. Useful for VS Code Remote-SSH. ## Port Forwarding Commands -### `nemoclaw sandbox forward start ` +### `nemoclaw forward start ` Start forwarding a local port to a sandbox. @@ -219,19 +220,35 @@ Start forwarding a local port to a sandbox. | `` | Sandbox name | | `-d`, `--background` | Run in background | -### `nemoclaw sandbox forward stop ` +### `nemoclaw forward stop ` Stop a background port forward. -### `nemoclaw sandbox forward list` +### `nemoclaw forward list` List all active port forwards (sandbox, port, PID, status). --- +## Logs Command + +### `nemoclaw logs ` + +View sandbox logs. Supports one-shot and streaming. 
+ +| Flag | Default | Description | +|------|---------|-------------| +| `-n ` | 200 | Number of log lines | +| `--tail` | false | Stream live logs | +| `--since ` | none | Only show logs from this duration ago (e.g., `5m`, `1h`) | +| `--source ` | `all` | Filter: `gateway`, `sandbox`, or `all` (repeatable) | +| `--level ` | none | Minimum level: `error`, `warn`, `info`, `debug`, `trace` | + +--- + ## Policy Commands -### `nemoclaw sandbox policy set --policy ` +### `nemoclaw policy set --policy ` Update the policy on a live sandbox. Only dynamic fields (`network_policies`, `inference`) can be changed at runtime. @@ -243,7 +260,7 @@ Update the policy on a live sandbox. Only dynamic fields (`network_policies`, `i Exit codes with `--wait`: 0 = loaded, 1 = failed, 124 = timeout. -### `nemoclaw sandbox policy get ` +### `nemoclaw policy get ` Show current active policy for a sandbox. @@ -252,7 +269,7 @@ Show current active policy for a sandbox. | `--rev ` | 0 (latest) | Show a specific revision | | `--full` | false | Print the full policy as YAML (round-trips with `--policy` input) | -### `nemoclaw sandbox policy list ` +### `nemoclaw policy list ` List policy revision history (version, hash, status, created, error). @@ -302,46 +319,37 @@ Delete one or more providers by name. --- -## Inference Commands +## Cluster Inference Commands -### `nemoclaw inference create` +### `nemoclaw cluster inference set` -Create an inference route. Auto-detects supported protocols if `--protocol` is omitted. +Configure the managed cluster inference route used by `inference.local`. Both flags are required. | Flag | Default | Description | |------|---------|-------------| -| `--name ` | auto-generated | Route name | -| `--routing-hint ` | -- | Routing hint (required) | -| `--base-url ` | -- | Inference endpoint base URL (required) | -| `--protocol ` | auto-detected | Protocol(s): `openai_chat_completions`, `openai_completions`, `anthropic_messages`. Repeatable. 
| -| `--api-key ` | `""` | API key for the endpoint | -| `--model-id ` | -- | Model identifier (required) | -| `--disabled` | false | Create in disabled state | - -### `nemoclaw inference update ` - -Update an existing inference route. Same flags as `create`. +| `--provider ` | -- | Provider record name (required) | +| `--model ` | -- | Model identifier to use for generation requests (required) | -### `nemoclaw inference delete ...` +### `nemoclaw cluster inference update` -Delete inference routes by name. - -### `nemoclaw inference list` - -List inference routes. +Partially update the cluster inference configuration. Fetches the current config and applies only the provided overrides. At least one flag is required. | Flag | Default | Description | |------|---------|-------------| -| `--limit ` | 100 | Max routes | -| `--offset ` | 0 | Pagination offset | +| `--provider ` | unchanged | Provider record name | +| `--model ` | unchanged | Model identifier | + +### `nemoclaw cluster inference get` + +Show the current cluster inference configuration. --- ## Other Commands -### `nemoclaw gator` +### `nemoclaw term` -Launch the Gator interactive TUI. +Launch the NemoClaw interactive TUI. ### `nemoclaw completions ` diff --git a/.agents/skills/tui-development/SKILL.md b/.agents/skills/tui-development/SKILL.md index f2986948..fc28341a 100644 --- a/.agents/skills/tui-development/SKILL.md +++ b/.agents/skills/tui-development/SKILL.md @@ -1,17 +1,17 @@ --- name: tui-development -description: Guide for developing the "Gator" TUI — a ratatui-based terminal UI for the NemoClaw platform. Covers architecture, navigation, data fetching, theming, UX conventions, and development workflow. Trigger keywords - gator, TUI, terminal UI, ratatui, navigator-tui, tui development, gator feature, gator bug. +description: Guide for developing the NemoClaw TUI — a ratatui-based terminal UI for the NemoClaw platform. 
Covers architecture, navigation, data fetching, theming, UX conventions, and development workflow. Trigger keywords - term, TUI, terminal UI, ratatui, navigator-tui, tui development, tui feature, tui bug. --- -# Gator TUI Development Guide +# NemoClaw TUI Development Guide -Comprehensive reference for any agent working on the Gator TUI. +Comprehensive reference for any agent working on the NemoClaw TUI. ## 1. Overview -Gator is a ratatui-based terminal UI for the NemoClaw platform. It provides a keyboard-driven interface for managing clusters, sandboxes, and logs — the same operations available via the `nemoclaw` CLI, but with a live, interactive dashboard. +The NemoClaw TUI is a ratatui-based terminal UI for the NemoClaw platform. It provides a keyboard-driven interface for managing gateways, sandboxes, and logs — the same operations available via the `nemoclaw` CLI, but with a live, interactive dashboard. -- **Launched via:** `nemoclaw gator` or `mise run gator` +- **Launched via:** `nemoclaw term` or `mise run term` - **Crate:** `crates/navigator-tui/` - **Key dependencies:** - `ratatui` (workspace version) — uses `frame.size()` (not `frame.area()`) @@ -39,7 +39,7 @@ Cluster (discovered via navigator_bootstrap::list_clusters()) The **title bar** always reflects this hierarchy, reading left-to-right from general to specific: ``` - Gator │ Current Cluster: () │ + NemoClaw │ Current Cluster: () │ ``` ## 3. Navigation & Screen Architecture @@ -104,8 +104,8 @@ Every frame renders four vertical regions: ### Title bar examples -- Dashboard: ` Gator │ Current Cluster: nemoclaw (Healthy) │ Dashboard` -- Sandbox detail: ` Gator │ Current Cluster: nemoclaw (Healthy) │ Sandbox: my-sandbox` +- Dashboard: ` NemoClaw │ Current Cluster: nemoclaw (Healthy) │ Dashboard` +- Sandbox detail: ` NemoClaw │ Current Cluster: nemoclaw (Healthy) │ Sandbox: my-sandbox` ### Adding a new screen @@ -193,7 +193,7 @@ All colors and styles are defined in `crates/navigator-tui/src/theme.rs`. 
| `MUTED` | White + DIM modifier | Secondary info, separators (`│`), unfocused items | | `HEADING` | White + BOLD | Panel titles, sandbox/cluster names when active | | `ACCENT` | NVIDIA_GREEN foreground | Selected row marker (`▌`), sandbox source labels | -| `ACCENT_BOLD` | NVIDIA_GREEN + BOLD | "Gator" brand text, command prompt `:` | +| `ACCENT_BOLD` | NVIDIA_GREEN + BOLD | "NemoClaw" brand text, command prompt `:` | | `SELECTED` | BOLD modifier only | Selected row text emphasis | | `BORDER` | EVERGLADE foreground | Unfocused panel borders | | `BORDER_FOCUSED` | NVIDIA_GREEN foreground | Focused panel borders | @@ -225,14 +225,14 @@ The `confirm_delete` flag in `App` gates destructive key handling — while true ### CLI parity -Gator actions should parallel `nemoclaw` CLI commands so users have familiar mental models: +TUI actions should parallel `nemoclaw` CLI commands so users have familiar mental models: -| CLI Command | Gator Equivalent | +| CLI Command | TUI Equivalent | | --- | --- | | `nemoclaw sandbox list` | Sandbox table on Dashboard | | `nemoclaw sandbox delete ` | `[d]` on sandbox detail, then `[y]` to confirm | -| `nemoclaw sandbox logs ` | `[l]` on sandbox detail to open log viewer | -| `nemoclaw cluster health` | Status in title bar + cluster list | +| `nemoclaw logs ` | `[l]` on sandbox detail to open log viewer | +| `nemoclaw status` | Status in title bar + cluster list | When adding new TUI features, check what the CLI offers and maintain consistency. 
@@ -405,11 +405,11 @@ The connect timeout for cluster switching is 10 seconds with HTTP/2 keepalive at # Build the crate cargo build -p navigator-tui -# Run gator against the active cluster -mise run gator +# Run the TUI against the active cluster +mise run term # Run with cargo-watch for hot-reload during development -mise run gator:dev +mise run term:dev # Format cargo fmt -p navigator-tui diff --git a/.claude/agent-memory/arch-doc-writer/MEMORY.md b/.claude/agent-memory/arch-doc-writer/MEMORY.md index 8035ba37..792fad21 100644 --- a/.claude/agent-memory/arch-doc-writer/MEMORY.md +++ b/.claude/agent-memory/arch-doc-writer/MEMORY.md @@ -1,7 +1,7 @@ # Arch Doc Writer Memory ## Project Structure -- Crates: `navigator-cli`, `navigator-server`, `navigator-sandbox`, `navigator-bootstrap`, `navigator-core`, `navigator-providers`, `navigator-router` +- Crates: `navigator-cli`, `navigator-server`, `navigator-sandbox`, `navigator-bootstrap`, `navigator-core`, `navigator-providers`, `navigator-router`, `navigator-policy` - CLI entry: `crates/navigator-cli/src/main.rs` (clap parser + dispatch) - CLI logic: `crates/navigator-cli/src/run.rs` (all command implementations) - Sandbox entry: `crates/navigator-sandbox/src/lib.rs` (`run_sandbox()`) @@ -9,12 +9,13 @@ - Identity cache: `crates/navigator-sandbox/src/identity.rs` (SHA256 TOFU, uses Mutex NOT DashMap) - L7 inspection: `crates/navigator-sandbox/src/l7/` (mod.rs, tls.rs, relay.rs, rest.rs, provider.rs, inference.rs) - Proxy: `crates/navigator-sandbox/src/proxy.rs` +- Policy crate: `crates/navigator-policy/src/lib.rs` (YAML<->proto conversion, validation, restrictive default) - Server multiplex: `crates/navigator-server/src/multiplex.rs` - SSH tunnel: `crates/navigator-server/src/ssh_tunnel.rs` - Sandbox SSH server: `crates/navigator-sandbox/src/ssh.rs` - Providers: `crates/navigator-providers/src/providers/` (per-provider modules) - Bootstrap: `crates/navigator-bootstrap/src/lib.rs` (cluster lifecycle) -- Proto files: 
`proto/` directory (navigator.proto, sandbox.proto, datamodel.proto) +- Proto files: `proto/` directory (navigator.proto, sandbox.proto, datamodel.proto, inference.proto) ## Architecture Docs - Files renamed from numbered prefix format to descriptive names (e.g., `2 - server-architecture.md` -> `gateway-architecture.md`) @@ -24,7 +25,9 @@ ## Key Patterns - OPA baked-in rules: `include_str!("../data/sandbox-policy.rego")` in opa.rs -- Policy loading: gRPC mode (NAVIGATOR_SANDBOX_ID + NAVIGATOR_ENDPOINT) or file mode (--policy-rules + --policy-data) +- Policy loading: gRPC mode (NEMOCLAW_SANDBOX_ID + NEMOCLAW_ENDPOINT) or file mode (--policy-rules + --policy-data) +- Env vars: sandbox uses NEMOCLAW_* prefix (e.g., NEMOCLAW_SANDBOX_ID, NEMOCLAW_ENDPOINT, NEMOCLAW_POLICY_RULES) +- CLI flag: `--navigator-endpoint` (NOT `--nemoclaw-endpoint`) - Provider env injection: both entrypoint process (tokio Command) and SSH shell (std Command) - Cluster bootstrap: `sandbox_create_with_bootstrap()` auto-deploys when no cluster exists (main.rs ~line 632) - CLI cluster resolution: --cluster flag > NAVIGATOR_CLUSTER env > active cluster file @@ -41,8 +44,9 @@ ## Server Crate Details - Two gRPC services: Navigator (grpc.rs) and Inference (inference.rs), multiplexed via GrpcRouter by URI path -- Gateway is control-plane only for inference: route CRUD + GetSandboxInferenceBundle -- GetSandboxInferenceBundle: returns SandboxResolvedRoute list + revision hash + generated_at_ms for a sandbox_id +- Gateway is control-plane only for inference: SetClusterInference + GetClusterInference + GetInferenceBundle +- GetInferenceBundle: resolves managed route from provider record at request time, returns ResolvedRoute list + revision hash + generated_at_ms +- SetClusterInference: takes provider_name + model_id, stores only references (endpoint/key/protocols resolved at bundle time) - Persistence: single `objects` table, protobuf payloads, Store enum dispatches SQLite vs Postgres by URL prefix - 
Persistence CRUD: upsert ON CONFLICT (id) not (object_type, id); list ORDER BY created_at_ms ASC, name ASC (not id!) - --db-url has no code default; Helm values.yaml sets `sqlite:/var/navigator/navigator.db` @@ -83,10 +87,10 @@ - Poll loop: `run_policy_poll_loop()` in lib.rs, spawned after child process, gRPC mode only - `OpaEngine::reload_from_proto()`: reuses `from_proto()` pipeline, atomically swaps inner engine, LKG on failure - `CachedNavigatorClient` in grpc_client.rs: persistent mTLS channel for poll + status report (mirrors CachedInferenceClient) -- Dynamic domains: network_policies, inference (OPA engine swap). Static domains: filesystem, landlock, process (pre_exec, immutable) +- Dynamic domains: network_policies only (inference removed from policy). Static domains: filesystem, landlock, process (pre_exec, immutable) - Server-side: `UpdateSandboxPolicy` RPC rejects changes to static fields or network mode changes - Server-side validation: `validate_static_fields_unchanged()` + `validate_network_mode_unchanged()` in grpc.rs -- Poll interval: `NAVIGATOR_POLICY_POLL_INTERVAL_SECS` env var (default 30), no CLI flag +- Poll interval: `NEMOCLAW_POLICY_POLL_INTERVAL_SECS` env var (default 30), no CLI flag - Version tracking: monotonic i64 per sandbox, `GetSandboxPolicyResponse` has version + policy_hash - Version 1 backfill: lazy on first `GetSandboxPolicy` from spec.policy if no policy_revisions row exists - `supersede_pending_policies()`: marks older pending revisions as superseded when new version persisted @@ -99,22 +103,34 @@ - CLI: `sandbox_policy_set()` in run.rs (~line 2901): loads YAML, calls UpdateSandboxPolicy, optionally polls for status - CLI: `sandbox_policy_get()` in run.rs (~line 3015): supports --rev N (version=0 means latest) and --full (YAML output via policy_to_yaml) - CLI: `sandbox_logs()` in run.rs (~line 3124): --source (all/gateway/sandbox) and --level (error/warn/info/debug/trace) filters -- Deterministic hashing: 
`deterministic_policy_hash()` in grpc.rs (~line 1133): sorts network_policies by key, hashes fields individually +- Deterministic hashing: `deterministic_policy_hash()` in grpc.rs (~line 1222): sorts network_policies by key, hashes fields individually, NO inference field - Idempotent UpdateSandboxPolicy: compares hash of new policy to latest stored hash, returns existing version if match -- `policy_to_yaml()` in run.rs (~line 1623): converts proto to YAML via `PolicyYaml` struct (uses BTreeMap for ordered keys) -- `policy_record_to_revision()` in grpc.rs (~line 1234): `include_policy` param controls whether full proto is included +- `policy_to_yaml()` in run.rs: converts proto to YAML via navigator_policy::serialize_sandbox_policy (moved to navigator-policy crate) +- `policy_record_to_revision()` in grpc.rs (~line 1334): `include_policy` param controls whether full proto is included - Server-side log filtering: `source_matches()` + `level_matches()` in grpc.rs, applied in both get_sandbox_logs and watch_sandbox -- Standalone `proxy_inference()` was removed; proxy uses `CachedInferenceClient` via `InferenceContext.grpc_client` (OnceCell) +- Standalone `proxy_inference()` was removed; inference handled in-sandbox by navigator-router +- Provider types: claude, codex, opencode, generic, openai, anthropic, nvidia, gitlab, github, outlook ## Policy System Details -- YAML data file top-level keys: filesystem_policy, landlock, process, network_policies, inference +- YAML data file top-level keys: filesystem_policy, landlock, process, network_policies (NO inference key -- removed) +- Proto SandboxPolicy fields: version, filesystem, landlock, process, network_policies (NO inference field) - Proto message field `filesystem` maps to YAML key `filesystem_policy` (different names!) -- Behavioral trigger: network_policies non-empty -> proxy mode, empty -> block mode (seccomp blocks AF_INET/AF_INET6) +- IMPORTANT: Sandbox always runs in Proxy mode. 
NetworkMode::Block exists as enum variant but is NEVER set. +- Both file mode and gRPC mode set NetworkMode::Proxy unconditionally (see load_policy() in lib.rs and TryFrom in policy.rs) +- Reason: proxy always needed so inference.local is addressable + all egress evaluated by OPA +- OPA two-action model: Allow, Deny (NetworkAction in opa.rs). InspectForInference was REMOVED. +- Rego network_action rule: "allow" or "deny" only (no "inspect_for_inference") - Behavioral trigger: endpoint `protocol` field -> L7 inspection; absent -> L4 raw copy_bidirectional - Behavioral trigger: `tls: terminate` -> MITM TLS with ephemeral CA; requires `protocol` to also be set - Behavioral trigger: `enforcement: enforce` -> deny at proxy; `audit` (default) -> log + forward - Access presets: read-only (GET/HEAD/OPTIONS), read-write (+POST/PUT/PATCH), full (*/*) - Validation: rules+access mutual exclusion, protocol requires rules/access, sql+enforce blocked, empty rules rejected +- YAML policy parsing moved to navigator-policy crate (parse_sandbox_policy, serialize_sandbox_policy) +- PolicyFile uses deny_unknown_fields for strict YAML parsing +- restrictive_default_policy() in navigator-policy: no network policies, sandbox user, best_effort landlock +- CONTAINER_POLICY_PATH: /etc/navigator/policy.yaml (well-known path for container-shipped policy) +- clear_process_identity(): clears run_as_user/run_as_group for custom images +- Policy safety validation: validate_sandbox_policy() checks root identity, path traversal, relative paths, overly broad paths, max 256 paths, max 4096 chars - Identity binding: /proc/net/tcp -> inode -> PID -> /proc/PID/exe + ancestors + cmdline, SHA256 TOFU cache - Network namespace: 10.200.0.1 (host/proxy) <-> 10.200.0.2 (sandbox), port 3128 default - Enforcement order in pre_exec: setns -> drop_privileges -> landlock -> seccomp @@ -132,11 +148,14 @@ ## Inference Routing Details - Sandbox-local execution via navigator-router crate -- OPA three-action model: 
Allow, InspectForInference, Deny (`NetworkAction` in opa.rs) -- InferenceContext: Router + patterns + `Arc>>` route cache +- InferenceContext in proxy.rs: Router + patterns + `Arc>>` route cache - Route sources: `--inference-routes` YAML file (standalone) > cluster bundle via gRPC; empty routes gracefully disable - Cluster bundle refreshed every ROUTE_REFRESH_INTERVAL_SECS (30s) -- Patterns: POST /v1/chat/completions, /v1/completions, /v1/responses, /v1/messages +- Patterns: POST /v1/chat/completions, /v1/completions, /v1/responses, /v1/messages; GET /v1/models, /v1/models/* +- inference.local CONNECT intercepted BEFORE OPA evaluation in proxy +- InferenceProviderProfile in navigator-core/src/inference.rs: centralized provider metadata +- proxy.rs: ONLY CONNECT to inference.local is handled; non-CONNECT requests get 403 for ALL hosts +- Buffer: INITIAL_INFERENCE_BUF=64KiB, MAX_INFERENCE_BUF=10MiB; grows by doubling - Dev sandbox: `mise run sandbox -e VAR_NAME` forwards host env vars; NVIDIA_API_KEY always passed ## Log Streaming Details @@ -162,4 +181,4 @@ ## Naming Conventions - The project name "Navigator" appears in code but docs should use generic terms per user preference - CLI binary: `navigator` (aliased as `nav` in dev via mise) -- Provider types: claude, codex, opencode, openclaw, generic, nvidia, gitlab, github, outlook +- Provider types: claude, codex, opencode, generic, openai, anthropic, nvidia, gitlab, github, outlook (see ProviderRegistry::new()) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 6e7ebbd8..5fefb924 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -20,7 +20,7 @@ jobs: name: License Headers runs-on: build-amd64 container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -41,7 +41,7 @@ jobs: runner: [build-amd64, build-arm64] runs-on: ${{ matrix.runner }} 
container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -81,7 +81,7 @@ jobs: runner: [build-amd64, build-arm64] runs-on: ${{ matrix.runner }} container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci-image.yml b/.github/workflows/ci-image.yml index 83e74b84..6fb20379 100644 --- a/.github/workflows/ci-image.yml +++ b/.github/workflows/ci-image.yml @@ -12,7 +12,7 @@ on: env: REGISTRY: ghcr.io - CI_IMAGE: ghcr.io/nvidia/nv-agent-env/ci + CI_IMAGE: ghcr.io/nvidia/nemoclaw/ci permissions: contents: read diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 2bc8468a..80462e45 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -32,7 +32,7 @@ jobs: runs-on: build-amd64 timeout-minutes: ${{ inputs.timeout-minutes }} container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -41,10 +41,24 @@ jobs: - /var/run/docker.sock:/var/run/docker.sock env: IMAGE_TAG: ${{ github.sha }} - IMAGE_REGISTRY: ghcr.io/nvidia/nv-agent-env + IMAGE_REGISTRY: ghcr.io/nvidia/nemoclaw DOCKER_PUSH: ${{ inputs.push && '1' || '0' }} steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Compute cargo version + id: version + run: | + set -eu + echo "cargo_version=$(uv run python tasks/scripts/release.py get-version --cargo)" >> "$GITHUB_OUTPUT" - name: Log in to GHCR run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ 
github.actor }}" --password-stdin @@ -55,4 +69,5 @@ jobs: - name: Build ${{ inputs.component }} image env: DOCKER_BUILDER: navigator + NEMOCLAW_CARGO_VERSION: ${{ steps.version.outputs.cargo_version }} run: mise run --no-prepare docker:build:${{ inputs.component }} diff --git a/.github/workflows/docs-build.yml b/.github/workflows/docs-build.yml index dd8685b3..22baf803 100644 --- a/.github/workflows/docs-build.yml +++ b/.github/workflows/docs-build.yml @@ -25,7 +25,7 @@ jobs: build: runs-on: build-arm64 container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -71,7 +71,7 @@ jobs: needs: [build] runs-on: build-arm64 container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index e569a8cb..63720601 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -18,7 +18,7 @@ jobs: runs-on: build-amd64 timeout-minutes: 30 container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -28,9 +28,9 @@ jobs: env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} IMAGE_TAG: ${{ inputs.image-tag }} - NEMOCLAW_REGISTRY: ghcr.io/nvidia/nv-agent-env + NEMOCLAW_REGISTRY: ghcr.io/nvidia/nemoclaw NEMOCLAW_REGISTRY_HOST: ghcr.io - NEMOCLAW_REGISTRY_NAMESPACE: nvidia/nv-agent-env + NEMOCLAW_REGISTRY_NAMESPACE: nvidia/nemoclaw NEMOCLAW_REGISTRY_USERNAME: ${{ github.actor }} NEMOCLAW_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} steps: @@ -40,7 +40,7 @@ jobs: run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin - name: Pull cluster image - run: docker pull 
ghcr.io/nvidia/nv-agent-env/cluster:${{ inputs.image-tag }} + run: docker pull ghcr.io/nvidia/nemoclaw/cluster:${{ inputs.image-tag }} - name: Install Python dependencies and generate protobuf stubs run: uv sync --frozen && mise run --no-prepare python:proto @@ -50,8 +50,13 @@ jobs: GATEWAY_HOST: host.docker.internal GATEWAY_PORT: "8080" SKIP_IMAGE_PUSH: "1" - NEMOCLAW_CLUSTER_IMAGE: ghcr.io/nvidia/nv-agent-env/cluster:${{ inputs.image-tag }} + NEMOCLAW_CLUSTER_IMAGE: ghcr.io/nvidia/nemoclaw/cluster:${{ inputs.image-tag }} run: mise run --no-prepare --skip-deps cluster + - name: Install SSH client for Rust CLI e2e tests + run: apt-get update && apt-get install -y --no-install-recommends openssh-client && rm -rf /var/lib/apt/lists/* + - name: Run E2E tests - run: mise run --no-prepare --skip-deps test:e2e:sandbox + run: | + mise run --no-prepare --skip-deps e2e:python + mise run --no-prepare --skip-deps e2e:rust diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 53d638fe..0569a21e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -29,13 +29,34 @@ jobs: with: component: cluster + tag-ghcr-latest: + name: Tag GHCR Images as Latest + needs: [build-server, build-sandbox, build-cluster] + runs-on: build-amd64 + timeout-minutes: 10 + steps: + - name: Log in to GHCR + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Tag images as latest + run: | + set -euo pipefail + REGISTRY="ghcr.io/nvidia/nemoclaw" + for component in server sandbox cluster; do + echo "Tagging ${REGISTRY}/${component}:${{ github.sha }} as latest..." 
+ docker buildx imagetools create \ + --prefer-index=false \ + -t "${REGISTRY}/${component}:latest" \ + "${REGISTRY}/${component}:${{ github.sha }}" + done + publish-containers: name: Publish Containers needs: [build-server, build-sandbox, build-cluster] runs-on: build-amd64 timeout-minutes: 120 container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -50,6 +71,21 @@ jobs: AWS_DEFAULT_REGION: us-west-2 steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Mark workspace safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Fetch tags + run: git fetch --tags --force + + - name: Compute image versions + id: version + run: | + set -euo pipefail + echo "cargo_version=$(uv run python tasks/scripts/release.py get-version --cargo)" >> "$GITHUB_OUTPUT" + echo "docker_version=$(uv run python tasks/scripts/release.py get-version --docker)" >> "$GITHUB_OUTPUT" - name: Log in to GHCR run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin @@ -65,6 +101,8 @@ jobs: DOCKER_BUILDER: navigator IMAGE_TAG: dev TAG_LATEST: "true" + EXTRA_DOCKER_TAGS: ${{ steps.version.outputs.docker_version }} + NEMOCLAW_CARGO_VERSION: ${{ steps.version.outputs.cargo_version }} run: mise run --no-prepare docker:publish:cluster:multiarch build-python-wheels: @@ -76,7 +114,7 @@ jobs: wheel_version: ${{ steps.version.outputs.wheel_version }} s3_prefix: ${{ steps.upload.outputs.s3_prefix }} container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -223,7 +261,7 @@ jobs: runs-on: ${{ matrix.runner }} timeout-minutes: 60 container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ 
github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -313,7 +351,7 @@ jobs: runs-on: build-amd64 timeout-minutes: 60 container: - image: ghcr.io/nvidia/nv-agent-env/ci:latest + image: ghcr.io/nvidia/nemoclaw/ci:latest credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 0e825fa8..3af0edc6 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ # Build output /target/ +e2e/rust/target/ debug/ release/ diff --git a/Cargo.lock b/Cargo.lock index 5c9a7311..834ed2e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2727,6 +2727,7 @@ dependencies = [ "serde_json", "serde_yaml", "tar", + "temp-env", "tempfile", "thiserror 2.0.18", "tokio", @@ -2852,6 +2853,7 @@ dependencies = [ "kube-runtime", "miette", "navigator-core", + "navigator-policy", "petname", "pin-project-lite", "prost", @@ -4880,6 +4882,15 @@ dependencies = [ "xattr", ] +[[package]] +name = "temp-env" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" +dependencies = [ + "parking_lot", +] + [[package]] name = "tempfile" version = "3.24.0" diff --git a/README.md b/README.md index 91411961..1f937915 100644 --- a/README.md +++ b/README.md @@ -10,20 +10,34 @@ It transforms the data center from a static deployment target into a continuous ### Prerequisites -- **Docker** — Docker Desktop (or a Docker daemon) must be running. -- **Python 3.12+** -- [**uv**](https://docs.astral.sh/uv/) 0.9+ + + +| Requirement | Details | +|-------------|---------------------------------------------------------------------------| +| **Docker** | Docker Desktop or a standalone Docker Engine daemon, running. | +| **Python** | 3.12 or later. 
| + ### Install + + ```bash -uv pip install nemoclaw \ - --upgrade \ - --pre \ - --index-url https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi/simple +pip install nemoclaw ``` + -The `nemoclaw` binary is installed into your Python environment. Use `uv run nemoclaw` to invoke it, or activate your venv first with `source .venv/bin/activate`. +### Install from Source (Developer) + +Requires [mise](https://mise.jdx.dev/), Rust 1.88+, Python 3.12+, and Docker. + +```bash +git clone https://github.com/NVIDIA/NemoClaw.git +cd NemoClaw +mise trust +``` + +`mise` installs all remaining toolchain dependencies automatically. The local `nemoclaw` script builds and runs the debug CLI binary, so you can invoke `nemoclaw` directly from the repo. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the full development workflow. ### Create a sandbox @@ -55,13 +69,13 @@ For additional sandbox images see the [NVIDIA/NemoClaw-Community](https://github To deploy a cluster explicitly: ```bash -nemoclaw cluster admin deploy +nemoclaw gateway start ``` For remote deployment: ```bash -nemoclaw cluster admin deploy --remote user@host +nemoclaw gateway start --remote user@host ``` ### Upgrading @@ -69,7 +83,7 @@ nemoclaw cluster admin deploy --remote user@host To upgrade, redeploy your cluster to pick up the latest server and sandbox images: ```bash -nemoclaw cluster admin deploy +nemoclaw gateway start ``` This will prompt you to recreate the cluster. Select "yes" to recreate the cluster. 
diff --git a/TESTING.md b/TESTING.md index bdde8105..f7404b91 100644 --- a/TESTING.md +++ b/TESTING.md @@ -15,7 +15,7 @@ crates/*/src/ # Inline #[cfg(test)] modules crates/*/tests/ # Rust integration tests python/navigator/ # Python unit tests (*_test.py suffix) e2e/python/ # Python E2E tests (test_*.py prefix) -e2e/bash/ # Bash E2E scripts +e2e/rust/ # Rust CLI E2E tests ``` ## Rust Tests @@ -130,20 +130,41 @@ def test_multiply(sandbox): | `inference_client` | session | Client for managing inference routes | | `mock_inference_route` | session | Creates a mock OpenAI-protocol route for tests | -### Bash E2E (`e2e/bash/`) +### Rust CLI E2E (`e2e/rust/`) -Self-contained shell scripts that exercise the CLI directly: +Rust-based e2e tests that exercise the `nemoclaw` CLI binary as a subprocess. +They live in the `nemoclaw-e2e` crate and use a shared harness for sandbox +lifecycle management, output parsing, and cleanup. -- `test_sandbox_sync.sh` — file sync round-trip -- `test_sandbox_custom_image.sh` — custom Docker image build and run -- `test_port_forward.sh` — TCP port forwarding through a sandbox +Tests: -Pattern: `set -euo pipefail`, cleanup via `trap`, poll-based readiness checks -parsing CLI output. 
+- `tests/custom_image.rs` — custom Docker image build and sandbox run +- `tests/sync.rs` — bidirectional file sync round-trip (including large files) +- `tests/port_forward.rs` — TCP port forwarding through a sandbox + +Run all CLI e2e tests: + +```bash +mise run e2e:rust +``` + +Run a single test directly with cargo: + +```bash +cargo test --manifest-path e2e/rust/Cargo.toml --features e2e --test sync +``` + +The harness (`e2e/rust/src/harness/`) provides: + +| Module | Purpose | +|---|---| +| `binary` | Builds and resolves the `nemoclaw` binary from the workspace | +| `sandbox` | `SandboxGuard` RAII type — creates sandboxes and deletes them on drop | +| `output` | ANSI stripping and field extraction from CLI output | +| `port` | `wait_for_port()` and `find_free_port()` for TCP testing | ## Environment Variables | Variable | Purpose | |---|---| | `NEMOCLAW_CLUSTER` | Override active cluster name for E2E tests | -| `NAV_BIN` | Override `nemoclaw` binary path in bash E2E tests | diff --git a/architecture/README.md b/architecture/README.md index 431f86e9..93d12060 100644 --- a/architecture/README.md +++ b/architecture/README.md @@ -10,7 +10,7 @@ The platform packages the entire infrastructure -- orchestration gateway, sandbo ## How the Subsystems Fit Together -The following diagram shows how the major subsystems interact at a high level. Users interact through the CLI, which communicates with a central gateway. The gateway manages sandbox lifecycle in Kubernetes, and each sandbox enforces its own policy locally. +The following diagram shows how the major subsystems interact at a high level. Users interact through the CLI, which communicates with a central gateway. The gateway manages sandbox lifecycle in Kubernetes, and each sandbox enforces its own policy locally. Inference API calls to `inference.local` are routed locally within the sandbox by an embedded inference router, without traversing the gateway at request time. 
```mermaid flowchart TB @@ -25,6 +25,7 @@ flowchart TB subgraph SBX["Sandbox Pod"] SUPERVISOR["Sandbox Supervisor"] PROXY["Network Proxy"] + ROUTER["Inference Router"] CHILD["Agent Process (restricted)"] OPA["Policy Engine (OPA)"] end @@ -33,20 +34,20 @@ flowchart TB subgraph EXT["External Services"] HOSTS["Allowed Hosts (github.com, api.anthropic.com, ...)"] CREDS["Provider APIs (Claude, GitHub, GitLab, ...)"] - BACKEND["Inference Backends (LM Studio, vLLM, ...)"] + BACKEND["Inference Backends (OpenAI, Anthropic, NVIDIA, local)"] end CLI -- "gRPC / HTTPS" --> SERVER CLI -- "SSH over HTTP CONNECT" --> SERVER SERVER -- "CRUD + Watch" --> DB SERVER -- "Create / Delete Pods" --> SBX - SUPERVISOR -- "Fetch Policy + Credentials" --> SERVER + SUPERVISOR -- "Fetch Policy + Credentials + Inference Bundle" --> SERVER SUPERVISOR -- "Spawn + Restrict" --> CHILD CHILD -- "All network traffic" --> PROXY PROXY -- "Evaluate request" --> OPA PROXY -- "Allowed traffic only" --> HOSTS - PROXY -- "Inference reroute (gRPC)" --> SERVER - SERVER -- "Proxied inference" --> BACKEND + PROXY -- "inference.local requests" --> ROUTER + ROUTER -- "Proxied inference" --> BACKEND SERVER -. "Store / retrieve credentials" .-> CREDS ``` @@ -83,9 +84,9 @@ When the agent (or any tool running inside the sandbox) tries to connect to a re 3. **Evaluates the request against policy** using the OPA engine. The policy can allow or deny connections based on the destination hostname, port, and the identity of the requesting program. 4. **Rejects connections to internal IP addresses** as a defense against SSRF (Server-Side Request Forgery). Even if the policy allows a hostname, the proxy resolves DNS before connecting and blocks any result that points to a private network address (e.g., cloud metadata endpoints, localhost, or RFC 1918 ranges). This prevents an attacker from redirecting an allowed hostname to internal infrastructure. 5. **Performs protocol-aware inspection (L7)** for configured endpoints. 
The proxy can terminate TLS, inspect the underlying HTTP traffic, and enforce rules on individual API requests -- not just connection-level allow/deny. This operates in either audit mode (log violations but allow traffic) or enforce mode (block violations). -6. **Intercepts inference API calls** when the sandbox has inference routing configured. Connections that don't match any explicit network policy but have inference routes available are TLS-terminated and inspected. Known inference API patterns (OpenAI, Anthropic) are detected and rerouted through the gateway to the configured backend, while non-inference requests are denied. +6. **Intercepts inference API calls** to `inference.local`. When the agent sends an HTTPS CONNECT request to `inference.local`, the proxy bypasses OPA evaluation entirely and handles the connection through a dedicated inference interception path. It TLS-terminates the connection, parses the HTTP request, detects known inference API patterns (OpenAI, Anthropic, model discovery), and routes matching requests locally through the sandbox's embedded inference router (`navigator-router`). Non-inference requests to `inference.local` are denied with 403. -The proxy generates an ephemeral certificate authority at startup and injects it into the sandbox's trust store. This allows it to transparently inspect HTTPS traffic when L7 inspection is configured for an endpoint. +The proxy generates an ephemeral certificate authority at startup and injects it into the sandbox's trust store. This allows it to transparently inspect HTTPS traffic when L7 inspection is configured for an endpoint, and to serve TLS for `inference.local` interception. For more detail, see [Sandbox Architecture](sandbox.md) (Proxy Routing section). @@ -101,6 +102,7 @@ Key responsibilities: - **TLS termination**: The gateway supports TLS with automatic protocol negotiation, so gRPC and HTTP clients can connect securely on the same port. 
- **SSH tunnel gateway**: The gateway provides the entry point for SSH connections into sandboxes (see Sandbox Connect below). - **Real-time updates**: The gateway streams sandbox status changes to the CLI, so users see live progress when a sandbox is starting up. +- **Inference bundle resolution**: The gateway stores cluster-level inference configuration (provider name + model ID) and resolves it into bundles containing endpoint URLs, API keys, supported protocols, provider type, and auth metadata. Sandboxes fetch these bundles at startup and refresh them periodically. The gateway does not proxy inference traffic at request time -- it only provides configuration. For more detail, see [Gateway Architecture](gateway.md). @@ -153,7 +155,7 @@ AI agents typically need credentials to access external services -- an API key f The provider system handles: -- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, GitHub, GitLab, and others. +- **Automatic discovery**: The CLI scans the user's local machine for existing credentials (environment variables, configuration files) and offers to upload them to the gateway. Supported providers include Claude, Codex, OpenCode, OpenAI, Anthropic, NVIDIA, GitHub, GitLab, and others. - **Secure storage**: Credentials are stored on the gateway, separate from sandbox definitions. They never appear in Kubernetes pod specifications. - **Runtime injection**: When a sandbox starts, the supervisor process fetches the credentials from the gateway via gRPC and injects them as environment variables into every process it spawns (both the initial agent process and any SSH sessions). - **CLI management**: Users can create, update, list, and delete providers through standard CLI commands. @@ -164,47 +166,37 @@ For more detail, see [Providers](sandbox-providers.md). 
### Inference Routing -The inference routing system transparently intercepts AI inference API calls from sandboxed agents and reroutes them through the gateway to policy-controlled backends. This enables organizations to redirect inference traffic to local or self-hosted models without modifying the agent's code. +The inference routing system transparently intercepts AI inference API calls from sandboxed agents and routes them to configured backends. Routing happens locally within the sandbox -- the proxy intercepts connections to `inference.local`, and the embedded `navigator-router` forwards requests directly to the backend without traversing the gateway at request time. **How it works end-to-end:** -1. The sandbox policy includes an `inference.allowed_routes` list (e.g., `["local"]`). -2. When the agent makes an HTTPS request to any endpoint (e.g., `api.openai.com`), the proxy evaluates it: - - If the endpoint + binary is explicitly allowed in `network_policies` -- pass through directly. - - If no policy match but inference routes are configured -- **intercept** (OPA returns the `inspect_for_inference` action). - - Otherwise -- deny. -3. For intercepted connections, the proxy: - - TLS-terminates the client connection using the sandbox's ephemeral CA. - - Parses the HTTP request. - - Detects known inference API patterns (e.g., `POST /v1/chat/completions` for OpenAI, `POST /v1/messages` for Anthropic). - - Strips authorization headers and forwards the request to the gateway via gRPC (`ProxyInference` RPC). -4. The gateway's inference service: - - Loads the sandbox's policy to get `allowed_routes`. - - Finds enabled inference routes whose `routing_hint` matches the allowed list. - - Selects a compatible route by matching the source protocol (e.g., `openai_chat_completions`). - - Forwards the request to the route's backend URL, rewriting the authorization header with the route's API key. -5. 
The response flows back through the gateway to the proxy to the agent -- the agent sees a normal HTTP response as if it came from the original API.
+1. An operator configures cluster-level inference via `nemoclaw cluster inference set --provider <provider> --model <model>`. This stores a reference to the named provider and model on the gateway.
+2. When a sandbox starts, the supervisor fetches an inference bundle from the gateway via the `GetInferenceBundle` RPC. The gateway resolves the stored provider reference into a complete route: endpoint URL, API key, supported protocols, provider type, and auth metadata. The sandbox refreshes this bundle every 30 seconds.
+3. The agent sends requests to `https://inference.local` using standard OpenAI or Anthropic SDK calls.
+4. The sandbox proxy intercepts the HTTPS CONNECT to `inference.local` (bypassing OPA policy evaluation), TLS-terminates the connection using the sandbox's ephemeral CA, and parses the HTTP request.
+5. Known inference API patterns are detected (e.g., `POST /v1/chat/completions` for OpenAI, `POST /v1/messages` for Anthropic, `GET /v1/models` for model discovery). Matching requests are forwarded to the first compatible route by the `navigator-router`, which rewrites the auth header, injects provider-specific default headers (e.g., `anthropic-version` for Anthropic), and overrides the model field in the request body.
+6. Non-inference requests to `inference.local` are denied with 403.

**Key design properties:**

-- Agents need zero code changes -- standard OpenAI/Anthropic SDK calls work transparently.
-- The sandbox never sees the real API key for the backend -- credential isolation is maintained.
-- Policy controls which routes a sandbox can access via `inference.allowed_routes`.
-- Routes are managed as server-side resources via CLI (`nemoclaw inference create/update/delete/list`).
+- Agents need zero code changes -- standard OpenAI/Anthropic SDK calls work transparently when pointed at `inference.local`. 
+- The sandbox never sees the real API key for the backend -- credential isolation is maintained through the gateway's bundle resolution. +- Routing is explicit via `inference.local`; OPA network policy is not involved in inference routing. +- Provider-specific behavior (auth header style, default headers, supported protocols) is centralized in `InferenceProviderProfile` definitions in `navigator-core`. Supported inference provider types are openai, anthropic, and nvidia. +- Cluster inference is managed via CLI (`nemoclaw cluster inference set/get`). -**Inference routes** are stored on the gateway as protobuf objects (`InferenceRoute` in `proto/inference.proto`) and have these fields: `routing_hint` (name for policy matching), `base_url` (backend endpoint), `protocols` (supported API protocols like `openai_chat_completions` or `anthropic_messages`), `api_key`, `model_id`, and `enabled` flag. +**Inference routes** are stored on the gateway as protobuf objects (`InferenceRoute` in `proto/inference.proto`). Cluster inference uses a managed singleton route entry keyed by `inference.local` and configured from provider + model settings. Endpoint, credentials, and protocols are resolved from the referenced provider record at bundle fetch time, so rotating a provider's API key takes effect on the next bundle refresh without reconfiguring the route. 
**Components involved:** | Component | Location | Role | |---|---|---| -| OPA `network_action` rule | `crates/navigator-sandbox/data/sandbox-policy.rego` | Returns `inspect_for_inference` when no explicit policy match and inference routes exist | -| Proxy interception | `crates/navigator-sandbox/src/proxy.rs` | TLS-terminates intercepted connections, parses HTTP, calls gateway | +| Proxy inference interception | `crates/navigator-sandbox/src/proxy.rs` | Intercepts `inference.local` CONNECT requests, TLS-terminates, dispatches to router | | Inference pattern detection | `crates/navigator-sandbox/src/l7/inference.rs` | Matches HTTP method + path against known inference API patterns | -| gRPC forwarding | `crates/navigator-sandbox/src/grpc_client.rs` | Sends `ProxyInferenceRequest` to the gateway | -| Gateway inference service | `crates/navigator-server/src/inference.rs` | Resolves routes from policy, delegates to router | -| Inference router | `crates/navigator-router/src/lib.rs` | Selects a compatible route by protocol and proxies to the backend | -| Proto definitions | `proto/inference.proto` | `InferenceRouteSpec`, `ProxyInferenceRequest/Response`, CRUD RPCs | +| Local inference router | `crates/navigator-router/src/lib.rs` | Selects a compatible route by protocol and proxies to the backend | +| Provider profiles | `crates/navigator-core/src/inference.rs` | Centralized auth, headers, protocols, and endpoint defaults per provider type | +| Gateway inference service | `crates/navigator-server/src/inference.rs` | Stores cluster inference config, resolves bundles with credentials from provider records | +| Proto definitions | `proto/inference.proto` | `ClusterInferenceConfig`, `ResolvedRoute`, bundle RPCs | ### Container and Build System @@ -228,9 +220,10 @@ Sandbox behavior is governed by policies written in YAML and evaluated by an emb - **Filesystem access**: Which directories are readable, which are writable. 
- **Network access**: Which remote hosts each program in the sandbox can connect to, with per-binary granularity. - **Process privileges**: What user/group the agent runs as. -- **Inference routing**: Which AI model backends the sandbox can route inference traffic to, referenced by `routing_hint` name. - **L7 inspection rules**: Protocol-level constraints on HTTP API calls for specific endpoints. +Inference routing to `inference.local` is configured separately at the cluster level and does not require network policy entries. The OPA engine evaluates only explicit network policies; `inference.local` connections bypass OPA entirely and are handled by the proxy's dedicated inference interception path. + Policies are not intended to be hand-edited by end users in normal operation. They are associated with sandboxes at creation time and fetched by the sandbox supervisor at startup via gRPC. For development and testing, policies can also be loaded from local files. For more detail, see [Policy Language](security-policy.md). @@ -239,12 +232,13 @@ For more detail, see [Policy Language](security-policy.md). The CLI is the primary way users interact with the platform. It provides commands organized into four groups: -- **Cluster management** (`nemoclaw cluster`): Deploy, stop, destroy, and inspect clusters. Supports both local and remote (SSH) targets. Includes a tunnel command for accessing the Kubernetes API on remote clusters. -- **Sandbox management** (`nemoclaw sandbox`): Create sandboxes (with optional file sync and provider auto-discovery), list running sandboxes, connect to sandboxes via SSH, and delete sandboxes. +- **Gateway management** (`nemoclaw gateway`): Deploy, stop, destroy, and inspect clusters. Supports both local and remote (SSH) targets. Includes a tunnel command for accessing the Kubernetes API on remote clusters. 
+- **Sandbox management** (`nemoclaw sandbox`): Create sandboxes (with optional file upload and provider auto-discovery), connect to sandboxes via SSH, and delete sandboxes. +- **Top-level commands**: `nemoclaw status` (cluster health), `nemoclaw logs` (sandbox logs), `nemoclaw forward` (port forwarding), `nemoclaw policy` (sandbox policy management). - **Provider management** (`nemoclaw provider`): Create, update, list, and delete external service credentials. -- **Inference management** (`nemoclaw inference`): Configure routing rules for AI model API endpoints. +- **Inference management** (`nemoclaw cluster inference`): Configure cluster-level inference by specifying a provider and model. The gateway resolves endpoint and credential details from the named provider record. -The CLI resolves which cluster to operate on through a priority chain: explicit `--cluster` flag, then the `NEMOCLAW_CLUSTER` environment variable, then the active cluster set by `nemoclaw cluster use`. It supports TLS client certificates for mutual authentication with the gateway. +The CLI resolves which cluster to operate on through a priority chain: explicit `--gateway` flag, then the `NEMOCLAW_CLUSTER` environment variable, then the active cluster set by `nemoclaw gateway select`. It supports TLS client certificates for mutual authentication with the gateway. ## How Users Get Started @@ -290,7 +284,9 @@ This opens an interactive SSH session into the sandbox, with all provider creden | [Sandbox Architecture](sandbox.md) | The sandbox execution environment: policy enforcement, Landlock, seccomp, network namespaces, and the network proxy. | | [Container Management](build-containers.md) | Container images, Dockerfiles, Helm charts, build tasks, and CI/CD. | | [Sandbox Connect](sandbox-connect.md) | SSH tunneling into sandboxes through the gateway. | +| [Sandbox Custom Containers](sandbox-custom-containers.md) | Building and using custom container images for sandboxes. 
| | [Providers](sandbox-providers.md) | External credential management, auto-discovery, and runtime injection. | | [Policy Language](security-policy.md) | The YAML/Rego policy system that governs sandbox behavior. | -| [Inference Routing](inference-routing.md) | Transparent interception and rerouting of AI inference API calls from sandboxed agents to policy-controlled backends. | -| [Local Inference Routing Demo](inference-routing-local-demo.md) | Step-by-step recording script for showing OpenAI SDK interception and reroute to a local LM Studio backend. | +| [Inference Routing](inference-routing.md) | Transparent interception and sandbox-local routing of AI inference API calls to configured backends. | +| [System Architecture](system-architecture.md) | Top-level system architecture diagram with all deployable components and communication flows. | +| [TUI](tui.md) | Terminal user interface for sandbox interaction. | diff --git a/architecture/build-containers.md b/architecture/build-containers.md index d4e9d83e..048f0ff8 100644 --- a/architecture/build-containers.md +++ b/architecture/build-containers.md @@ -99,7 +99,7 @@ A k3s image with bundled Helm charts and Kubernetes manifests for single-contain **Defined in** `deploy/docker/Dockerfile.cluster`. -**Base image:** `rancher/k3s:v1.29.8-k3s1` (configurable via `K3S_VERSION` build arg). +**Base image:** `rancher/k3s:v1.35.2-k3s1` (configurable via `K3S_VERSION` build arg). **Layers added:** @@ -360,7 +360,7 @@ After building, the script: 1. Resolves the local registry address (defaults to `127.0.0.1:5000/navigator`). In CI, uses `$CI_REGISTRY_IMAGE`. 2. Ensures a local Docker registry container (`navigator-local-registry`) is running on port 5000 (creates one if needed). 3. Pushes prebuilt local component images (server, sandbox) to the local registry via `cluster-push-component.sh`. -4. Runs `nav cluster admin deploy --name --update-kube-config` to create or update the cluster container. +4. 
Runs `nav gateway start --name --update-kube-config` to create or update the cluster container. ### Environment Variables @@ -368,7 +368,7 @@ After building, the script: |---|---|---| | `IMAGE_TAG` | `dev` | Tag for built images | | `RUST_BUILD_PROFILE` | `debug` | `debug` or `release` for sandbox builds | -| `K3S_VERSION` | `v1.29.8-k3s1` | k3s version for cluster image | +| `K3S_VERSION` | `v1.35.2-k3s1` | k3s version for cluster image (optional override; default in Dockerfile.cluster) | | `CLUSTER_NAME` | basename of `$PWD` | Name for local cluster deployment | | `DOCKER_PLATFORM` | (unset) | Target platform for multi-arch builds (e.g., `linux/amd64`) | @@ -451,6 +451,10 @@ Artifactory: - Wheels are uploaded to `s3://navigator-pypi-artifacts/navigator//`. - A follow-up job on the `nv` runner lists that version prefix, downloads the wheels, and publishes them to Artifactory. +- Container publish jobs compute the same Cargo version once and pass it through + Docker builds so `navigator-server` reports the packaged artifact version at runtime. +- Published images keep the floating `latest` tag and also receive an explicit + version tag for the same manifest. ### Auto-Deployed Components in Cluster diff --git a/architecture/cluster-single-node.md b/architecture/cluster-single-node.md index 208dc1c0..1e57cc40 100644 --- a/architecture/cluster-single-node.md +++ b/architecture/cluster-single-node.md @@ -18,7 +18,7 @@ Out of scope: ## Components - `crates/navigator-cli/src/main.rs`: CLI entry point; `clap`-based command parsing. -- `crates/navigator-cli/src/run.rs`: CLI command implementations (`cluster_admin_deploy`, `cluster_admin_stop`, `cluster_admin_destroy`, `cluster_admin_info`, `cluster_admin_tunnel`). +- `crates/navigator-cli/src/run.rs`: CLI command implementations (`gateway_start`, `gateway_stop`, `gateway_destroy`, `gateway_info`, `gateway_tunnel`). 
- `crates/navigator-cli/src/bootstrap.rs`: Auto-bootstrap helpers for `sandbox create` (offers to deploy a cluster when one is unreachable). - `crates/navigator-bootstrap/src/lib.rs`: Cluster lifecycle orchestration (`deploy_cluster`, `deploy_cluster_with_logs`, `cluster_handle`, `check_existing_deployment`). - `crates/navigator-bootstrap/src/docker.rs`: Docker API wrappers (network, volume, container, image operations). @@ -39,20 +39,20 @@ Out of scope: ## CLI Commands -All cluster lifecycle commands live under `nemoclaw cluster admin`: +All cluster lifecycle commands live under `nemoclaw gateway`: | Command | Description | |---|---| -| `nemoclaw cluster admin deploy [--name NAME] [--remote user@host] [--ssh-key PATH]` | Provision or update a cluster | -| `nemoclaw cluster admin stop [--name NAME] [--remote user@host]` | Stop the container (preserves state) | -| `nemoclaw cluster admin destroy [--name NAME] [--remote user@host]` | Destroy container, attached volumes, kubeconfig directory, metadata, and network | -| `nemoclaw cluster admin info [--name NAME]` | Show deployment details (endpoint, kubeconfig path, SSH host) | -| `nemoclaw cluster admin tunnel [--name NAME] [--remote user@host] [--print-command]` | Start or print SSH tunnel for kubectl access | -| `nemoclaw cluster status` | Show gateway health via gRPC/HTTP | -| `nemoclaw cluster use ` | Set the active cluster | -| `nemoclaw cluster list` | List all clusters with metadata | +| `nemoclaw gateway start [--name NAME] [--remote user@host] [--ssh-key PATH]` | Provision or update a cluster | +| `nemoclaw gateway stop [--name NAME] [--remote user@host]` | Stop the container (preserves state) | +| `nemoclaw gateway destroy [--name NAME] [--remote user@host]` | Destroy container, attached volumes, kubeconfig directory, metadata, and network | +| `nemoclaw gateway info [--name NAME]` | Show deployment details (endpoint, kubeconfig path, SSH host) | +| `nemoclaw gateway tunnel [--name NAME] [--remote user@host] 
[--print-command]` | Start or print SSH tunnel for kubectl access |
+| `nemoclaw status` | Show gateway health via gRPC/HTTP |
+| `nemoclaw gateway select <name>` | Set the active cluster |
+| `nemoclaw gateway select` | List all clusters with metadata |

-The `--name` flag defaults to `"nemoclaw"`. When omitted on commands that accept it, the CLI resolves the active cluster via: `--cluster` flag, then `NEMOCLAW_CLUSTER` env, then `~/.config/nemoclaw/active_cluster` file.
+The `--name` flag defaults to `"nemoclaw"`. When omitted on commands that accept it, the CLI resolves the active cluster via: `--gateway` flag, then `NEMOCLAW_CLUSTER` env, then `~/.config/nemoclaw/active_cluster` file.

## Local Task Flows (`mise`)

@@ -76,7 +76,7 @@ sequenceDiagram
    participant L as Local Docker daemon
    participant R as Remote Docker daemon (SSH)

-    U->>C: nemoclaw cluster admin deploy --remote user@host
+    U->>C: nemoclaw gateway start --remote user@host
     C->>B: deploy_cluster(DeployOptions)
     B->>B: create_ssh_docker_client (ssh://, 600s timeout)
@@ -149,7 +149,7 @@ flowchart LR

The `deploy_cluster_with_logs` variant accepts an `FnMut(String)` callback for progress reporting. The CLI wraps this in a `ClusterDeployLogPanel` for interactive terminals.

-**Pre-deploy check** (CLI layer in `cluster_admin_deploy`): In interactive terminals, `check_existing_deployment` inspects whether a container or volume already exists. If found, the user is prompted to destroy and recreate or reuse the existing cluster.
+**Pre-deploy check** (CLI layer in `gateway_start`): In interactive terminals, `check_existing_deployment` inspects whether a container or volume already exists. If found, the user is prompted to destroy and recreate or reuse the existing cluster.

### 2) Image readiness

@@ -231,14 +231,14 @@ Metadata location: `~/.config/nemoclaw/clusters/{name}_metadata.json`

Note: metadata is stored at the `clusters/` level (not nested inside `{name}/` like kubeconfig and mTLS).
-After deploy, the CLI calls `save_active_cluster(name)`, writing the cluster name to `~/.config/nemoclaw/active_cluster`. Subsequent commands that don't specify `--cluster` or `NEMOCLAW_CLUSTER` resolve to this active cluster. +After deploy, the CLI calls `save_active_cluster(name)`, writing the cluster name to `~/.config/nemoclaw/active_cluster`. Subsequent commands that don't specify `--gateway` or `NEMOCLAW_CLUSTER` resolve to this active cluster. ## Container Image The cluster image is defined in `deploy/docker/Dockerfile.cluster`: ``` -Base: rancher/k3s:v1.29.8-k3s1 +Base: rancher/k3s:v1.35.2-k3s1 ``` Layers added: @@ -328,7 +328,7 @@ ssh -L 6443:127.0.0.1:6443 -N user@host CLI helper: ```bash -nemoclaw cluster admin tunnel --name +nemoclaw gateway tunnel --name ``` The `--remote` flag is optional; the CLI resolves the SSH destination from stored cluster metadata. Pass `--print-command` to print the SSH command without executing it. @@ -355,7 +355,7 @@ The `--remote` flag is optional; the CLI resolves the SSH destination from store 4. Remove the stored kubeconfig file. 5. Remove the network if no containers remain attached (`cleanup_network_if_unused()`). -**CLI layer** (`cluster_admin_destroy()` in `run.rs` additionally): +**CLI layer** (`gateway_destroy()` in `run.rs` additionally): 6. Remove the metadata JSON file via `remove_cluster_metadata()`. 7. Clear the active cluster reference if it matches the destroyed cluster. 
diff --git a/architecture/gateway.md b/architecture/gateway.md index 26ff36bc..b6b42c91 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -61,7 +61,7 @@ graph TD | Gateway runtime | `crates/navigator-server/src/lib.rs` | `ServerState` struct, `run_server()` accept loop | | Protocol mux | `crates/navigator-server/src/multiplex.rs` | `MultiplexService`, `MultiplexedService`, `GrpcRouter`, `BoxBody` | | gRPC: Navigator | `crates/navigator-server/src/grpc.rs` | `NavigatorService` -- sandbox CRUD, provider CRUD, watch, exec, SSH sessions, policy delivery | -| gRPC: Inference | `crates/navigator-server/src/inference.rs` | `InferenceService` -- inference route CRUD and sandbox inference bundle delivery | +| gRPC: Inference | `crates/navigator-server/src/inference.rs` | `InferenceService` -- cluster inference config (set/get) and sandbox inference bundle delivery | | HTTP | `crates/navigator-server/src/http.rs` | Health endpoints, merged with SSH tunnel router | | SSH tunnel | `crates/navigator-server/src/ssh_tunnel.rs` | HTTP CONNECT handler at `/connect/ssh` | | TLS | `crates/navigator-server/src/tls.rs` | `TlsAcceptor` wrapping rustls with ALPN | @@ -78,9 +78,9 @@ Proto definitions consumed by the gateway: | Proto file | Package | Defines | |------------|---------|---------| | `proto/navigator.proto` | `navigator.v1` | `Navigator` service, sandbox/provider/SSH/watch messages | -| `proto/inference.proto` | `navigator.inference.v1` | `Inference` service, route CRUD messages, `GetSandboxInferenceBundle` | +| `proto/inference.proto` | `navigator.inference.v1` | `Inference` service: `SetClusterInference`, `GetClusterInference`, `GetInferenceBundle` | | `proto/datamodel.proto` | `navigator.datamodel.v1` | `Sandbox`, `SandboxSpec`, `SandboxStatus`, `Provider`, `SandboxPhase` | -| `proto/sandbox.proto` | `navigator.sandbox.v1` | `SandboxPolicy`, `NetworkPolicyRule`, `InferencePolicy` | +| `proto/sandbox.proto` | `navigator.sandbox.v1` | `SandboxPolicy`, 
`NetworkPolicyRule` | ## Startup Sequence @@ -233,37 +233,46 @@ These RPCs are called by sandbox pods at startup to bootstrap themselves. Defined in `proto/inference.proto`, implemented in `crates/navigator-server/src/inference.rs` as `InferenceService`. -The gateway acts as the control plane for inference routes. It stores route definitions, enforces sandbox-scoped access policies, and delivers pre-filtered route bundles to sandbox pods. The gateway does not execute inference requests -- sandboxes connect directly to inference backends using the credentials and endpoints provided in the bundle. +The gateway acts as the control plane for inference configuration. It stores a single managed cluster inference route (named `inference.local`) and delivers resolved route bundles to sandbox pods. The gateway does not execute inference requests -- sandboxes connect directly to inference backends using the credentials and endpoints provided in the bundle. -#### Route Delivery +#### Cluster Inference Configuration + +The gateway manages a single cluster-wide inference route that maps to a provider record. When set, the route stores only a `provider_name` and `model_id` reference. At bundle resolution time, the gateway looks up the referenced provider and derives the endpoint URL, API key, protocols, and provider type from it. This late-binding design means provider credential rotations are automatically reflected in the next bundle fetch without updating the route itself. | RPC | Description | |-----|-------------| -| `GetSandboxInferenceBundle` | Returns the set of inference routes a sandbox is authorized to use. Takes a `sandbox_id`, loads the sandbox's `InferencePolicy.allowed_routes`, fetches all enabled `InferenceRoute` records whose `routing_hint` matches, normalizes protocols, and returns them as `SandboxResolvedRoute` messages along with a revision hash and `generated_at_ms` timestamp. | +| `SetClusterInference` | Configures the cluster inference route. 
Validates `provider_name` and `model_id` are non-empty, verifies the named provider exists and has a supported type for inference (openai, anthropic, nvidia), validates the provider has a usable API key, then upserts the `inference.local` route record. Increments a monotonic `version` on each update. Returns the configured `provider_name`, `model_id`, and `version`. | +| `GetClusterInference` | Returns the current cluster inference configuration (`provider_name`, `model_id`, `version`). Returns `NotFound` if no cluster inference is configured, or `FailedPrecondition` if the stored route has empty provider/model metadata. | +| `GetInferenceBundle` | Returns the resolved inference route bundle for sandbox consumption. See [Route Bundle Delivery](#route-bundle-delivery) below. | + +#### Route Bundle Delivery -The trait method delegates to the standalone function `resolve_sandbox_inference_bundle(store, sandbox_id)` (`crates/navigator-server/src/inference.rs`), which takes `&Store` and `&str` instead of `&self`. This extraction decouples bundle resolution from `ServerState`, enabling direct unit testing against an in-memory SQLite store without constructing a full server. The function similarly delegates route filtering to `list_sandbox_routes(store, allowed_routes)`. +The `GetInferenceBundle` RPC resolves the managed cluster route into a `GetInferenceBundleResponse` containing fully materialized route data that sandboxes can use directly. -The `GetSandboxInferenceBundleResponse` includes: +The trait method delegates to `resolve_inference_bundle(store)` (`crates/navigator-server/src/inference.rs`), which takes `&Store` instead of `&self`. This extraction decouples bundle resolution from `ServerState`, enabling direct unit testing against an in-memory SQLite store without constructing a full server. -- **`routes`** -- a list of `SandboxResolvedRoute` messages, each containing `routing_hint`, `base_url`, `model_id`, `api_key`, and normalized `protocols`. 
These are flattened from `InferenceRoute.spec` -- no route IDs or names are exposed to the sandbox. -- **`revision`** -- a hex-encoded hash computed from the route contents (`routing_hint`, `base_url`, `model_id`, `api_key`, `protocols`). Sandboxes can compare this value to detect when their route set has changed. +The `GetInferenceBundleResponse` includes: + +- **`routes`** -- a list of `ResolvedRoute` messages containing base URL, model ID, API key, protocols, and provider type. Currently contains zero or one routes (the managed cluster route). +- **`revision`** -- a hex-encoded hash computed from route contents. Sandboxes compare this value to detect when their route set has changed. - **`generated_at_ms`** -- epoch milliseconds when the bundle was assembled. -Route filtering in `list_sandbox_routes()` (`crates/navigator-server/src/inference.rs`): -1. Load the sandbox's `InferencePolicy.allowed_routes` into a `HashSet`. -2. Fetch all `InferenceRoute` records from the store (up to 500). -3. Skip routes where `enabled == false`. -4. Skip routes whose `routing_hint` is not in the allowed set. -5. Normalize protocols via `navigator_core::inference::normalize_protocols()` and skip routes with no valid protocols after normalization. +#### Provider-Based Route Resolution -#### Route CRUD +Managed route resolution in `resolve_managed_cluster_route()` (`crates/navigator-server/src/inference.rs`): -| RPC | Description | -|-----|-------------| -| `CreateInferenceRoute` | Creates a route. Normalizes protocols (lowercase + dedupe), validates required fields (`routing_hint`, `base_url`, `protocols`, `model_id`). Auto-generates a 6-char name if empty. Rejects duplicates by name. | -| `UpdateInferenceRoute` | Updates a route by name. Preserves stored `id`. Normalizes protocols and validates the spec. | -| `DeleteInferenceRoute` | Deletes a route by name. Returns `deleted: bool`. | -| `ListInferenceRoutes` | Paginated list (default limit 100). | +1. 
Load the managed route by name (`inference.local`). +2. Skip (return `None`) if the route does not exist, has no spec, or is disabled. +3. Validate that `provider_name` and `model_id` are non-empty. +4. Fetch the referenced provider record from the store. +5. Resolve the provider into a `ResolvedProviderRoute` via `resolve_provider_route()`: + - Look up the `InferenceProviderProfile` for the provider's type via `navigator_core::inference::profile_for()`. Supported types: `openai`, `anthropic`, `nvidia`. + - Search the provider's credentials map for an API key using the profile's preferred key name (e.g., `OPENAI_API_KEY`), falling back to the first non-empty credential in sorted key order. + - Resolve the base URL from the provider's config map using the profile-specific key (e.g., `OPENAI_BASE_URL`), falling back to the profile's default URL. + - Derive protocols from the profile (e.g., `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` for OpenAI-compatible providers). +6. Return a `ResolvedRoute` with the fully materialized endpoint, credentials, and protocols. + +The `ClusterInferenceConfig` stored in the database contains only `provider_name` and `model_id`. All other fields (endpoint, credentials, protocols, auth style) are resolved from the provider record at bundle generation time via `build_cluster_inference_config()`. ## HTTP Endpoints @@ -340,6 +349,10 @@ All buses use `tokio::sync::broadcast` channels keyed by sandbox ID. Buffer size Broadcast lag is translated to `Status::resource_exhausted` via `broadcast_to_status()`. +**Cleanup:** Each bus exposes a `remove(sandbox_id)` method that drops the broadcast sender (closing active receivers with `RecvError::Closed`) and frees internal map entries. Cleanup is wired into both the `handle_deleted` reconciler (Kubernetes watcher) and the `delete_sandbox` gRPC handler to prevent unbounded memory growth from accumulated entries for deleted sandboxes. 
+ +**Validation:** `WatchSandbox` validates that the sandbox exists before subscribing to any bus, preventing entries from being created for non-existent IDs. `PushSandboxLogs` validates sandbox existence once on the first batch of the stream. + ## Remote Exec via SSH The `ExecSandbox` RPC (`crates/navigator-server/src/grpc.rs`) executes a command inside a sandbox pod over SSH and streams stdout/stderr/exit back to the client. @@ -507,7 +520,7 @@ Updated by the sandbox watcher on every Applied event and by gRPC handlers durin - `AlreadyExists` for duplicate creation - `FailedPrecondition` for state violations (e.g., exec on non-Ready sandbox, missing provider) - `Internal` for store/decode/Kubernetes failures - - `PermissionDenied` for policy violations (e.g., sandbox has no inference policy or empty `allowed_routes`) + - `PermissionDenied` for policy violations - `ResourceExhausted` for broadcast lag (missed messages) - `Cancelled` for closed broadcast channels diff --git a/architecture/inference-routing.md b/architecture/inference-routing.md index 03997253..03167aa3 100644 --- a/architecture/inference-routing.md +++ b/architecture/inference-routing.md @@ -1,203 +1,123 @@ # Inference Routing -The inference routing system transparently intercepts AI inference API calls from sandboxed processes and reroutes them to policy-controlled backends. This allows organizations to redirect SDK calls (OpenAI, Anthropic) to local or self-hosted inference servers without modifying the agent's code. The routing decision and HTTP proxying execute inside the sandbox process itself. The gateway serves only as a control plane for route management and bundle delivery. 
- -## Source File Index - -| File | Purpose | -|------|---------| -| `crates/navigator-sandbox/src/l7/inference.rs` | `InferenceApiPattern`, `detect_inference_pattern()`, HTTP request/response parsing for intercepted connections | -| `crates/navigator-sandbox/src/proxy.rs` | `InferenceContext`, `handle_inference_interception()`, `route_inference_request()` -- proxy-side interception and local routing | -| `crates/navigator-sandbox/src/lib.rs` | `build_inference_context()`, `bundle_to_resolved_routes()`, `spawn_route_refresh()` -- route loading and background cache refresh | -| `crates/navigator-sandbox/src/grpc_client.rs` | `fetch_inference_bundle()` -- fetches the pre-filtered route bundle from the gateway | -| `crates/navigator-sandbox/src/opa.rs` | `NetworkAction` enum, `evaluate_network_action()` -- tri-state routing decision | -| `crates/navigator-router/src/lib.rs` | `Router` -- protocol-based route selection and request forwarding | -| `crates/navigator-router/src/backend.rs` | `proxy_to_backend()` -- HTTP request forwarding with auth header and model ID rewriting | -| `crates/navigator-router/src/config.rs` | `RouterConfig`, `RouteConfig`, `ResolvedRoute` -- route configuration types and YAML loading | -| `crates/navigator-router/src/mock.rs` | Mock route support (`mock://` scheme) for testing | -| `crates/navigator-server/src/inference.rs` | `InferenceService` gRPC implementation -- route CRUD and bundle delivery (control plane only) | -| `crates/navigator-core/src/inference.rs` | `normalize_protocols()` -- shared protocol normalization logic | -| `proto/inference.proto` | Protobuf definitions: `InferenceRoute`, `InferenceRouteSpec`, `GetSandboxInferenceBundle` RPC, CRUD RPCs | -| `proto/sandbox.proto` | `InferencePolicy` message (field on `SandboxPolicy`) | -| `crates/navigator-sandbox/src/main.rs` | Sandbox binary CLI: `--inference-routes` / `NEMOCLAW_INFERENCE_ROUTES` flag definition | -| `tasks/ci.toml` | `[sandbox]` task: mounts 
`inference-routes.yaml`, sets env vars for dev sandbox | -| `inference-routes.yaml` | Default standalone routes for dev sandbox (NVIDIA API endpoint) | -| `crates/navigator-sandbox/data/sandbox-policy.rego` | `network_action` Rego rule -- tri-state decision logic | +Inference routing gives sandboxed agents access to LLM APIs through a single, explicit endpoint: `inference.local`. There is no implicit catch-all interception for arbitrary hosts. Requests are routed only when the process targets `inference.local` via HTTPS and the request matches a supported inference API pattern. -## Architecture Overview - -Inference routing executes in two distinct planes: - -- **Control plane (gateway)**: Stores routes in its database, resolves which routes a sandbox is allowed to use based on its policy, and delivers pre-filtered bundles to sandboxes via `GetSandboxInferenceBundle`. -- **Data plane (sandbox)**: Intercepts CONNECT requests, TLS-terminates connections, detects inference API patterns, selects a compatible route from its local cache, and forwards the HTTP request directly to the backend. +All inference execution happens locally inside the sandbox via the `navigator-router` crate. The gateway is control-plane only: it stores configuration and delivers resolved route bundles to sandboxes over gRPC. -The `navigator-router` crate is a dependency of the sandbox, not the server. The server has no `Router` instance and does not execute inference requests. 
- -```mermaid -flowchart LR - subgraph "Sandbox Process" - Proxy[Proxy] --> Router[Router] - end - subgraph "Gateway" - CRUD[Route CRUD] --> Store[(Store)] - Bundle[Bundle Delivery] --> Store - end - subgraph "Backends" - LM[LM Studio] - API[Cloud API] - end - - Proxy -- "GetSandboxInferenceBundle (30s refresh)" --> Bundle - Router -- "HTTP request" --> LM - Router -- "HTTP request" --> API -``` - -## End-to-End Flow - -An inference routing request passes through four components: the sandboxed agent, the sandbox proxy (with its embedded router), the OPA engine, and the backend. The gateway is involved only at startup and during periodic cache refreshes. +## Architecture Overview ```mermaid sequenceDiagram - participant Agent as Sandboxed Agent (e.g., OpenAI SDK) + participant Agent as Agent Process participant Proxy as Sandbox Proxy - participant OPA as OPA Engine - participant Router as Local Router (in sandbox) - participant Backend as Backend (e.g., LM Studio) - - Note over Proxy: At startup: fetch route bundle from gateway or load from file - - Agent->>Proxy: CONNECT api.openai.com:443 - Proxy->>OPA: evaluate_network_action(input) - OPA-->>Proxy: InspectForInference - Proxy-->>Agent: 200 Connection Established - Proxy->>Proxy: TLS terminate (ephemeral CA) - Agent->>Proxy: POST /v1/chat/completions (plaintext after TLS termination) - Proxy->>Proxy: detect_inference_pattern() -> openai_chat_completions - Proxy->>Proxy: Strip Authorization + hop-by-hop headers - Proxy->>Router: proxy_with_candidates(protocol, method, path, headers, body, routes) - Router->>Router: Find compatible route (protocol match) - Router->>Router: Rewrite Authorization, Host, and model in body - Router->>Backend: POST /v1/chat/completions - Backend-->>Router: 200 OK (response body) - Router-->>Proxy: ProxyResponse(status, headers, body) - Proxy-->>Agent: HTTP 200 OK (re-encrypted via TLS) -``` + participant Router as navigator-router + participant Gateway as Gateway (gRPC) + participant 
Backend as Inference Backend

-## Route Loading
+    Note over Gateway,Router: Control plane (startup + periodic refresh)
+    Router->>Gateway: GetInferenceBundle (fetch routes, credentials)

-Routes reach the sandbox through one of two modes, determined at sandbox startup.
+    Note over Agent,Backend: Data plane (per-request)
+    Agent->>Proxy: CONNECT inference.local:443
+    Proxy->>Proxy: TLS terminate (MITM)
+    Proxy->>Proxy: Parse HTTP, detect pattern
+    Proxy->>Router: proxy_with_candidates()
+    Router->>Router: Select route by protocol
+    Router->>Router: Rewrite auth + model
+    Router->>Backend: HTTPS request
+    Backend->>Router: Response
+    Router->>Proxy: ProxyResponse
+    Proxy->>Agent: HTTP response over TLS tunnel
+```

-### Two Route Source Modes
+## Provider Profiles

-**File:** `crates/navigator-sandbox/src/lib.rs` -- `build_inference_context()`
+File: `crates/navigator-core/src/inference.rs`

-The `build_inference_context()` function determines the route source. Priority order:
+`InferenceProviderProfile` is the single source of truth for provider-specific inference knowledge: default endpoint, supported protocols, credential key lookup order, auth header style, and default headers.

-1. **Standalone mode (route file)**: If `--inference-routes` (or `NEMOCLAW_INFERENCE_ROUTES`) is set, routes load from a YAML file via `RouterConfig::load_from_file()`. The file format matches the `navigator-router` `RouterConfig` schema. This mode always takes precedence -- if both a route file and cluster credentials are present, the route file wins.
+Three profiles are defined:

-2. **Cluster mode (gateway bundle)**: If `sandbox_id` and `navigator_endpoint` are available (and no route file is set), the sandbox fetches a pre-filtered bundle from the gateway via `grpc_client::fetch_inference_bundle()`, which calls the `GetSandboxInferenceBundle` gRPC RPC.
+| Provider | Default Base URL | Protocols | Auth | Default Headers | +|----------|-----------------|-----------|------|-----------------| +| `openai` | `https://api.openai.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | +| `anthropic` | `https://api.anthropic.com/v1` | `anthropic_messages`, `model_discovery` | `x-api-key` | `anthropic-version: 2023-06-01` | +| `nvidia` | `https://integrate.api.nvidia.com/v1` | `openai_chat_completions`, `openai_completions`, `openai_responses`, `model_discovery` | `Authorization: Bearer` | (none) | -3. **Disabled**: If neither source is configured, `build_inference_context()` returns `None` and inference routing is not active. +Each profile also defines `credential_key_names` (e.g. `["OPENAI_API_KEY"]`) and `base_url_config_keys` (e.g. `["OPENAI_BASE_URL"]`) used by the gateway to resolve credentials and endpoint overrides from provider records. -### Standalone Mode: YAML Route File +Unknown provider types return `None` from `profile_for()` and default to `Bearer` auth with no default headers via `auth_for_provider_type()`. -**File:** `crates/navigator-router/src/config.rs` +## Control Plane (Gateway) -Routes are defined in a YAML file with this schema: +File: `crates/navigator-server/src/inference.rs` -```yaml -routes: - - routing_hint: local - endpoint: http://localhost:1234/v1 - model: meta/llama-3.1-8b-instruct - protocols: [openai_chat_completions] - api_key: lm-studio - # api_key_env: MY_API_KEY # alternative: read key from environment variable -``` +The gateway implements the `Inference` gRPC service defined in `proto/inference.proto`. -`RouteConfig` supports two key-resolution modes: `api_key` (literal value) and `api_key_env` (environment variable name). If neither is set, route resolution fails at startup. The route file is loaded once at startup; changes require a sandbox restart. 
+### Cluster inference set/get -### Cluster Mode: Gateway Bundle +`SetClusterInference` takes a `provider_name` and `model_id`. It: -**File:** `crates/navigator-sandbox/src/grpc_client.rs` -- `fetch_inference_bundle()` +1. Validates that both fields are non-empty. +2. Fetches the named provider record from the store. +3. Validates the provider by resolving its route (checking that the provider type is supported and has a usable API key). +4. Builds a managed route spec that stores only `provider_name` and `model_id`. The spec intentionally leaves `base_url`, `api_key`, and `protocols` empty -- these are resolved dynamically at bundle time from the provider record. +5. Upserts the route with name `inference.local`. Version starts at 1 and increments monotonically on each update. -The sandbox connects to the gateway's `Inference` gRPC service using mTLS and calls `GetSandboxInferenceBundle` with its `sandbox_id`. The gateway resolves the sandbox's inference policy, filters routes by `allowed_routes`, and returns a `GetSandboxInferenceBundleResponse` containing: +`GetClusterInference` returns `provider_name`, `model_id`, and `version` for the managed route. Returns `NOT_FOUND` if cluster inference is not configured. -- `routes`: A list of `SandboxResolvedRoute` messages (routing_hint, base_url, protocols, api_key, model_id) -- `revision`: An opaque hash (DefaultHasher-based, 16-char hex) for cache freshness -- `generated_at_ms`: Bundle generation timestamp +### Bundle delivery -The proto response is converted to `Vec` by `bundle_to_resolved_routes()` in `lib.rs`. +`GetInferenceBundle` resolves the managed route at request time: -### Background Route Cache Refresh +1. Loads the `inference.local` route from the store. +2. Looks up the referenced provider record by `provider_name`. +3. Resolves endpoint, API key, protocols, and provider type from the provider record using the `InferenceProviderProfile` registry. +4. 
If the provider's config map contains a base URL override key (e.g. `OPENAI_BASE_URL`), that value overrides the profile default. +5. Returns a `GetInferenceBundleResponse` with the resolved route(s), a revision hash (DefaultHasher over route fields), and `generated_at_ms` timestamp. -**File:** `crates/navigator-sandbox/src/lib.rs` -- `spawn_route_refresh()` +Because resolution happens at request time, credential rotation and endpoint changes on the provider record take effect on the next bundle fetch without re-running `SetClusterInference`. -In cluster mode, a background `tokio::spawn` task refreshes the route cache every 30 seconds by calling `fetch_inference_bundle()` again. The routes are stored behind `Arc>>`, shared between the proxy and the refresh task. The refresh task is started even when the initial cluster bundle is empty, so newly created routes become available without restarting the sandbox. If a refresh fails, the sandbox logs a warning and keeps the stale routes. +An empty route list is valid and indicates cluster inference is not yet configured. -File mode does not spawn a refresh task -- routes are static for the sandbox lifetime. +### Proto definitions -### Graceful Degradation +File: `proto/inference.proto` -Both route source modes degrade gracefully when routes are unavailable: +Key messages: -- **Empty routes in file mode**: If `routes: []` in the file, `build_inference_context()` returns `None` and inference routing is disabled. This is confirmed by the `build_inference_context_empty_route_file_returns_none` test. -- **Empty routes in cluster mode**: If the initial cluster bundle has zero routes, the sandbox still creates `InferenceContext` with an empty cache and starts background refresh. Intercepted inference requests return `503` (`{"error": "inference endpoint detected without matching inference route"}`) until a later refresh provides routes. 
-- **Cluster mode errors**: `PermissionDenied` or `NotFound` errors (detected via string matching on the gRPC error message) indicate no inference policy is configured for this sandbox. The sandbox logs this and proceeds without inference routing. Other gRPC errors also result in graceful degradation: inference routing is disabled, but the sandbox starts normally. -- **File mode errors**: Parse failures or missing files in standalone mode are fatal -- `build_inference_context()` propagates the error and the sandbox refuses to start. Only an empty-but-valid routes list is gracefully disabled. +- `SetClusterInferenceRequest` -- `provider_name` + `model_id` +- `SetClusterInferenceResponse` -- `provider_name` + `model_id` + `version` +- `GetInferenceBundleResponse` -- `repeated ResolvedRoute routes` + `revision` + `generated_at_ms` +- `ResolvedRoute` -- `name`, `base_url`, `protocols`, `api_key`, `model_id`, `provider_type` -## Tri-State Network Decision +## Data Plane (Sandbox) -The OPA engine evaluates every CONNECT request and returns one of three routing actions via the `network_action` rule. This replaces the binary allow/deny model with a third option that triggers inference interception. +Files: -### `NetworkAction` enum +- `crates/navigator-sandbox/src/proxy.rs` -- proxy interception, inference context, request routing +- `crates/navigator-sandbox/src/l7/inference.rs` -- pattern detection, HTTP parsing, response formatting +- `crates/navigator-sandbox/src/lib.rs` -- inference context initialization, route refresh +- `crates/navigator-sandbox/src/grpc_client.rs` -- `fetch_inference_bundle()` -**File:** `crates/navigator-sandbox/src/opa.rs` +### Interception flow -```rust -pub enum NetworkAction { - Allow { matched_policy: Option }, - InspectForInference { matched_policy: Option }, - Deny { reason: String }, -} -``` +The proxy handles only `CONNECT` requests to `inference.local`. Non-CONNECT requests (any method, any host) are rejected with `403`. 
-### Decision logic +When a `CONNECT inference.local:443` arrives: -The `evaluate_network_action()` method evaluates `data.navigator.sandbox.network_action` and maps the string result: +1. Proxy responds `200 Connection Established`. +2. `handle_inference_interception()` TLS-terminates the client connection using the sandbox CA (MITM). +3. Raw HTTP requests are parsed from the TLS tunnel using `try_parse_http_request()` (supports Content-Length and chunked transfer encoding). +4. Each parsed request is passed to `route_inference_request()`. +5. The tunnel supports HTTP keep-alive: multiple requests can be processed sequentially. +6. Buffer starts at 64 KiB (`INITIAL_INFERENCE_BUF`) and grows up to 10 MiB (`MAX_INFERENCE_BUF`). Requests exceeding the max get `413 Payload Too Large`. -| Rego result | Rust variant | Meaning | -|-------------|--------------|---------| -| `"allow"` | `NetworkAction::Allow` | Endpoint + binary explicitly matched a `network_policies` entry | -| `"inspect_for_inference"` | `NetworkAction::InspectForInference` | No policy match, but `data.inference.allowed_routes` is non-empty | -| `"deny"` (default) | `NetworkAction::Deny` | No match and no inference routing configured | +### Request classification -### Rego rules +File: `crates/navigator-sandbox/src/l7/inference.rs` -- `default_patterns()` and `detect_inference_pattern()` -**File:** `crates/navigator-sandbox/data/sandbox-policy.rego` - -```rego -default network_action := "deny" - -# Explicitly allowed: endpoint + binary match in a network policy. -network_action := "allow" if { - network_policy_for_request -} - -# Binary not explicitly allowed + inference configured -> inspect. -network_action := "inspect_for_inference" if { - not network_policy_for_request - count(data.inference.allowed_routes) > 0 -} -``` - -The `inspect_for_inference` rule fires when the connection does not match any network policy but the sandbox has at least one configured inference route. 
This covers both unknown endpoints (e.g., `api.openai.com` not in any policy) and known endpoints where the calling binary is not in the allowed list. - -## Inference API Pattern Detection - -**File:** `crates/navigator-sandbox/src/l7/inference.rs` - -The `InferenceApiPattern` struct defines method + path combinations that identify inference API calls. `default_patterns()` returns the built-in set: +Supported built-in patterns: | Method | Path | Protocol | Kind | |--------|------|----------|------| @@ -205,464 +125,125 @@ The `InferenceApiPattern` struct defines method + path combinations that identif | `POST` | `/v1/completions` | `openai_completions` | `completion` | | `POST` | `/v1/responses` | `openai_responses` | `responses` | | `POST` | `/v1/messages` | `anthropic_messages` | `messages` | +| `GET` | `/v1/models` | `model_discovery` | `models_list` | +| `GET` | `/v1/models/*` | `model_discovery` | `models_get` | -`detect_inference_pattern()` strips query strings before matching (splitting on `?`). Matching is method-case-insensitive and path-exact -- no glob patterns. Only the path portion (before `?`) is compared against `path_glob`. - -```rust -pub fn detect_inference_pattern<'a>( - method: &str, - path: &str, - patterns: &'a [InferenceApiPattern], -) -> Option<&'a InferenceApiPattern> { - let path_only = path.split('?').next().unwrap_or(path); - patterns - .iter() - .find(|p| method.eq_ignore_ascii_case(&p.method) && path_only == p.path_glob) -} -``` - -## Proxy-Side Interception - -**File:** `crates/navigator-sandbox/src/proxy.rs` - -When OPA returns `InspectForInference`, the proxy calls `handle_inference_interception()` instead of connecting to the upstream server. The proxy never establishes a connection to the original target. 
- -### `InferenceContext` - -```rust -pub struct InferenceContext { - pub patterns: Vec, - router: navigator_router::Router, - routes: Arc>>, -} -``` - -Built at sandbox startup in `crates/navigator-sandbox/src/lib.rs` by `build_inference_context()`. Contains a `Router` (reqwest HTTP client) and a shared route cache. The `route_cache()` method exposes the `Arc>` handle for the background refresh task. - -### `handle_inference_interception()` flow +Query strings are stripped before matching. Path matching is exact for most patterns; `/v1/models/*` matches any sub-path (e.g. `/v1/models/gpt-4.1`). Absolute-form URIs (e.g. `https://inference.local/v1/chat/completions`) are normalized to path-only form by `normalize_inference_path()` before detection. -1. **Validate prerequisites**: Both `InferenceContext` (router + routes) and `ProxyTlsState` (ephemeral CA) must be present. Missing either is a fatal error for the connection. +If no pattern matches, the proxy returns `403 Forbidden` with `{"error": "connection not allowed by policy"}`. -2. **TLS-terminate the client**: Call `tls_terminate_client()` to present an ephemeral leaf certificate for the original target host (e.g., `api.openai.com`). The sandboxed SDK sees a valid TLS connection via the sandbox CA that was injected into its trust store at startup. +### Route cache -3. **Read HTTP requests in a loop** (supports HTTP keep-alive): - - Start with a 64 KiB buffer (`INITIAL_INFERENCE_BUF = 65536`). If the buffer fills before a complete request is parsed, it doubles in size up to 10 MiB (`MAX_INFERENCE_BUF`). Exceeding 10 MiB returns a `413 Payload Too Large` response. - - Parse the request using `try_parse_http_request()` which extracts method, path, headers, and body. Both `Content-Length` and `Transfer-Encoding: chunked` request framing are supported (chunked bodies are decoded before forwarding). +- `InferenceContext` holds a `Router`, the pattern list, and an `Arc>>` route cache. 
+- In cluster mode, `spawn_route_refresh()` polls `GetInferenceBundle` every 30 seconds (`ROUTE_REFRESH_INTERVAL_SECS`). On failure, stale routes are kept. +- In file mode (`--inference-routes`), routes load once at startup from YAML. No refresh task is spawned. +- In cluster mode, an empty initial bundle still enables the inference context so the refresh task can pick up later configuration. -4. **For each parsed request** (delegated to `route_inference_request()`): - - If `detect_inference_pattern()` matches: - - Strip credential and framing/hop-by-hop headers (`Authorization`, `x-api-key`, `host`, `content-length`, and all hop-by-hop headers) - - Acquire a read lock on the route cache - - If routes are empty, return `503` JSON: `{"error": "inference endpoint detected without matching inference route"}` - - Call `Router::proxy_with_candidates()` to select a route and forward the request locally - - Return the backend's response to the client (response hop-by-hop and framing headers are stripped before formatting) - - If no pattern matches: - - Return a `403` JSON error: `{"error": "connection not allowed by policy"}` - - If the router call fails: - - Map the `RouterError` to an HTTP status via `router_error_to_http()` and return a JSON error +### Bundle-to-route conversion -5. **Shift the buffer** for the next request (supports pipelining within the connection). +`bundle_to_resolved_routes()` in `lib.rs` converts proto `ResolvedRoute` messages to router `ResolvedRoute` structs. Auth header style and default headers are derived from `provider_type` using `navigator_core::inference::auth_for_provider_type()`. 
-### Router error mapping +## Router Behavior -`router_error_to_http()` translates `RouterError` variants to HTTP status codes: +Files: -| `RouterError` variant | HTTP status | Example message | -|----------------------|-------------|-----------------| -| `RouteNotFound` | 400 | "no route configured for routing_hint 'local'" | -| `NoCompatibleRoute` | 400 | "no compatible route for source protocol 'openai_chat_completions'" | -| `Unauthorized` | 401 | (error message) | -| `UpstreamUnavailable` | 503 | "request to ... timed out" | -| `UpstreamProtocol` / `Internal` | 502 | (error message) | - -### Integration with the proxy decision flow - -The interception path branches from `handle_tcp_connection()` after OPA evaluation: - -```mermaid -flowchart TD - A[CONNECT request] --> B[OPA evaluate_network_action] - B --> C{Result?} - C -- Deny --> D[403 Forbidden] - C -- InspectForInference --> E[handle_inference_interception] - C -- Allow --> F[SSRF check + TCP connect upstream] - E --> G[TLS terminate client] - G --> H[Parse HTTP request] - H --> I{Inference pattern?} - I -- Yes --> J[Strip auth + local router proxy] - J --> K[Return response to client] - I -- No --> L[403 JSON error] - F --> M{L7 config?} - M -- Yes --> N[L7 relay_with_inspection] - M -- No --> O[copy_bidirectional] -``` - -## Gateway: Bundle Delivery - -**File:** `crates/navigator-server/src/inference.rs` - -The gateway's `InferenceService` implements the `Inference` gRPC service. It handles route CRUD operations and bundle delivery. It does not hold a `Router` instance and does not execute inference requests. - -### GetSandboxInferenceBundle RPC - -The entry point for sandbox route loading. Processes requests from sandbox processes at startup and during periodic refresh. - -1. **Validate** `sandbox_id` is present (else `INVALID_ARGUMENT`). - -2. **Load the sandbox** from the store via `get_message::()`. Returns `NOT_FOUND` if the sandbox does not exist. - -3. 
**Extract the inference policy**: Navigate `sandbox.spec.policy.inference`. If the `inference` field is absent, return `PERMISSION_DENIED` ("sandbox has no inference policy configured"). If `allowed_routes` is empty, return `PERMISSION_DENIED` ("sandbox inference policy has no allowed routes"). - -4. **Resolve routes** via `list_sandbox_routes()`: - - Fetch all `InferenceRoute` records from the store (up to 500) - - Decode each from protobuf - - Filter: `enabled == true` AND `routing_hint` is in `allowed_routes` (uses a `HashSet` for O(1) lookup) - - Normalize and deduplicate protocols per route - - Skip routes with no valid protocols after normalization - - Return `Vec` - -5. **Compute revision**: Hash all route fields (routing_hint, base_url, model_id, api_key, protocols) with `DefaultHasher` and format as 16-char hex. This allows sandboxes to detect stale bundles. - -6. **Return the response**: `GetSandboxInferenceBundleResponse` with routes, revision, and `generated_at_ms` timestamp. - -### Route CRUD RPCs - -| RPC | Behavior | -|-----|----------| -| `CreateInferenceRoute` | Validates spec, normalizes protocols (lowercase + dedupe), auto-generates name if empty (via `generate_name()`), checks for name uniqueness, assigns UUID, persists | -| `UpdateInferenceRoute` | Looks up existing route by name, preserves the stored `id`, replaces the spec | -| `DeleteInferenceRoute` | Deletes by name via `delete_by_name()`, returns `deleted: bool` | -| `ListInferenceRoutes` | Paginated list (default limit 100), decodes protobuf from store records | - -### Route validation - -`validate_route_spec()` checks that required fields are non-empty: - -- `routing_hint` -- the label that sandbox policies reference -- `base_url` -- backend endpoint URL -- `protocols` -- at least one protocol after normalization -- `model_id` -- model identifier to use - -Note: `api_key` is not validated as required on the server side. 
Routes can be stored with an empty `api_key`, which is valid for local backends that do not require authentication. The `api_key` defaults to empty string in the CLI (`--api-key`, default `""`). - -### Protocol normalization - -`normalize_protocols()` (in `crates/navigator-core/src/inference.rs`) transforms the protocol list: trim whitespace, convert to lowercase, remove duplicates (preserving insertion order), remove empty entries. This function is shared between the server and router crates. - -## Inference Router - -**File:** `crates/navigator-router/src/lib.rs` - -The `Router` struct holds a `reqwest::Client` with a 60-second timeout and an optional set of static routes (used for config-file-based routing via `Router::from_config()`). +- `crates/navigator-router/src/lib.rs` -- `Router`, `proxy_with_candidates()` +- `crates/navigator-router/src/backend.rs` -- `proxy_to_backend()`, URL construction +- `crates/navigator-router/src/config.rs` -- `RouteConfig`, `ResolvedRoute`, YAML loading ### Route selection -`proxy_with_candidates()` takes a `source_protocol` (e.g., `"openai_chat_completions"`) and an externally-provided list of `ResolvedRoute` candidates. It: - -1. Normalizes `source_protocol` to lowercase. -2. Finds the **first** candidate whose `protocols` list contains an exact match. -3. Returns `NoCompatibleRoute` if no candidate matches. - -```rust -let route = candidates - .iter() - .find(|r| r.protocols.iter().any(|p| p == &normalized_source)) - .ok_or_else(|| RouterError::NoCompatibleRoute(source_protocol.to_string()))?; -``` - -### Mock route support - -**File:** `crates/navigator-router/src/mock.rs` - -Routes with a `mock://` endpoint scheme return canned responses without making HTTP calls. 
Mock responses are protocol-aware: - -| Protocol | Response shape | -|----------|---------------| -| `openai_chat_completions` | Valid OpenAI chat completion JSON | -| `openai_completions` | Valid OpenAI text completion JSON | -| `anthropic_messages` | Valid Anthropic messages JSON | -| Other | Generic JSON with `mock: true` | +`proxy_with_candidates()` finds the first route whose `protocols` list contains the detected source protocol (normalized to lowercase). If no route matches, returns `RouterError::NoCompatibleRoute`. -All mock responses include an `x-navigator-mock: true` header and use the route's `model` field in the response body. +### Request rewriting -### Backend proxying +`proxy_to_backend()` rewrites outgoing requests: -**File:** `crates/navigator-router/src/backend.rs` +1. **Auth injection**: Uses the route's `AuthHeader` -- either `Authorization: Bearer ` or a custom header (e.g. `x-api-key: ` for Anthropic). +2. **Header stripping**: Removes `authorization`, `x-api-key`, `host`, and any header names that will be set from route defaults. +3. **Default headers**: Applies route-level default headers (e.g. `anthropic-version: 2023-06-01`) unless the client already sent them. +4. **Model rewrite**: Parses the request body as JSON and replaces the `model` field with the route's configured model. Non-JSON bodies are forwarded unchanged. +5. **URL construction**: `build_backend_url()` appends the request path to the route endpoint. If the endpoint already ends with `/v1` and the request path starts with `/v1/`, the duplicate prefix is deduplicated. -`proxy_to_backend()` forwards the HTTP request to the real backend: +### Header sanitization -1. **Construct URL**: `{route.endpoint.trim_end('/')}{path}` (e.g., `http://localhost:1234/v1` + `/chat/completions` = `http://localhost:1234/v1/chat/completions`). Note: the path from the original request is appended as-is. 
If the route's `base_url` already includes the API prefix, the path may double up -- route configuration should account for this. +Before forwarding inference requests, the proxy strips sensitive and hop-by-hop headers from both requests and responses: -2. **Set Authorization**: `Bearer {route.api_key}` via `builder.bearer_auth()`. +- **Request**: `authorization`, `x-api-key`, `host`, `content-length`, and hop-by-hop headers (`connection`, `keep-alive`, `proxy-authenticate`, `proxy-authorization`, `proxy-connection`, `te`, `trailer`, `transfer-encoding`, `upgrade`). +- **Response**: `content-length` and hop-by-hop headers. -3. **Forward headers**: All headers except `authorization` and `host` are forwarded from the original request. +### Mock routes -4. **Model ID rewrite**: If the request body is valid JSON containing a `"model"` key, the value is replaced with `route.model`. This ensures the backend receives the model ID it serves, not the client's original model alias. If the body is not JSON or has no `"model"` key, it is forwarded unchanged. +File: `crates/navigator-router/src/mock.rs` -5. **Timeout**: 60 seconds (set at `Client` construction time). +Routes with `mock://` scheme endpoints return canned responses without making HTTP requests. Mock responses are protocol-aware (OpenAI chat completion, OpenAI completion, Anthropic messages, or generic JSON). Mock routes include an `x-navigator-mock: true` response header. -6. **Error classification**: +### HTTP client -| Condition | Error variant | -|-----------|--------------| -| Request timeout | `RouterError::UpstreamUnavailable` | -| Connection failure | `RouterError::UpstreamUnavailable` | -| Response body read failure | `RouterError::UpstreamProtocol` | -| Invalid HTTP method | `RouterError::Internal` | -| Other request errors | `RouterError::Internal` | +The router uses a `reqwest::Client` with a 60-second timeout. Timeouts and connection failures map to `RouterError::UpstreamUnavailable`. 
-## Data Model +## Standalone Route File -### InferenceRoute (protobuf) +File: `crates/navigator-router/src/config.rs` -**File:** `proto/inference.proto` - -```protobuf -message InferenceRoute { - string id = 1; // UUID, assigned at creation - InferenceRouteSpec spec = 2; - string name = 3; // Human-friendly, unique per object type -} - -message InferenceRouteSpec { - string routing_hint = 1; // Label for policy matching (e.g., "local") - string base_url = 2; // Backend endpoint URL - repeated string protocols = 3; // Supported protocols (e.g., ["openai_chat_completions"]) - string api_key = 4; // API key for the backend (may be empty) - string model_id = 5; // Model identifier - bool enabled = 6; // Whether route is active -} -``` - -Persisted in the `objects` table with `object_type = "inference_route"`, using protobuf encoding. - -### InferencePolicy (protobuf) - -**File:** `proto/sandbox.proto` - -```protobuf -message InferencePolicy { - repeated string allowed_routes = 1; // e.g., ["local", "frontier"] - repeated InferenceApiPattern api_patterns = 2; // Custom patterns (unused, defaults apply) -} -``` - -A field on `SandboxPolicy`, referenced by the OPA engine as `data.inference.allowed_routes`. - -The `api_patterns` field is intended to allow per-sandbox pattern customization. The sandbox code does not currently read this field -- it always calls `default_patterns()` from `crates/navigator-sandbox/src/l7/inference.rs`. The proto comment notes: "If empty, built-in defaults (OpenAI chat/completions) are used." - -### SandboxResolvedRoute (protobuf) - -**File:** `proto/inference.proto` - -```protobuf -message SandboxResolvedRoute { - string routing_hint = 1; - string base_url = 2; - repeated string protocols = 3; - string api_key = 4; - string model_id = 5; -} -``` - -Returned by `GetSandboxInferenceBundle`. Contains the fields needed for routing -- the gateway pre-filters routes so the sandbox receives only those matching its policy. 
- -### GetSandboxInferenceBundle (protobuf) - -**File:** `proto/inference.proto` - -```protobuf -message GetSandboxInferenceBundleRequest { - string sandbox_id = 1; -} - -message GetSandboxInferenceBundleResponse { - repeated SandboxResolvedRoute routes = 1; - string revision = 2; // Opaque hash for cache freshness - int64 generated_at_ms = 3; // Epoch ms when bundle was generated -} -``` - -### ResolvedRoute (Rust) - -**File:** `crates/navigator-router/src/config.rs` - -```rust -pub struct ResolvedRoute { - pub routing_hint: String, - pub endpoint: String, - pub model: String, - pub api_key: String, - pub protocols: Vec, -} -``` - -Created either from `RouterConfig::resolve_routes()` (file mode) or `bundle_to_resolved_routes()` (cluster mode). Contains only the fields needed for routing. Implements a custom `Debug` that redacts `api_key` as `[REDACTED]`. - -## Policy Configuration - -### Sandbox policy (YAML) - -The `inference` key in a sandbox policy YAML file controls which routes the sandbox can use: - -```yaml -inference: - allowed_routes: - - local # Matches routes with routing_hint "local" - - frontier # Matches routes with routing_hint "frontier" -``` - -When `allowed_routes` is non-empty, the OPA engine returns `inspect_for_inference` for any connection that does not explicitly match a `network_policies` entry. When `allowed_routes` is empty or the `inference` key is absent, unmatched connections are denied. 
- -### Route configuration (file mode) - -For standalone sandboxes (no cluster), routes are configured in a YAML file and passed via `--inference-routes` or `NEMOCLAW_INFERENCE_ROUTES`: +Standalone sandboxes can load static routes from YAML via `--inference-routes`: ```yaml routes: - - routing_hint: local + - route: inference.local endpoint: http://localhost:1234/v1 - model: meta/llama-3.1-8b-instruct - protocols: [openai_chat_completions, openai_responses] + model: local-model + protocols: [openai_chat_completions] api_key: lm-studio - - - routing_hint: frontier - endpoint: https://api.anthropic.com - model: claude-sonnet-4-20250514 - protocols: [anthropic_messages] - api_key_env: ANTHROPIC_API_KEY -``` - -### Route configuration (cluster mode) - -Routes are stored in the gateway's database and managed via the `nav inference` CLI commands or the gRPC API: - -``` -routing_hint: local -base_url: http://localhost:1234/v1 -protocols: [openai_chat_completions] -api_key: lm-studio -model_id: meta/llama-3.1-8b-instruct -enabled: true -``` - -The `routing_hint` field connects sandbox policy to server-side routes: a sandbox with `allowed_routes: ["local"]` can use any enabled route whose `routing_hint` is `"local"`. - -## CLI Commands - -**File:** `crates/navigator-cli/src/main.rs` (command definitions), `crates/navigator-cli/src/run.rs` (implementations) - -| Command | Description | -|---------|-------------| -| `nav inference create` | Create an inference route. Accepts `--routing-hint`, `--base-url`, `--protocol` (repeatable or comma-separated), `--api-key` (default empty), `--model-id`, `--disabled`. Auto-generates a name unless `--name` is provided. | -| `nav inference update` | Update an existing route by name. Same flags as create. | -| `nav inference delete` | Delete one or more routes by name. | -| `nav inference list` | List all routes. Supports `--limit` (default 100) and `--offset`. 
| - -The `create` and `update` commands perform protocol auto-detection when `--protocol` is not specified: they probe the backend URL with the provided API key and model to determine supported protocols, showing a spinner during the process. - -## Dev Sandbox Workflow - -**File:** `tasks/cluster.toml` (task `["cluster:sandbox"]`), `inference-routes.yaml` (repo root) - -Running `mise run cluster:sandbox` starts a standalone sandbox container with inference routing pre-configured. The task mounts three files into the container: - -- `crates/navigator-sandbox/data/sandbox-policy.rego` as `/var/navigator/policy.rego` -- `deploy/docker/sandbox/dev-sandbox-policy.yaml` as `/var/navigator/data.yaml` -- `inference-routes.yaml` as `/var/navigator/inference-routes.yaml` - -The container receives `NEMOCLAW_INFERENCE_ROUTES=/var/navigator/inference-routes.yaml` to enable standalone inference routing. `NVIDIA_API_KEY` is always forwarded from the host environment (empty string if unset). - -The default `inference-routes.yaml` defines a single route: - -```yaml -routes: - - routing_hint: local - endpoint: https://integrate.api.nvidia.com/ - model: nvidia/nemotron-3-nano-30b-a3b - protocols: - - openai_chat_completions - - openai_completions - api_key_env: NVIDIA_API_KEY + # Or reference an environment variable: + # api_key_env: OPENAI_API_KEY ``` -The `-e` flag forwards arbitrary host environment variables into the container: - -```bash -mise run cluster:sandbox -e ANTHROPIC_API_KEY -- /bin/bash -``` - -This checks whether the named variable is set in the host environment and passes it through. Unset variables produce a warning and are skipped. - -## Security: API Key Handling - -The migration moves API keys from the gateway's memory into each sandbox's memory. This changes the blast radius of a sandbox compromise. - -### Previous model - -API keys lived only in the gateway's database and process memory. 
Sandboxes sent inference requests to the gateway via gRPC; the gateway looked up routes, injected the API key, and forwarded to the backend. A compromised sandbox could use the gateway as an oracle to make inference calls but could not extract the raw API key. - -### Current model - -API keys are delivered to the sandbox in the route bundle (`SandboxResolvedRoute.api_key`) and held in `ResolvedRoute` structs within sandbox memory. The sandbox's `Router` injects the key directly when forwarding to backends. A compromised sandbox can read its own memory and extract the keys. - -### Mitigations - -1. **Scoped bundles**: The gateway filters routes by the sandbox's `allowed_routes` policy before delivering the bundle. A sandbox only receives keys for routes it is authorized to use. - -2. **Custom Debug redaction**: `ResolvedRoute` implements `Debug` with `api_key` rendered as `[REDACTED]`. This prevents keys from leaking into logs or debug output. +Fields: -3. **No child process injection**: API keys are not injected into the entrypoint process's environment or into SSH shell environments. They exist only within the Rust proxy/router structures in the sandbox supervisor process. +- `route` -- route name (informational) +- `endpoint` -- backend base URL +- `model` -- model ID to force on outgoing requests +- `protocols` -- list of supported protocol strings +- `provider_type` -- optional; determines auth style and default headers via `InferenceProviderProfile` +- `api_key` -- inline API key (mutually exclusive with `api_key_env`) +- `api_key_env` -- environment variable name containing the API key -4. **Landlock and seccomp**: The sandboxed child process runs under Landlock and seccomp restrictions that prevent it from reading the supervisor's memory (the supervisor runs outside the sandbox namespace). +Validation at load time requires either `api_key` or `api_key_env` to resolve, and at least one protocol. Protocols are normalized (lowercased, trimmed, deduplicated). 
-## Deployment: Hard Cutover +## Error Model -The inference routing migration is a breaking protocol change. The `ProxyInference` RPC has been removed from the proto definition and the gateway no longer implements it. Sandboxes running the new code call `GetSandboxInferenceBundle` instead. +| Status | Condition | +|--------|-----------| +| `403` | Request on `inference.local` does not match a recognized inference API pattern | +| `503` | Pattern matched but route cache is empty (cluster inference not configured) | +| `400` | No compatible route for the detected source protocol | +| `401` | Upstream returned unauthorized | +| `502` | Upstream protocol error or internal router error | +| `503` | Upstream unavailable (timeout or connection failure) | +| `413` | Request body exceeds 10 MiB buffer limit | -**Deployment requirements:** -- Server and sandbox must be released together. -- Running sandboxes must be restarted after the upgrade -- they cannot be live-migrated. -- There is no backward-compatible fallback period. Old sandboxes calling the removed `ProxyInference` RPC will get `UNIMPLEMENTED` errors from the updated gateway. 
+## CLI Surface
 
-## Error Handling
 
-### Proxy-side errors
+Cluster inference commands:
 
-| Condition | Behavior |
-|-----------|----------|
-| `InferenceContext` missing | Error: "InspectForInference requires inference context (router + routes)" |
-| TLS state not configured | Error: "InspectForInference requires TLS state for client termination" |
-| Request exceeds 10 MiB buffer | `413` Payload Too Large response to client |
-| Non-inference request on intercepted connection | `403` JSON error: `{"error": "connection not allowed by policy"}` |
-| No routes in cache | `503` JSON error: `{"error": "inference endpoint detected without matching inference route"}` |
-| Router returns `NoCompatibleRoute` | `400` JSON error |
-| Backend timeout or connection failure | `503` JSON error |
-| Backend protocol error or internal error | `502` JSON error |
+- `nemoclaw cluster inference set --provider <provider-name> --model <model-id>` -- configures cluster inference by referencing a provider record name
+- `nemoclaw cluster inference get` -- displays current cluster inference configuration
 
+The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, or `nvidia`). 
-### Gateway-side errors (bundle delivery) +## Provider Discovery -| Condition | gRPC status | -|-----------|-------------| -| Empty `sandbox_id` | `INVALID_ARGUMENT` | -| Sandbox not found | `NOT_FOUND` | -| Sandbox has no inference policy | `PERMISSION_DENIED` | -| Inference policy has empty `allowed_routes` | `PERMISSION_DENIED` | -| Store read failure | `INTERNAL` | +Files: -### Router-side errors +- `crates/navigator-providers/src/lib.rs` -- `ProviderRegistry`, `ProviderPlugin` trait +- `crates/navigator-providers/src/providers/openai.rs` -- `OpenaiProvider` +- `crates/navigator-providers/src/providers/anthropic.rs` -- `AnthropicProvider` +- `crates/navigator-providers/src/providers/nvidia.rs` -- `NvidiaProvider` -| Condition | `RouterError` variant | HTTP status (via proxy) | -|-----------|----------------------|------------------------| -| No compatible route for protocol | `NoCompatibleRoute` | 400 | -| Backend timeout (60s) | `UpstreamUnavailable` | 503 | -| Backend connection failure | `UpstreamUnavailable` | 503 | -| Response body read failure | `UpstreamProtocol` | 502 | -| Invalid HTTP method | `Internal` | 502 | +Provider discovery and inference routing are separate concerns: -## Cross-References +- `ProviderPlugin` (in `navigator-providers`) handles credential *discovery* -- scanning environment variables to find API keys. +- `InferenceProviderProfile` (in `navigator-core`) handles how to *use* discovered credentials to make inference API calls. 
-- [Sandbox Architecture](sandbox.md) -- Proxy, OPA engine, TLS termination, `NetworkAction` integration
-- [Gateway Architecture](gateway.md) -- gRPC service hosting, `ServerState`, persistence store
-- [Policy Language](security-policy.md) -- Rego rules including `network_action`
-- [Overview](README.md) -- System-wide context
+The `openai`, `anthropic`, and `nvidia` provider plugins each discover credentials from their canonical environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `NVIDIA_API_KEY`). These credentials are stored in provider records and looked up by the gateway at bundle resolution time.

diff --git a/architecture/sandbox-connect.md b/architecture/sandbox-connect.md
index 56681672..0905da6f 100644
--- a/architecture/sandbox-connect.md
+++ b/architecture/sandbox-connect.md
@@ -6,7 +6,7 @@ Sandbox connect provides secure remote access into running sandbox environments.
 
 1. **Interactive shell** (`sandbox connect`) -- opens a PTY-backed SSH session for interactive use
 2. **Command execution** (`sandbox create -- <command>`) -- runs a command over SSH with stdout/stderr piped back
-3. **File sync** (`sandbox create --sync`) -- rsyncs local files into the sandbox before command execution
+3. **File sync** (`sandbox create --upload`) -- uploads local files into the sandbox before command execution
 
 All three modes tunnel SSH traffic through the gateway's multiplexed port using HTTP CONNECT. The gateway authenticates each connection with a short-lived session token, then performs a custom NSSH1 handshake with the sandbox's embedded SSH daemon before bridging raw bytes between client and sandbox. 
@@ -150,9 +150,9 @@ The `sandbox exec` path is identical to interactive connect except: - The command string is passed as the final SSH argument - The sandbox daemon routes it through `exec_request()` instead of `shell_request()`, spawning `/bin/bash -lc ` -### Port Forwarding (`sandbox forward start`) +### Port Forwarding (`forward start`) -`nemoclaw sandbox forward start ` opens a local SSH tunnel so connections to `127.0.0.1:` +`nemoclaw forward start ` opens a local SSH tunnel so connections to `127.0.0.1:` on the host are forwarded to `127.0.0.1:` inside the sandbox. #### CLI @@ -162,20 +162,20 @@ on the host are forwarded to `127.0.0.1:` inside the sandbox. - By default stays attached in foreground until interrupted (Ctrl+C). - With `-d`/`--background`, SSH forks after auth and the CLI exits. The PID is tracked in `~/.config/nemoclaw/forwards/-.pid` along with sandbox id metadata. -- `nemoclaw sandbox forward stop ` validates PID ownership and then kills a background forward. -- `nemoclaw sandbox forward list` shows all tracked forwards. -- `nemoclaw sandbox forward stop` and `nemoclaw sandbox forward list` are local operations and do not require +- `nemoclaw forward stop ` validates PID ownership and then kills a background forward. +- `nemoclaw forward list` shows all tracked forwards. +- `nemoclaw forward stop` and `nemoclaw forward list` are local operations and do not require resolving an active cluster. - `nemoclaw sandbox create --forward ` starts a background forward before connect/exec, including when no trailing command is provided. - `nemoclaw sandbox delete` auto-stops any active forwards for the deleted sandbox. -#### TUI (Gator) +#### TUI The TUI (`crates/navigator-tui/`) supports port forwarding through the create sandbox modal. Users specify comma-separated ports in the **Ports** field. After sandbox creation: -1. Gator polls for `Ready` state (up to 30 attempts at 2-second intervals). +1. 
The TUI polls for `Ready` state (up to 30 attempts at 2-second intervals). 2. Creates an SSH session via `CreateSshSession` gRPC. 3. Spawns background SSH tunnels (`ssh -N -f -L :127.0.0.1:`) for each port. 4. Sends a `ForwardResult` event back to the main loop with the outcome. @@ -183,7 +183,7 @@ specify comma-separated ports in the **Ports** field. After sandbox creation: Active forwards are displayed in the sandbox table's NOTES column (e.g., `fwd:8080,3000`) and in the sandbox detail view's Forwards row. -When deleting a sandbox, Gator calls `stop_forwards_for_sandbox()` before sending the delete +When deleting a sandbox, the TUI calls `stop_forwards_for_sandbox()` before sending the delete request. PID tracking uses the same `~/.config/nemoclaw/forwards/` directory as the CLI. #### Shared forward module @@ -278,29 +278,29 @@ File sync uses **tar-over-SSH**: the CLI streams a tar archive through the exist **Files**: `crates/navigator-cli/src/ssh.rs`, `crates/navigator-cli/src/run.rs` -#### `sandbox create --sync` +#### `sandbox create --upload` -When `--sync` is passed to `sandbox create`, the CLI pushes local git-tracked files into `/sandbox` after the sandbox reaches `Ready` and before any command runs. +When `--upload` is passed to `sandbox create`, the CLI pushes local files into `/sandbox` (or a specified destination) after the sandbox reaches `Ready` and before any command runs. 1. `git_repo_root()` determines the repository root via `git rev-parse --show-toplevel` 2. `git_sync_files()` lists files with `git ls-files -co --exclude-standard -z` (tracked + untracked, respecting gitignore, null-delimited) 3. `sandbox_sync_up_files()` creates an SSH session config, spawns `ssh sandbox "tar xf - -C /sandbox"`, and streams a tar archive of the file list to the SSH child's stdin using the `tar` crate 4. 
Files land in `/sandbox` inside the container -#### `nemoclaw sandbox sync` command +#### `nemoclaw sandbox upload` / `nemoclaw sandbox download` -The standalone `sandbox sync` subcommand supports bidirectional file transfer: +Standalone commands support bidirectional file transfer: ```bash # Push local files up to sandbox -nemoclaw sandbox sync --up [] +nemoclaw sandbox upload [] # Pull sandbox files down to local -nemoclaw sandbox sync --down [] +nemoclaw sandbox download [] ``` -- **Push (`--up`)**: `sandbox_sync_up()` streams a tar archive of the local path to `ssh ... tar xf - -C ` on the sandbox side. Default destination: `/sandbox`. -- **Pull (`--down`)**: `sandbox_sync_down()` runs `ssh ... tar cf - -C ` on the sandbox side and extracts the output locally via `tar::Archive`. Default destination: `.` (current directory). +- **Upload**: `sandbox_upload()` streams a tar archive of the local path to `ssh ... tar xf - -C ` on the sandbox side. Default destination: `/sandbox`. +- **Download**: `sandbox_download()` runs `ssh ... tar cf - -C ` on the sandbox side and extracts the output locally via `tar::Archive`. Default destination: `.` (current directory). - No compression for v1 — the SSH tunnel is local-network; compression adds CPU cost with marginal bandwidth savings. #### Why tar-over-SSH instead of rsync diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 6335cd8b..68166771 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -14,7 +14,7 @@ All paths are relative to `crates/navigator-sandbox/src/`. 
| `policy.rs` | `SandboxPolicy`, `NetworkPolicy`, `ProxyPolicy`, `LandlockPolicy`, `ProcessPolicy` structs and proto conversions | | `opa.rs` | OPA/Rego policy engine using `regorus` crate -- network evaluation, sandbox config queries, L7 endpoint queries | | `process.rs` | `ProcessHandle` for spawning child processes, privilege dropping, signal handling | -| `proxy.rs` | HTTP CONNECT proxy with OPA evaluation, process-identity binding, and L7 dispatch | +| `proxy.rs` | HTTP CONNECT proxy with OPA evaluation, process-identity binding, inference interception, and L7 dispatch | | `ssh.rs` | Embedded SSH server (`russh` crate) with PTY support and handshake verification | | `identity.rs` | `BinaryIdentityCache` -- SHA256 trust-on-first-use binary integrity | | `procfs.rs` | `/proc` filesystem reading for TCP peer identity resolution and ancestor chain walking | @@ -25,7 +25,7 @@ All paths are relative to `crates/navigator-sandbox/src/`. | `sandbox/linux/seccomp.rs` | Syscall filtering via BPF on `SYS_socket` | | `sandbox/linux/netns.rs` | Network namespace creation, veth pair setup, cleanup on drop | | `l7/mod.rs` | L7 types (`L7Protocol`, `TlsMode`, `EnforcementMode`, `L7EndpointConfig`), config parsing, validation, access preset expansion | -| `l7/inference.rs` | Inference API pattern detection (`InferenceApiPattern`, `default_patterns()`, `detect_inference_pattern()`) and HTTP request/response parsing for intercepted inference connections | +| `l7/inference.rs` | Inference API pattern detection (`detect_inference_pattern()`), HTTP request/response parsing and formatting for intercepted inference connections | | `l7/tls.rs` | Ephemeral CA generation (`SandboxCa`), per-hostname leaf cert cache (`CertCache`), TLS termination/connection helpers | | `l7/relay.rs` | Protocol-aware bidirectional relay with per-request OPA evaluation | | `l7/rest.rs` | HTTP/1.1 request/response parsing, body framing (Content-Length, chunked), deny response generation | @@ -74,7 +74,7 @@ 
flowchart TD 1. **Policy loading** (`load_policy()`): - Priority 1: `--policy-rules` + `--policy-data` provided -- load OPA engine from local Rego file and YAML data file via `OpaEngine::from_files()`. Query `query_sandbox_config()` for filesystem/landlock/process settings. Network mode forced to `Proxy`. - - Priority 2: `--sandbox-id` + `--navigator-endpoint` provided -- fetch typed proto policy via `grpc_client::fetch_policy()`. If the proto contains `network_policies` OR has non-empty `inference.allowed_routes`, create OPA engine via `OpaEngine::from_proto()` using baked-in Rego rules. Convert proto to `SandboxPolicy` via `TryFrom`. + - Priority 2: `--sandbox-id` + `--navigator-endpoint` provided -- fetch typed proto policy via `grpc_client::fetch_policy()`. Create OPA engine via `OpaEngine::from_proto()` using baked-in Rego rules. Convert proto to `SandboxPolicy` via `TryFrom`, which always forces `NetworkMode::Proxy` so that all egress passes through the proxy and the `inference.local` virtual host is always addressable. - Neither present: return fatal error. - Output: `(SandboxPolicy, Option>)` @@ -163,7 +163,7 @@ The network mode determines which enforcement mechanisms activate: | `Proxy` | Blocks `AF_NETLINK`, `AF_PACKET`, `AF_BLUETOOTH`, `AF_VSOCK` (allows `AF_INET`/`AF_INET6`) | Yes (Linux) | Yes | Controlled network via proxy + OPA | | `Allow` | No seccomp filter | No | No | Unrestricted network (seccomp skipped entirely) | -In gRPC mode, the mode is derived from the proto: if `network_policies` is non-empty, mode is `Proxy`; otherwise `Block`. In file mode, the mode is always `Proxy` (the presence of `--policy-rules` implies network policy evaluation). +In gRPC mode, the mode is always `Proxy`. The `SandboxPolicy::try_from()` conversion forces `NetworkMode::Proxy` unconditionally so that all egress passes through the proxy and the `inference.local` virtual host is always addressable. 
In file mode, the mode is also always `Proxy` (the presence of `--policy-rules` implies network policy evaluation). ### Policy loading modes @@ -195,7 +195,7 @@ The Rego rules are compiled into the binary via `include_str!("../data/sandbox-p | Rule | Type | Purpose | |------|------|---------| | `allow_network` | bool | L4 allow/deny decision for a CONNECT request | -| `network_action` | string | Tri-state routing decision: `"allow"`, `"inspect_for_inference"`, or `"deny"` | +| `network_action` | string | Routing decision: `"allow"` or `"deny"` | | `deny_reason` | string | Human-readable deny reason | | `matched_network_policy` | string | Name of the matched policy rule | | `matched_endpoint_config` | object | Full endpoint config for L7 inspection lookup | @@ -223,21 +223,9 @@ The inner `regorus::Engine` requires `&mut self` for evaluation, so access is se All loading methods run the same preprocessing pipeline: L7 validation (errors block startup, warnings are logged), then access preset expansion (e.g., `access: "read-only"` becomes explicit `rules` with GET/HEAD/OPTIONS). -### Policy data: inference section - -The OPA data includes an `inference` key controlling inference interception. In YAML (file mode): - -```yaml -inference: - allowed_routes: - - local -``` - -In proto mode, the `SandboxPolicy.inference` field maps to the same structure. When `allowed_routes` is non-empty, the `network_action` Rego rule returns `"inspect_for_inference"` for connections that do not match any explicit network policy. When `allowed_routes` is empty or absent, unmatched connections are denied as before. The `from_proto()` method defaults to an empty `allowed_routes` list when the proto's `inference` field is `None`. - ### Network evaluation -Two evaluation methods exist: `evaluate_network()` for the legacy bool-based path, and `evaluate_network_action()` for the tri-state routing path used by the proxy. 
+Two evaluation methods exist: `evaluate_network()` for the legacy bool-based path, and `evaluate_network_action()` for the two-state routing path used by the proxy. #### `evaluate_network(input: &NetworkInput) -> Result` @@ -265,28 +253,25 @@ Returns `PolicyDecision { allowed, reason, matched_policy }`. #### `evaluate_network_action(input: &NetworkInput) -> Result` -Uses the same input JSON shape as `evaluate_network()`. Evaluates the `data.navigator.sandbox.network_action` Rego rule, which returns one of three string values: +Uses the same input JSON shape as `evaluate_network()`. Evaluates the `data.navigator.sandbox.network_action` Rego rule, which returns one of two string values: - `"allow"` -- endpoint + binary explicitly matched in a network policy -- `"inspect_for_inference"` -- no policy match but `inference.allowed_routes` is non-empty - `"deny"` -- network connections not allowed by policy The Rego logic: 1. If `network_policy_for_request` exists (endpoint + binary match), return `"allow"` -2. If no match but `count(data.inference.allowed_routes) > 0`, return `"inspect_for_inference"` -3. Default: `"deny"` +2. Default: `"deny"` -Returns `NetworkAction`, an enum with three variants: +Returns `NetworkAction`, an enum with two variants: ```rust pub enum NetworkAction { Allow { matched_policy: Option }, - InspectForInference { matched_policy: Option }, Deny { reason: String }, } ``` -The proxy calls `evaluate_network_action()` (not `evaluate_network()`) as its main decision path. This enables the proxy to route unmatched connections through inference interception when the sandbox has inference routing configured. +The proxy calls `evaluate_network_action()` (not `evaluate_network()`) as its main decision path. Connections to the `inference.local` virtual host bypass OPA evaluation entirely and are handled by the [inference interception](#inference-interception) path before the OPA check. 
### L7 endpoint config query @@ -309,14 +294,13 @@ Both methods hold the `Mutex` only for the final swap (`*engine = new_engine`), **File:** `crates/navigator-sandbox/src/lib.rs` (`run_policy_poll_loop()`) -In gRPC mode, the sandbox can receive policy updates at runtime without restarting. A background task polls the gateway for new policy versions and hot-reloads the OPA engine when changes are detected. Only **dynamic** policy domains (network rules and inference routing) can change at runtime; **static** domains (filesystem, Landlock, process) are applied once in the pre-exec closure and cannot be modified after the child process spawns. +In gRPC mode, the sandbox can receive policy updates at runtime without restarting. A background task polls the gateway for new policy versions and hot-reloads the OPA engine when changes are detected. Only **dynamic** policy domains (network rules) can change at runtime; **static** domains (filesystem, Landlock, process) are applied once in the pre-exec closure and cannot be modified after the child process spawns. 
 ### Dynamic vs static policy domains
 
 | Domain | Mutable at runtime | Applied where | Reason |
 |--------|-------------------|---------------|--------|
 | `network_policies` | Yes | OPA engine (proxy evaluates per-CONNECT) | Engine swap updates all future evaluations |
-| `inference` | Yes | OPA engine (proxy evaluates per-CONNECT) | Same mechanism as network policies |
 | `filesystem` | No | Landlock LSM in pre-exec | Kernel-enforced; cannot be modified after `restrict_self()` |
-| `landlock` | No | Landlock LSM in pre_exec | Configuration for the above; same restriction |
+| `landlock` | No | Landlock LSM in pre-exec | Configuration for the above; same restriction |
 | `process` | No | `setuid`/`setgid` in pre-exec | Privileges dropped irrevocably before exec |
@@ -367,7 +351,7 @@ The `run_policy_poll_loop()` function in `crates/navigator-sandbox/src/lib.rs` i
 
 **File:** `crates/navigator-sandbox/src/grpc_client.rs`
 
-`CachedNavigatorClient` is a persistent gRPC client for the `Navigator` service, analogous to `CachedInferenceClient` for the `Inference` service. It wraps a `NavigatorClient` connected once at construction and reused for all subsequent calls.
+`CachedNavigatorClient` is a persistent gRPC client for the `Navigator` service. It wraps a `NavigatorClient` connected once at construction and reused for all subsequent calls.
 
 ```rust
 pub struct CachedNavigatorClient {
@@ -385,6 +369,7 @@ Methods:
 
 - **`connect(endpoint)`**: Establish an mTLS channel and return a new client.
 - **`poll_policy(sandbox_id)`**: Call `GetSandboxPolicy` RPC and return a `PolicyPollResult` containing the policy, version, and hash.
 - **`report_policy_status(sandbox_id, version, loaded, error_msg)`**: Call `ReportPolicyStatus` RPC with the appropriate `PolicyStatus` enum value (`Loaded` or `Failed`).
+- **`raw_client()`**: Return a clone of the underlying `NavigatorClient` for direct RPC calls (used by the log push task).
### Server-side policy versioning @@ -504,7 +489,7 @@ If namespace creation fails (e.g., missing capabilities), startup fails in `Prox **File:** `crates/navigator-sandbox/src/proxy.rs` -The proxy is an async TCP listener that accepts HTTP CONNECT requests. Each connection spawns a handler task. The proxy evaluates every CONNECT request against OPA policy with full process-identity binding. +The proxy is an async TCP listener that accepts HTTP CONNECT requests. Each connection spawns a handler task. The proxy evaluates every CONNECT request against OPA policy with full process-identity binding, except for connections to the `inference.local` virtual host which bypass OPA and are handled by the inference interception path. ### Connection flow @@ -518,31 +503,33 @@ sequenceDiagram participant Backend as Inference Backend participant U as Upstream Server - S->>P: CONNECT api.example.com:443 HTTP/1.1 + S->>P: CONNECT host:port HTTP/1.1 P->>P: Parse CONNECT target (host, port) - P->>P: Resolve TCP peer identity via /proc - P->>P: TOFU verify binary SHA256 - P->>P: Walk ancestor chain, verify each - P->>P: Collect cmdline paths - P->>O: evaluate_network_action(input) - O-->>P: NetworkAction (Allow / InspectForInference / Deny) - P->>P: Log CONNECT decision (unified log line) - alt Deny - P-->>S: HTTP/1.1 403 Forbidden - else InspectForInference - P-->>S: HTTP/1.1 200 Connection Established - P->>P: TLS-terminate client (SandboxCa) - P->>P: Parse HTTP request from tunnel - alt Inference API pattern matched - P->>P: Strip Authorization header - P->>R: proxy_with_candidates(protocol, method, path, headers, body, routes) - R->>Backend: POST /v1/chat/completions (with route API key) - Backend-->>R: HTTP response - R-->>P: ProxyResponse(status, headers, body) - P-->>S: HTTP response (re-encrypted via TLS) - else Non-inference request - P-->>S: HTTP/1.1 403 JSON error - end + + alt Target is inference.local + P-->>S: HTTP/1.1 200 Connection Established + P->>P: TLS-terminate 
client (SandboxCa) + P->>P: Parse HTTP request from tunnel + alt Inference API pattern matched + P->>P: Strip Authorization header + P->>R: proxy_with_candidates(protocol, method, path, headers, body, routes) + R->>Backend: POST /v1/chat/completions (with route API key) + Backend-->>R: HTTP response + R-->>P: ProxyResponse(status, headers, body) + P-->>S: HTTP response (re-encrypted via TLS) + else Non-inference request + P-->>S: HTTP/1.1 403 JSON error + end + else Regular host + P->>P: Resolve TCP peer identity via /proc + P->>P: TOFU verify binary SHA256 + P->>P: Walk ancestor chain, verify each + P->>P: Collect cmdline paths + P->>O: evaluate_network_action(input) + O-->>P: NetworkAction (Allow / Deny) + P->>P: Log CONNECT decision (unified log line) + alt Deny + P-->>S: HTTP/1.1 403 Forbidden else Allow P->>DNS: resolve_and_reject_internal(host, port) DNS-->>P: Resolved addresses @@ -578,9 +565,13 @@ Startup steps: The proxy reads up to 8192 bytes (`MAX_HEADER_BYTES`) looking for `\r\n\r\n`. It validates the method is `CONNECT` (returning 403 for anything else with a structured log) and parses the `host:port` target. +### `inference.local` interception (pre-OPA fast path) + +After parsing the CONNECT target, the proxy checks whether the hostname (lowercased) matches `INFERENCE_LOCAL_HOST` (`"inference.local"`). If it does, the proxy immediately sends `200 Connection Established` and hands the connection to `handle_inference_interception()`, bypassing OPA evaluation entirely. This design ensures `inference.local` is always addressable in proxy mode regardless of what network policies are configured. + ### OPA evaluation with identity binding (`evaluate_opa_tcp()`) -This is the core security evaluation path, Linux-only (requires `/proc`). +For all non-`inference.local` CONNECT targets, the proxy performs OPA evaluation with process-identity binding. This is the core security evaluation path, Linux-only (requires `/proc`). 
```mermaid flowchart TD @@ -607,7 +598,7 @@ On non-Linux platforms, `evaluate_opa_tcp()` always denies with the reason "iden ```rust struct ConnectDecision { - action: NetworkAction, // Allow, InspectForInference, or Deny + action: NetworkAction, // Allow or Deny binary: Option, binary_pid: Option, ancestors: Vec, @@ -615,21 +606,19 @@ struct ConnectDecision { } ``` -The `action` field carries the matched policy name (for `Allow` and `InspectForInference`) or the deny reason (for `Deny`) inside the `NetworkAction` enum variants. This replaces the previous `allowed: bool` + `matched_policy` + `reason` fields. +The `action` field carries the matched policy name (for `Allow`) or the deny reason (for `Deny`) inside the `NetworkAction` enum variants. ### Unified logging -Every CONNECT request produces an `info!()` log line with all context: source/destination addresses, binary path, PID, ancestor chain, cmdline paths, action (`allow`, `inspect_for_inference`, or `deny`), engine, matched policy, and deny reason. - -For `InspectForInference` connections, the initial log records `action=inspect_for_inference`. If the subsequent inference interception fails (TLS handshake failure, client disconnect, request not allowed by policy, payload too large, missing context, or I/O error), a second `CONNECT` log is emitted with `action=deny` and a `reason` describing the failure. Successfully routed connections produce no second log. This two-log pattern gives operators visibility into why an `inspect_for_inference` decision ultimately resulted in a denial. +Every CONNECT request to a non-`inference.local` target produces an `info!()` log line with all context: source/destination addresses, binary path, PID, ancestor chain, cmdline paths, action (`allow` or `deny`), engine, matched policy, and deny reason. Inference interception failures produce a separate `info!()` log with `action=deny` and the denial reason. 
### SSRF protection (internal IP rejection) After OPA allows a connection, the proxy resolves DNS and rejects any host that resolves to an internal IP address (loopback, RFC 1918 private, link-local, or IPv4-mapped IPv6 equivalents). This defense-in-depth measure prevents SSRF attacks where an allowed hostname is pointed at internal infrastructure. The check is implemented by `resolve_and_reject_internal()` which calls `tokio::net::lookup_host()` and validates every resolved address via `is_internal_ip()`. If any resolved IP is internal, the connection receives a `403 Forbidden` response and a warning is logged. See [SSRF Protection](security-policy.md#ssrf-protection-internal-ip-rejection) for the full list of blocked ranges. -### Inference interception (`InspectForInference` path) +### Inference interception -When OPA returns `InspectForInference`, the proxy does not connect to the upstream server. Instead, it TLS-terminates the client side and inspects the HTTP traffic to detect inference API calls. Matched requests are executed locally via the `navigator-router` crate. The function `handle_inference_interception()` implements this path and returns an `InferenceOutcome`: +When a CONNECT target is `inference.local`, the proxy TLS-terminates the client side and inspects the HTTP traffic to detect inference API calls. Matched requests are executed locally via the `navigator-router` crate. The function `handle_inference_interception()` implements this path and returns an `InferenceOutcome`: ```rust enum InferenceOutcome { @@ -640,7 +629,7 @@ enum InferenceOutcome { } ``` -Every exit path in `handle_inference_interception` produces an explicit outcome. The `Denied` variant carries a human-readable reason describing the failure. At the call site in `handle_tcp_connection`, `Denied` outcomes (and `Err` results) trigger a structured CONNECT deny log with the same fields as the initial decision log (see [Unified logging](#unified-logging)). 
The `route_inference_request` helper returns `Result` where `true` means the request was routed and `false` means the request was not allowed by policy and was denied inline. +Every exit path in `handle_inference_interception` produces an explicit outcome. The `Denied` variant carries a human-readable reason describing the failure. At the call site in `handle_tcp_connection`, `Denied` outcomes trigger a structured CONNECT deny log with the denial reason. The `route_inference_request` helper returns `Result` where `true` means the request was routed and `false` means the request was not allowed by policy and was denied inline. The interception steps: @@ -656,8 +645,10 @@ The interception steps: | `POST` | `/v1/completions` | `openai_completions` | `completion` | | `POST` | `/v1/responses` | `openai_responses` | `responses` | | `POST` | `/v1/messages` | `anthropic_messages` | `messages` | + | `GET` | `/v1/models` | `model_discovery` | `models_list` | + | `GET` | `/v1/models/*` | `model_discovery` | `models_get` | - Pattern matching strips query strings and uses exact path comparison (not glob). + Pattern matching strips query strings. Exact path comparison is used for most patterns; the `/v1/models/*` pattern matches `/v1/models` itself or any path under `/v1/models/` (e.g., `/v1/models/gpt-4.1`). 4. **Header sanitization**: For matched inference requests, the proxy strips credential headers (`Authorization`, `x-api-key`) and framing/hop-by-hop headers (`host`, `content-length`, `transfer-encoding`, `connection`, etc.). The router rebuilds correct framing for the forwarded body. @@ -666,7 +657,7 @@ The interception steps: 6. **Response handling**: - On success: the router's response (status code, headers, body) is formatted as an HTTP/1.1 response and sent back to the client after stripping response framing/hop-by-hop headers (`transfer-encoding`, `content-length`, `connection`, etc.) 
- On router failure: the error is mapped to an HTTP status code via `router_error_to_http()` and returned as a JSON error body (see error table below) - - Empty route cache: returns `503` JSON error (`{"error": "inference endpoint detected without matching inference route"}`) + - Empty route cache: returns `503` JSON error (`{"error": "cluster inference is not configured"}`) - Non-inference requests: returns `403 Forbidden` with a JSON error body (`{"error": "connection not allowed by policy"}`) 7. **Connection lifecycle**: The handler loops to process multiple HTTP requests on the same connection (HTTP keep-alive). The loop ends when the client closes the connection or an unrecoverable error occurs. Once at least one request has been successfully routed (`routed_any` flag), subsequent failures (client disconnect, I/O error, payload too large, request not allowed by policy) are treated as clean termination (`InferenceOutcome::Routed`) rather than denials. @@ -677,7 +668,7 @@ When `Router::proxy_with_candidates()` returns an error, `router_error_to_http() | `RouterError` variant | HTTP status | Response body | |----------------------|-------------|---------------| -| `RouteNotFound(hint)` | `400` | `no route configured for routing_hint '{hint}'` | +| `RouteNotFound(hint)` | `400` | `no route configured for route '{hint}'` | | `NoCompatibleRoute(protocol)` | `400` | `no compatible route for source protocol '{protocol}'` | | `Unauthorized(msg)` | `401` | `{msg}` | | `UpstreamUnavailable(msg)` | `503` | `{msg}` | @@ -701,13 +692,13 @@ pub struct InferenceContext { #### Design decision: standalone capability -The sandbox is designed to operate both as part of a NemoClaw cluster and as a standalone component without any cluster infrastructure. 
This is intentional — it enables local development workflows (e.g., a developer running a sandbox against a local LLM server without deploying the full NemoClaw stack), CI/CD environments where sandboxes run as isolated test harnesses, and air-gapped deployments where the gateway is not available. Everything the sandbox needs — policy, inference routes — can be provided without any dependency on the control plane. +The sandbox is designed to operate both as part of a cluster and as a standalone component without any cluster infrastructure. This is intentional -- it enables local development workflows (e.g., a developer running a sandbox against a local LLM server without deploying the full stack), CI/CD environments where sandboxes run as isolated test harnesses, and air-gapped deployments where the gateway is not available. Everything the sandbox needs -- policy, inference routes -- can be provided without any dependency on the control plane. #### Route sources (priority order) 1. **Route file (standalone mode)**: `--inference-routes` / `NEMOCLAW_INFERENCE_ROUTES` points to a YAML file parsed by `RouterConfig::load_from_file()`. Routes are resolved via `config.resolve_routes()`. File loading or parsing errors are fatal (fail-fast), but an empty route list gracefully disables inference routing (returns `None`). The route file always takes precedence -- if both a route file and cluster credentials are present, the route file wins and the cluster bundle is not fetched. -2. **Cluster bundle (cluster mode)**: When `sandbox_id` and `navigator_endpoint` are available (and no route file is configured), routes are fetched from the gateway via `grpc_client::fetch_inference_bundle()`, which calls the `GetSandboxInferenceBundle` gRPC RPC on the `Inference` service. The gateway returns a `GetSandboxInferenceBundleResponse` containing pre-filtered `SandboxResolvedRoute` entries (routes whose `routing_hint` matches the sandbox's `allowed_routes` policy). 
These proto messages are converted to `ResolvedRoute` structs by `bundle_to_resolved_routes()`. +2. **Cluster bundle (cluster mode)**: When `navigator_endpoint` is available (and no route file is configured), routes are fetched from the gateway via `grpc_client::fetch_inference_bundle()`, which calls the `GetInferenceBundle` gRPC RPC on the `Inference` service. The RPC takes no arguments (the bundle is cluster-scoped, not per-sandbox). The gateway returns a `GetInferenceBundleResponse` containing resolved `ResolvedRoute` entries for the managed cluster route. These proto messages are converted to router `ResolvedRoute` structs by `bundle_to_resolved_routes()`, which maps provider types to auth headers and default headers via `navigator_core::inference::auth_for_provider_type()`. 3. **No source**: If neither route file nor cluster credentials are configured, `build_inference_context()` returns `None` and inference routing is disabled. @@ -722,7 +713,7 @@ Route sources handle empty route lists differently: file mode disables inference #### Background route cache refresh -In cluster mode (when no route file is configured), `spawn_route_refresh()` starts a background tokio task that refreshes the route cache every 30 seconds. The task calls `fetch_inference_bundle()` on each tick and replaces the `RwLock>` contents. On fetch failure, the task logs a warning and keeps the stale routes. The `MissedTickBehavior::Skip` policy prevents refresh storms after temporary gateway outages. +In cluster mode (when no route file is configured), `spawn_route_refresh()` starts a background tokio task that refreshes the route cache every 30 seconds (`ROUTE_REFRESH_INTERVAL_SECS`). The task calls `fetch_inference_bundle()` on each tick and replaces the `RwLock>` contents. On fetch failure, the task logs a warning and keeps the stale routes. The `MissedTickBehavior::Skip` policy prevents refresh storms after temporary gateway outages. 
```mermaid flowchart TD @@ -950,10 +941,15 @@ Wraps `tokio::process::Child` + PID. Platform-specific `spawn()` methods delegat Resolves user/group names from policy, then: 1. `initgroups()` to set supplementary groups (Linux only, not macOS) 2. `setgid()` to target group -3. `setuid()` to target user +3. Verify `getegid()` matches the target GID +4. `setuid()` to target user +5. Verify `geteuid()` matches the target UID +6. Verify `setuid(0)` fails (confirms root cannot be re-acquired) The ordering is significant: `initgroups`/`setgid` must happen before `setuid` because switching user may drop the privileges needed for group manipulation. Similarly, privilege dropping must happen before Landlock because Landlock may block access to `/etc/passwd` and `/etc/group`. +Steps 3, 5, and 6 are defense-in-depth post-condition checks (CWE-250 / CERT POS37-C). All three syscalls (`geteuid`, `getegid`, `setuid`) are async-signal-safe, so they are safe to call in the `pre_exec` context. The checks add negligible overhead while guarding against hypothetical kernel-level defects that could cause `setuid`/`setgid` to return success without actually changing the effective IDs. + ### `ProcessStatus` Exit code is `code` if the process exited normally, or `128 + signal` if killed by a signal (standard Unix convention). Returns `-1` if neither is available. @@ -1125,14 +1121,14 @@ Dual-output logging is configured in `main.rs`: - **`/var/log/navigator.log`**: Fixed at `info` level, no ANSI, non-blocking writer Key structured log events: -- `CONNECT`: One per proxy CONNECT request with full identity context. `InspectForInference` connections that are ultimately denied produce a second `CONNECT action=deny` log with the denial reason. +- `CONNECT`: One per proxy CONNECT request (for non-`inference.local` targets) with full identity context. Inference interception failures produce a separate `info!()` log with `action=deny` and the denial reason. 
- `L7_REQUEST`: One per L7-inspected request with method, path, and decision - Sandbox lifecycle events: process start, exit, namespace creation/cleanup - Policy reload events: new version detected, reload success/failure, status report outcomes ## Log Streaming -In gRPC mode, sandbox supervisor logs are streamed to the gateway in real time. This enables operators and CLI users to view both gateway-side and sandbox-side logs in a unified stream via `nav sandbox logs`. +In gRPC mode, sandbox supervisor logs are streamed to the gateway in real time. This enables operators and CLI users to view both gateway-side and sandbox-side logs in a unified stream via `nav logs`. ### Architecture overview @@ -1244,7 +1240,7 @@ The `SandboxLogLine.fields` map (`map` in proto) carries tracing | Field | Source | Description | |-------|--------|-------------| | `dst_host` | Proxy CONNECT log | Destination hostname | -| `action` | Proxy CONNECT log | `allow`, `deny`, or `inspect_for_inference` | +| `action` | Proxy CONNECT log | `allow` or `deny` | | `policy` | Proxy CONNECT log | Matched policy name | | `version` | Policy reload log | New policy version number | | `policy_hash` | Policy reload log | SHA256 hash of new policy | @@ -1255,20 +1251,20 @@ Gateway-sourced logs do not currently populate the `fields` map (it remains empt **File:** `crates/navigator-cli/src/main.rs` (command definition), `crates/navigator-cli/src/run.rs` (`sandbox_logs()`) -The `nav sandbox logs` command supports filtering by source and level: +The `nav logs` command supports filtering by source and level: ```bash # Show only sandbox-side logs -nav sandbox logs my-sandbox --source sandbox +nav logs my-sandbox --source sandbox # Show only warnings and errors from the gateway -nav sandbox logs my-sandbox --source gateway --level warn +nav logs my-sandbox --source gateway --level warn # Stream live logs from all sources -nav sandbox logs my-sandbox --tail +nav logs my-sandbox --tail # Stream live sandbox logs 
only -nav sandbox logs my-sandbox --tail --source sandbox +nav logs my-sandbox --tail --source sandbox ``` **CLI flags:** @@ -1323,7 +1319,7 @@ sequenceDiagram participant BG as Background push task participant GW as Gateway (push_sandbox_logs) participant TB as TracingLogBus - participant CL as CLI (nav sandbox logs) + participant CL as CLI (nav logs) SB->>LP: tracing event (info!(...)) LP->>LP: Check level >= NEMOCLAW_LOG_PUSH_LEVEL diff --git a/architecture/security-policy.md b/architecture/security-policy.md index 30321050..bf156764 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -36,14 +36,14 @@ When the sandbox runs inside a managed cluster, it fetches its typed protobuf po ```bash navigator-sandbox \ --sandbox-id abc123 \ - --nemoclaw-endpoint https://navigator:8080 \ + --navigator-endpoint https://navigator:8080 \ -- /bin/bash ``` -| Flag | Environment Variable | Description | -| ---------------------- | ---------------------- | ---------------------------- | -| `--sandbox-id` | `NEMOCLAW_SANDBOX_ID` | Sandbox ID for policy lookup | -| `--nemoclaw-endpoint` | `NEMOCLAW_ENDPOINT` | Gateway gRPC endpoint | +| Flag | Environment Variable | Description | +| ------------------------ | ---------------------- | ---------------------------- | +| `--sandbox-id` | `NEMOCLAW_SANDBOX_ID` | Sandbox ID for policy lookup | +| `--navigator-endpoint` | `NEMOCLAW_ENDPOINT` | Gateway gRPC endpoint | The gateway returns a `SandboxPolicy` protobuf message (defined in `proto/sandbox.proto`). The sandbox supervisor converts this proto into JSON, validates L7 config, expands presets, and loads it into the OPA engine using baked-in Rego rules (`sandbox-policy.rego` compiled via `include_str!`). See `crates/navigator-sandbox/src/opa.rs` -- `OpaEngine::from_proto()`. @@ -53,7 +53,7 @@ The gateway returns a `SandboxPolicy` protobuf message (defined in `proto/sandbo flowchart TD START[Sandbox Startup] --> CHECK{File mode?
--policy-rules +
--policy-data} CHECK -->|Yes| FILE[Read .rego + .yaml from disk] - CHECK -->|No| NEMOCLAW{gRPC mode?
--sandbox-id +
--nemoclaw-endpoint} + CHECK -->|No| NEMOCLAW{gRPC mode?
--sandbox-id +
--navigator-endpoint} NEMOCLAW -->|Yes| FETCH[Fetch SandboxPolicy proto via gRPC] NEMOCLAW -->|No| ERR[Error: no policy source] @@ -69,11 +69,11 @@ flowchart TD ### Priority -File mode takes precedence. If both `--policy-rules`/`--policy-data` and `--sandbox-id`/`--nemoclaw-endpoint` are provided, file mode is used. See `crates/navigator-sandbox/src/lib.rs` -- `load_policy()`. +File mode takes precedence. If both `--policy-rules`/`--policy-data` and `--sandbox-id`/`--navigator-endpoint` are provided, file mode is used. See `crates/navigator-sandbox/src/lib.rs` -- `load_policy()`. ## Live Policy Updates -Policy can be updated on a running sandbox without restarting it. This enables operators to tighten or relax network access rules and inference routing in response to changing requirements. +Policy can be updated on a running sandbox without restarting it. This enables operators to tighten or relax network access rules in response to changing requirements. Live updates are only available in **gRPC mode** (production clusters). File-mode sandboxes load policy once at startup and do not poll for changes. @@ -84,7 +84,7 @@ Policy fields fall into two categories based on when they are enforced: | Category | Fields | Enforcement Point | Updatable? | |----------|--------|-------------------|------------| | **Static** | `filesystem_policy`, `landlock`, `process` | Applied once in the child process `pre_exec` (after `fork()`, before `exec()`). Kernel-level Landlock rulesets and UID/GID changes cannot be reversed. | No -- immutable after sandbox creation | -| **Dynamic** | `network_policies`, `inference` | Evaluated at runtime by the OPA engine on every proxy CONNECT request and L7 rule check. The OPA engine can be atomically replaced. | Yes -- via `nav sandbox policy set` | +| **Dynamic** | `network_policies`, `inference` | Evaluated at runtime by the OPA engine on every proxy CONNECT request and L7 rule check. The OPA engine can be atomically replaced. 
| Yes -- via `nav policy set` |

Attempting to change a static field in an update request returns an `INVALID_ARGUMENT` error with a message indicating which field cannot be modified. See `crates/navigator-server/src/grpc.rs` -- `validate_static_fields_unchanged()`.

@@ -103,7 +103,7 @@ The update mechanism uses a poll-based model with versioned policy revisions and

 ```mermaid
 sequenceDiagram
-    participant CLI as nav sandbox policy set
+    participant CLI as nav policy set
     participant GW as Gateway (navigator-server)
     participant DB as Persistence (SQLite/Postgres)
     participant SB as Sandbox (navigator-sandbox)
@@ -160,8 +160,7 @@ The hash is computed as follows:

 1. Hash the `version` field as little-endian bytes.
 2. Hash the `filesystem`, `landlock`, and `process` sub-messages via `encode_to_vec()` (these contain no `map` fields, so encoding is deterministic).
 3. Collect `network_policies` entries, sort by map key, then hash each key (as UTF-8 bytes) followed by the value's `encode_to_vec()`.
-4. Hash the `inference` sub-message via `encode_to_vec()`.
-5. Return the hex-encoded SHA-256 digest.
+4. Return the hex-encoded SHA-256 digest.

 This guarantees that the same logical policy always produces the same hash regardless of protobuf serialization order. 
@@ -211,32 +210,32 @@ Failure scenarios that trigger LKG behavior include: ### CLI Commands -The `nav sandbox policy` subcommand group manages live policy updates: +The `nav policy` subcommand group manages live policy updates: ```bash # Push a new policy to a running sandbox -nav sandbox policy set --policy updated-policy.yaml +nav policy set --policy updated-policy.yaml # Push and wait for the sandbox to load it (with 60s timeout) -nav sandbox policy set --policy updated-policy.yaml --wait +nav policy set --policy updated-policy.yaml --wait # Push and wait with a custom timeout -nav sandbox policy set --policy updated-policy.yaml --wait --timeout 120 +nav policy set --policy updated-policy.yaml --wait --timeout 120 # View the current active policy and its status -nav sandbox policy get +nav policy get # Inspect a specific revision -nav sandbox policy get --rev 3 +nav policy get --rev 3 # Print the full policy as YAML (round-trips with --policy input format) -nav sandbox policy get --full +nav policy get --full # Combine: inspect a specific revision's full policy -nav sandbox policy get --rev 2 --full +nav policy get --rev 2 --full # List policy revision history -nav sandbox policy list --limit 20 +nav policy list --limit 20 ``` #### `policy get` flags @@ -244,11 +243,11 @@ nav sandbox policy list --limit 20 | Flag | Default | Description | |------|---------|-------------| | `--rev N` | `0` (latest) | Retrieve a specific policy revision by version number instead of the latest. Maps to the `version` field of `GetSandboxPolicyStatusRequest` -- version `0` resolves to the latest revision server-side. | -| `--full` | off | Print the complete policy as YAML after the metadata summary. The YAML output uses the same schema as the `--policy` input file, so it round-trips: you can save it to a file and pass it back to `nav sandbox policy set --policy`. | +| `--full` | off | Print the complete policy as YAML after the metadata summary. 
The YAML output uses the same schema as the `--policy` input file, so it round-trips: you can save it to a file and pass it back to `nav policy set --policy`. | -When `--full` is specified, the server includes the deserialized `SandboxPolicy` protobuf in the `SandboxPolicyRevision.policy` field (see `crates/navigator-server/src/grpc.rs` -- `policy_record_to_revision()` with `include_policy: true`). The CLI converts this proto back to YAML via `policy_to_yaml()`, which uses a `BTreeMap` for `network_policies` to produce deterministic key ordering. See `crates/navigator-cli/src/run.rs` -- `policy_to_yaml()`, `sandbox_policy_get()`. +When `--full` is specified, the server includes the deserialized `SandboxPolicy` protobuf in the `SandboxPolicyRevision.policy` field (see `crates/navigator-server/src/grpc.rs` -- `policy_record_to_revision()` with `include_policy: true`). The CLI converts this proto back to YAML via `policy_to_yaml()`, which uses a `BTreeMap` for `network_policies` to produce deterministic key ordering. See `crates/navigator-cli/src/run.rs` -- `policy_to_yaml()`, `policy_get()`. -See `crates/navigator-cli/src/main.rs` -- `PolicyCommands` enum, `crates/navigator-cli/src/run.rs` -- `sandbox_policy_set()`, `sandbox_policy_get()`, `sandbox_policy_list()`. +See `crates/navigator-cli/src/main.rs` -- `PolicyCommands` enum, `crates/navigator-cli/src/run.rs` -- `policy_set()`, `policy_get()`, `policy_list()`. --- @@ -259,7 +258,7 @@ The YAML data file contains top-level keys that map directly to the OPA data nam ### Top-Level Structure ```yaml -# Optional version field (currently informational) +# Required version field version: 1 # Filesystem access policy (applied at startup via Landlock) @@ -284,9 +283,6 @@ network_policies: endpoints: [] binaries: [] -# Inference routing policy (gRPC mode only) -inference: - allowed_routes: [] ``` --- @@ -364,9 +360,12 @@ Controls privilege dropping for the sandboxed process. **Static field** -- immut 1. 
`initgroups()` -- set supplementary groups for the target user 2. `setgid()` -- switch to the target group -3. `setuid()` -- switch to the target user +3. Verify `getegid()` matches the target GID (defense-in-depth, CWE-250 / CERT POS37-C) +4. `setuid()` -- switch to the target user +5. Verify `geteuid()` matches the target UID +6. Verify `setuid(0)` fails -- confirms root cannot be re-acquired -This happens before Landlock and seccomp are applied because `initgroups` needs access to `/etc/group` and `/etc/passwd`, which Landlock may subsequently block. See `crates/navigator-sandbox/src/process.rs` -- `drop_privileges()`. +This happens before Landlock and seccomp are applied because `initgroups` needs access to `/etc/group` and `/etc/passwd`, which Landlock may subsequently block. The post-condition checks (steps 3, 5, 6) are async-signal-safe and add negligible overhead while guarding against hypothetical kernel-level defects. See `crates/navigator-sandbox/src/process.rs` -- `drop_privileges()`. ```yaml process: @@ -380,7 +379,7 @@ process: A map of named network policy rules. Each rule defines which binary/endpoint pairs are allowed to make outbound network connections. This is the core of the network access control system. **Dynamic field** -- can be updated on a running sandbox via live policy updates (see [Live Policy Updates](#live-policy-updates)). However, the overall network mode (Block vs. Proxy) is immutable. -**Behavioral trigger**: The mere presence of any entries in `network_policies` switches the sandbox to **proxy mode**. When `network_policies` is empty or absent, the sandbox operates in **block mode** where all outbound network access is denied via seccomp. +**Behavioral trigger**: The sandbox always starts in **proxy mode** regardless of whether `network_policies` is present. The proxy is required so that all egress can be evaluated by OPA and the virtual hostname `inference.local` is always addressable for inference routing. 
When `network_policies` is empty, the OPA engine denies all connections. ```yaml network_policies: @@ -466,19 +465,11 @@ See `crates/navigator-sandbox/src/l7/mod.rs` -- `expand_access_presets()`. --- -### `inference` - -Controls access to the platform's inference routing system (gRPC mode only, included in the `SandboxPolicy` proto but not consumed by the sandbox supervisor directly). **Dynamic field** -- can be updated on a running sandbox via live policy updates (see [Live Policy Updates](#live-policy-updates)). +### Inference Routing -| Field | Type | Default | Description | -| ----------------------- | ---------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `allowed_routes` | `string[]` | `[]` | Which routing hints the sandbox may request. e.g., `["local"]` for private-only, `["local", "frontier"]` for full access. Empty means no inference allowed. | +Inference routing to `inference.local` is handled by the proxy's `InferenceContext`, not by the OPA policy engine or an `inference` block in the policy YAML. The proxy intercepts HTTPS CONNECT requests to `inference.local` and routes matching inference API requests (e.g., `POST /v1/chat/completions`, `POST /v1/messages`) through the sandbox-local `navigator-router`. See [Inference Routing](inference-routing.md) for details on route configuration and the router architecture. -```yaml -inference: - allowed_routes: - - local -``` +The proxy always runs in proxy mode so that `inference.local` is addressable from within the sandbox's network namespace. Inference route sources are configured separately from policy: via `--inference-routes` (file mode) or fetched from the gateway's inference bundle (cluster mode). See `crates/navigator-sandbox/src/proxy.rs` -- `InferenceContext`, `crates/navigator-sandbox/src/l7/inference.rs`. 
--- @@ -486,30 +477,42 @@ inference: Several policy fields trigger fundamentally different enforcement behavior. Understanding these triggers is critical for writing correct policies. -### Network Mode: Block vs. Proxy +### Network Mode: Always Proxy -**Trigger**: The presence or absence of entries in `network_policies`. +The sandbox always runs in **proxy mode**. Both file mode and gRPC mode set `NetworkMode::Proxy` unconditionally. This ensures all egress is evaluated by OPA and the virtual hostname `inference.local` is always addressable for inference routing. See `crates/navigator-sandbox/src/lib.rs` -- `load_policy()`, `crates/navigator-sandbox/src/policy.rs` -- `TryFrom`. -| Condition | Network Mode | Behavior | -| ------------------------------------- | ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `network_policies` is empty or absent | **Block** | Seccomp blocks all `socket()` calls for `AF_INET` and `AF_INET6`. No network proxy is started. No outbound TCP connections are possible. | -| `network_policies` has any entries | **Proxy** | Seccomp allows `AF_INET` and `AF_INET6` sockets. An HTTP CONNECT proxy starts. A network namespace with veth pair isolates the sandbox. `HTTP_PROXY`/`HTTPS_PROXY`/`ALL_PROXY` environment variables are set on the child process. | +In proxy mode: -In proxy mode, the seccomp filter still blocks `AF_NETLINK`, `AF_PACKET`, `AF_BLUETOOTH`, and `AF_VSOCK` socket domains regardless. See `crates/navigator-sandbox/src/sandbox/linux/seccomp.rs` -- `build_filter()`. +- Seccomp allows `AF_INET` and `AF_INET6` sockets (but blocks `AF_NETLINK`, `AF_PACKET`, `AF_BLUETOOTH`, `AF_VSOCK`). +- An HTTP CONNECT proxy starts, bound to the host side of a veth pair. +- A network namespace with a veth pair isolates the sandbox process. 
+- `HTTP_PROXY`/`HTTPS_PROXY`/`ALL_PROXY` environment variables are set on the child process. -**Immutability**: The network mode is determined at sandbox creation and cannot change via live policy updates. Adding `network_policies` to a Block-mode sandbox or removing all policies from a Proxy-mode sandbox is rejected by the gateway. See [Network Mode Immutability](#network-mode-immutability). +When `network_policies` is empty, the OPA engine denies all outbound connections (except `inference.local` which is handled separately by the proxy before OPA evaluation). + +**Gateway-side validation**: The `validate_network_mode_unchanged()` function on the server still rejects live policy updates that would add `network_policies` to a sandbox created without them or remove all `network_policies` from a sandbox created with them. This prevents unexpected behavioral changes in the OPA allow/deny logic. See `crates/navigator-server/src/grpc.rs` -- `validate_network_mode_unchanged()`. + +**Proxy sub-modes**: In proxy mode, the proxy handles two distinct request types: + +| Client sends | Proxy behavior | Typical use case | +|---|---|---| +| `CONNECT host:port` | CONNECT tunnel (bidirectional TCP relay or L7 inspection) | HTTPS to any destination, HTTP through an opaque tunnel | +| `GET http://host/path HTTP/1.1` (absolute-form) | **Forward proxy** — rewrites to origin-form & relays | Plain HTTP to private IP endpoints | + +See [Behavioral Trigger: Forward Proxy Mode](#behavioral-trigger-forward-proxy-mode) for full details on the forward proxy path. ```mermaid flowchart LR - POLICY{network_policies
present?} - POLICY -->|No| BLOCK[Block Mode] - POLICY -->|Yes| PROXY[Proxy Mode] + SANDBOX[Sandbox Startup] --> PROXY[Proxy Mode
Always Active] - BLOCK --> SECCOMP_BLOCK["seccomp: block AF_INET + AF_INET6"] PROXY --> SECCOMP_ALLOW["seccomp: allow AF_INET + AF_INET6
block AF_NETLINK, AF_PACKET, etc."] PROXY --> NETNS["Create network namespace
veth pair: 10.200.0.1 ↔ 10.200.0.2"] - PROXY --> START_PROXY["Start HTTP CONNECT proxy
bound to veth host IP"] + PROXY --> START_PROXY["Start HTTP proxy
bound to veth host IP"] PROXY --> ENVVARS["Set HTTP_PROXY, HTTPS_PROXY,
ALL_PROXY on child process"] + + START_PROXY --> CONNECT{CONNECT request} + CONNECT -->|inference.local| INFERENCE["InferenceContext:
route to local backend"] + CONNECT -->|Other host| OPA["OPA evaluation:
network_policies"] ``` ### Behavioral Trigger: L7 Inspection @@ -528,6 +531,94 @@ This is the single most important behavioral trigger in the policy language. An **Validation requirement**: When `protocol` is set, either `rules` or `access` must also be present. An endpoint with `protocol` but no rules/access is rejected at validation time because it would deny all traffic (no allow rules means nothing matches). See `crates/navigator-sandbox/src/l7/mod.rs` -- `validate_l7_policies()`. +### Behavioral Trigger: Forward Proxy Mode + +**Trigger**: A non-CONNECT HTTP method with an absolute-form URI (e.g., `GET http://host:port/path HTTP/1.1`). + +When a client sets `HTTP_PROXY` and makes a plain `http://` request, standard HTTP libraries send a **forward proxy request** instead of a CONNECT tunnel. The proxy handles these requests via the forward proxy path rather than the CONNECT path. + +**Security constraint**: Forward proxy mode is restricted to **private IP endpoints** that are explicitly allowed by policy. Plain HTTP traffic never reaches the public internet. All three conditions must be true: + +1. OPA policy explicitly allows the destination (`action=allow`) +2. The matched endpoint has `allowed_ips` configured +3. All resolved IP addresses are RFC 1918 private (`10/8`, `172.16/12`, `192.168/16`) + +If any condition fails, the proxy returns `403 Forbidden`. + +| Condition | Forward proxy | CONNECT | +|---|---|---| +| Public IP, no `allowed_ips` | 403 | Allowed (standard SSRF check) | +| Public IP, with `allowed_ips` | 403 (private-IP gate) | Allowed if IP in allowlist | +| Private IP, no `allowed_ips` | 403 | 403 (SSRF block) | +| Private IP, with `allowed_ips` | **Allowed** | Allowed | +| `https://` scheme | 403 (must use CONNECT) | N/A | + +**Request processing**: When a forward proxy request is accepted, the proxy: + +1. Parses the absolute-form URI to extract scheme, host, port, and path (`parse_proxy_uri`) +2. 
Rejects `https://` — clients must use CONNECT for TLS +3. Evaluates OPA policy (same `evaluate_opa_tcp` as CONNECT) +4. Requires `allowed_ips` on the matched endpoint +5. Resolves DNS and validates all IPs are private and within `allowed_ips` +6. Connects to upstream +7. Rewrites the request: absolute-form → origin-form (`GET /path HTTP/1.1`), strips hop-by-hop headers, adds `Via: 1.1 navigator-sandbox` and `Connection: close` +8. Forwards the rewritten request, then relays bidirectionally using `tokio::io::copy_bidirectional` (supports chunked transfer, SSE streams, and other long-lived responses with no idle timeout) + +**V1 simplifications**: Forward proxy v1 injects `Connection: close` (no keep-alive) and does not perform L7 inspection on the forwarded traffic. Every forward proxy connection handles exactly one request-response exchange. + +**Implementation**: See `crates/navigator-sandbox/src/proxy.rs` -- `handle_forward_proxy()`, `parse_proxy_uri()`, `rewrite_forward_request()`. + +**Logging**: Forward proxy requests are logged distinctly from CONNECT: + +``` +FORWARD method=GET dst_host=10.86.8.223 dst_port=8000 path=/screenshot/ action=allow policy=computer-control +``` + +```mermaid +flowchart TD + A["Non-CONNECT request received
e.g. GET http://host/path"] --> B["parse_proxy_uri(uri)"] + B --> C{Scheme = http?} + C -- No --> D["403 Forbidden
(HTTPS must use CONNECT)"] + C -- Yes --> E["OPA policy evaluation"] + E --> F{Allowed?} + F -- No --> G["403 Forbidden"] + F -- Yes --> H{allowed_ips on endpoint?} + H -- No --> I["403 Forbidden
(forward proxy requires allowed_ips)"] + H -- Yes --> J["resolve_and_check_allowed_ips()"] + J --> K{All IPs private
AND in allowlist?} + K -- No --> L["403 Forbidden"] + K -- Yes --> M["TCP connect to upstream"] + M --> N["Rewrite request to origin-form
Add Via + Connection: close"] + N --> O["Forward request + copy_bidirectional"] +``` + +#### Example: Forward Proxy Policy + +The same policy that enables CONNECT to a private endpoint also enables forward proxy access. No new policy fields are needed: + +```yaml +network_policies: + computer_control: + name: computer-control + endpoints: + - host: 10.86.8.223 + port: 8000 + allowed_ips: + - "10.86.8.223/32" + binaries: + - { path: /usr/local/bin/python3.12 } +``` + +With this policy, both work: + +```python +# CONNECT tunnel (httpx with HTTPS, or explicit tunnel code) +# Forward proxy (httpx with HTTP_PROXY set for http:// URLs) +import httpx +resp = httpx.get("http://10.86.8.223:8000/screenshot/", + proxy="http://10.200.0.1:3128") +``` + ### Behavioral Trigger: TLS Termination **Trigger**: The `tls` field on a `NetworkEndpoint`. @@ -597,7 +688,7 @@ Regardless of network mode, certain socket domains are always blocked: | `AF_BLUETOOTH` | 31 | Prevents Bluetooth access | | `AF_VSOCK` | 40 | Prevents VM socket communication | -In **block mode**, `AF_INET` (2) and `AF_INET6` (10) are also blocked, preventing all TCP/UDP networking. +In proxy mode (which is always active), `AF_INET` (2) and `AF_INET6` (10) are allowed so the sandbox process can reach the proxy. The seccomp filter uses a default-allow policy (`SeccompAction::Allow`) with specific `socket()` syscall rules that return `EPERM` when the first argument (domain) matches a blocked value. See `crates/navigator-sandbox/src/sandbox/linux/seccomp.rs`. @@ -771,9 +862,15 @@ Functions in `crates/navigator-sandbox/src/proxy.rs` implement the SSRF checks: ### Placement in Proxy Flow +The SSRF check applies to both CONNECT and forward proxy requests. For forward proxy, an additional private-IP gate requires all resolved IPs to be RFC 1918 private. 
+ ```mermaid flowchart TD - A[CONNECT request received] --> D[OPA policy evaluation] + A["Request received"] --> B{CONNECT?} + B -- Yes --> INF{inference.local?} + INF -- Yes --> C["InferenceContext: route locally"] + INF -- No --> D[OPA policy evaluation] + B -- No --> FP["Forward proxy path
(see Forward Proxy Mode)"] D --> E{Allowed?} E -- No --> F["403 Forbidden"] E -- Yes --> G{allowed_ips on endpoint?} @@ -785,7 +882,13 @@ flowchart TD L --> M{All IPs public?} M -- No --> J M -- Yes --> K - K --> N[200 Connection Established] + K --> N["200 Connection Established"] + + FP --> FP_OPA["OPA evaluation + require allowed_ips"] + FP_OPA --> FP_RESOLVE["resolve_and_check_allowed_ips"] + FP_RESOLVE --> FP_PRIVATE{All IPs private?} + FP_PRIVATE -- No --> J + FP_PRIVATE -- Yes --> FP_CONNECT["TCP connect + rewrite + relay"] ``` ### Private IP Access via `allowed_ips` @@ -974,6 +1077,20 @@ network_policies: binaries: - { path: /usr/bin/curl } + # Forward proxy + CONNECT: private service accessible via plain HTTP or tunnel + # With allowed_ips set and the destination being a private IP, both + # `http://10.86.8.223:8000/path` (forward proxy) and + # `CONNECT 10.86.8.223:8000` (tunnel) work. + computer_control: + name: computer-control + endpoints: + - host: 10.86.8.223 + port: 8000 + allowed_ips: + - "10.86.8.223/32" + binaries: + - { path: /usr/local/bin/python3.12 } + inference: allowed_routes: - local @@ -991,7 +1108,6 @@ When the gateway delivers policy via gRPC, the protobuf `SandboxPolicy` message | `SandboxPolicy` | `landlock` | `landlock` | | `SandboxPolicy` | `process` | `process` | | `SandboxPolicy` | `network_policies` | `network_policies` | -| `SandboxPolicy` | `inference` | `inference` | | `FilesystemPolicy` | `include_workdir` | `filesystem_policy.include_workdir` | | `FilesystemPolicy` | `read_only` | `filesystem_policy.read_only` | | `FilesystemPolicy` | `read_write` | `filesystem_policy.read_write` | @@ -1004,7 +1120,6 @@ When the gateway delivers policy via gRPC, the protobuf `SandboxPolicy` message | `NetworkEndpoint` | `host`, `port`, `protocol`, `tls`, `enforcement`, `access`, `rules`, `allowed_ips` | Same field names | | `L7Rule` | `allow` | `rules[].allow` | | `L7Allow` | `method`, `path`, `command` | `rules[].allow.method`, `.path`, 
`.command` | -| `InferencePolicy` | `allowed_routes` | `inference.allowed_routes` | The conversion is performed in `crates/navigator-sandbox/src/opa.rs` -- `proto_to_opa_data_json()`. @@ -1027,11 +1142,14 @@ This ordering is intentional: privilege dropping needs `/etc/group` and `/etc/pa The OPA engine evaluates two categories of rules: -### L4 Rules (per-CONNECT) +### L4 Rules (per-connection) + +Evaluated on every CONNECT request and every forward proxy request. The same OPA input is used in both cases. | Rule | Signature | Returns | | ------------------------- | ----------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | | `allow_network` | `input.network.host`, `input.network.port`, `input.exec.path`, `input.exec.ancestors`, `input.exec.cmdline_paths` | `true` if any policy matches both endpoint and binary | +| `network_action` | Same input | `"allow"` if endpoint + binary matched, `"deny"` otherwise | | `deny_reason` | Same input | Human-readable string explaining why access was denied | | `matched_network_policy` | Same input | Name of the matched policy (for audit logging) | | `matched_endpoint_config` | Same input | Raw endpoint object for L7 config extraction (returned if endpoint has `protocol` or `allowed_ips` field) | @@ -1049,7 +1167,7 @@ See `sandbox-policy.rego` for the full Rego implementation. ## Sandbox Log Filtering -The `nav sandbox logs` command retrieves log lines from the gateway's in-memory log buffer. Two server-side filters narrow the output before logs are sent to the CLI. +The `nav logs` command retrieves log lines from the gateway's in-memory log buffer. Two server-side filters narrow the output before logs are sent to the CLI. 
### Source Filter (`--source`) @@ -1065,10 +1183,10 @@ Multiple sources can be specified: `--source gateway --source sandbox` is equiva ```bash # Show only proxy/OPA logs from the sandbox supervisor -nav sandbox logs my-sandbox --source sandbox +nav logs my-sandbox --source sandbox # Show only gateway-side reconciler logs -nav sandbox logs my-sandbox --source gateway +nav logs my-sandbox --source gateway ``` The filter applies to both one-shot mode (`GetSandboxLogs` RPC) and streaming mode (`--tail`, via `WatchSandbox` RPC). In both cases, the server evaluates `source_matches()` before sending each log line to the client. See `crates/navigator-server/src/grpc.rs` -- `source_matches()`, `get_sandbox_logs()`. @@ -1089,10 +1207,10 @@ The default (empty string) disables level filtering -- all levels pass. An unrec ```bash # Show only WARN and ERROR logs -nav sandbox logs my-sandbox --level warn +nav logs my-sandbox --level warn # Combine with source filter: only sandbox ERROR logs -nav sandbox logs my-sandbox --source sandbox --level error +nav logs my-sandbox --source sandbox --level error ``` The filter is applied server-side via `level_matches()` in both one-shot and streaming modes. See `crates/navigator-server/src/grpc.rs` -- `level_matches()`. 
@@ -1114,4 +1232,6 @@ An empty `sources`/`log_sources` list means no source filtering (all sources pas - [Sandbox Architecture](sandbox.md) -- Full sandbox lifecycle, enforcement mechanisms, and component interaction - [Gateway Architecture](gateway.md) -- How the gateway stores and delivers policies via gRPC +- [Inference Routing](inference-routing.md) -- How `inference.local` requests are routed to model backends - [Overview](README.md) -- System-level context for how policies fit into the platform +- [Plain HTTP Forward Proxy Plan](plans/plain-http-forward-proxy.md) -- Design document for the forward proxy feature diff --git a/architecture/system-architecture.md b/architecture/system-architecture.md index ad92f000..643cee2f 100644 --- a/architecture/system-architecture.md +++ b/architecture/system-architecture.md @@ -7,7 +7,7 @@ graph TB %% ============================================================ subgraph UserMachine["User's Machine"] CLI["NemoClaw CLI
(nemoclaw)"] - TUI["Gator TUI
(nemoclaw gator)"] + TUI["NemoClaw TUI
(nemoclaw term)"] SDK["Python SDK
(nemoclaw)"] LocalConfig["~/.config/nemoclaw/
clusters, mTLS certs,
active_cluster"] end @@ -17,7 +17,7 @@ graph TB %% ============================================================ subgraph Cluster["NemoClaw Cluster Container (Docker)"] - subgraph K3s["k3s (v1.29.8-k3s1)"] + subgraph K3s["k3s (v1.35.2-k3s1)"] KubeAPI["Kubernetes API
:6443"] HelmController["Helm Controller"] LocalPathProv["local-path-provisioner"] @@ -177,7 +177,7 @@ graph TB | Color | Category | Examples | |-------|----------|---------| -| Blue | User-side components | NemoClaw CLI, Gator TUI, Python SDK | +| Blue | User-side components | NemoClaw CLI, NemoClaw TUI, Python SDK | | Orange | Gateway / Control plane | navigator-server, watch bus, log bus | | Green | Sandbox supervisor | SSH server, HTTP CONNECT proxy, OPA engine, inference router | | Purple | Agent process & isolation | AI agent, Landlock, Seccomp, network namespace | diff --git a/architecture/gator.md b/architecture/tui.md similarity index 71% rename from architecture/gator.md rename to architecture/tui.md index 04202097..d614a8d4 100644 --- a/architecture/gator.md +++ b/architecture/tui.md @@ -1,16 +1,16 @@ -# Gator: NemoClaw TUI +# NemoClaw TUI -Gator is a terminal user interface for NemoClaw, inspired by [k9s](https://k9scli.io/). Instead of typing individual CLI commands to check cluster health, list sandboxes, and manage resources, Gator gives you a real-time, keyboard-driven dashboard — everything updates automatically and you navigate with a few keystrokes. +The NemoClaw TUI is a terminal user interface for NemoClaw, inspired by [k9s](https://k9scli.io/). Instead of typing individual CLI commands to check cluster health, list sandboxes, and manage resources, the TUI gives you a real-time, keyboard-driven dashboard — everything updates automatically and you navigate with a few keystrokes. -## Launching Gator +## Launching the TUI -Gator is a subcommand of the NemoClaw CLI, so it inherits all your existing configuration — cluster selection, TLS settings, and verbosity flags all work the same way. +The TUI is a subcommand of the NemoClaw CLI, so it inherits all your existing configuration — cluster selection, TLS settings, and verbosity flags all work the same way. 
```bash -nemoclaw gator # launch against the active cluster -nav gator # dev alias (builds from source) -nav gator --cluster prod # target a specific cluster -NEMOCLAW_CLUSTER=prod nav gator # same thing, via environment variable +nemoclaw term # launch against the active cluster +nav term # dev alias (builds from source) +nav term --cluster prod # target a specific cluster +NEMOCLAW_CLUSTER=prod nav term # same thing, via environment variable ``` Cluster resolution follows the same priority as the rest of the CLI: @@ -23,11 +23,11 @@ No separate configuration files or authentication are needed. ## Screen Layout -Gator divides the terminal into four horizontal regions: +The TUI divides the terminal into four horizontal regions: ``` ┌─────────────────────────────────────────────────────────────────┐ -│ gator ─ my-cluster ─ Dashboard ● Healthy │ ← title bar +│ NemoClaw ─ my-cluster ─ Dashboard ● Healthy │ ← title bar ├─────────────────────────────────────────────────────────────────┤ │ │ │ (view content — Dashboard or Sandboxes) │ ← main area @@ -39,7 +39,7 @@ Gator divides the terminal into four horizontal regions: └─────────────────────────────────────────────────────────────────┘ ``` -- **Title bar** — shows the Gator logo, cluster name, current view, and live cluster health status. +- **Title bar** — shows the NemoClaw logo, cluster name, current view, and live cluster health status. - **Main area** — the active view (Dashboard or Sandboxes). - **Navigation bar** — lists available views with their shortcut keys, plus Help and Quit. - **Command bar** — appears when you press `:` to type a command (like vim). @@ -84,7 +84,7 @@ When there are no sandboxes, the view displays: *"No sandboxes found."* ## Keyboard Controls -Gator has two input modes: **Normal** (default) and **Command** (activated by pressing `:`). +The TUI has two input modes: **Normal** (default) and **Command** (activated by pressing `:`). 
### Normal Mode @@ -95,7 +95,7 @@ Gator has two input modes: **Normal** (default) and **Command** (activated by pr | `j` or `↓` | Move selection down | | `k` or `↑` | Move selection up | | `:` | Enter command mode | -| `q` | Quit Gator | +| `q` | Quit | | `Ctrl+C` | Force quit | ### Command Mode @@ -104,7 +104,7 @@ Press `:` to open the command bar at the bottom of the screen. Type a command an | Command | Action | |---------|--------| -| `quit` or `q` | Quit Gator | +| `quit` or `q` | Quit | | `dashboard` or `1` | Switch to Dashboard view | | `sandboxes` or `2` | Switch to Sandboxes view | @@ -112,11 +112,11 @@ Press `Esc` to cancel and return to Normal mode. `Backspace` deletes characters ## Data Refresh -Gator automatically polls the cluster every **2 seconds**. Both cluster health and the sandbox list update on each tick, so the display stays current without manual refreshing. This uses the same gRPC calls as the CLI — no additional server-side setup is required. +The TUI automatically polls the cluster every **2 seconds**. Both cluster health and the sandbox list update on each tick, so the display stays current without manual refreshing. This uses the same gRPC calls as the CLI — no additional server-side setup is required. ## Theme -Gator uses a dark terminal theme based on the NVIDIA brand palette: +The TUI uses a dark terminal theme based on the NVIDIA brand palette: - **Background**: Black — the standard terminal background. - **Text**: White for primary content, dimmed white for labels and secondary information. @@ -128,12 +128,12 @@ The title bar uses white text on an Everglade background to visually anchor the ## Port Forwarding -Gator supports creating sandboxes with port forwarding directly from the create modal. When creating a sandbox, you can specify ports to forward in the **Ports** field (comma-separated, e.g., `8080,3000`). 
After the sandbox reaches `Ready` state, Gator automatically spawns background SSH tunnels (`ssh -N -f -L :127.0.0.1:`) for each specified port. +The TUI supports creating sandboxes with port forwarding directly from the create modal. When creating a sandbox, you can specify ports to forward in the **Ports** field (comma-separated, e.g., `8080,3000`). After the sandbox reaches `Ready` state, the TUI automatically spawns background SSH tunnels (`ssh -N -f -L :127.0.0.1:`) for each specified port. Forwarded ports are displayed in the **NOTES** column of the sandbox table as `fwd:8080,3000` and in the **Forwards** row of the sandbox detail view. Port forwarding lifecycle: -- **On create**: Gator polls for sandbox readiness (up to 30 attempts at 2-second intervals), then spawns SSH tunnels. +- **On create**: The TUI polls for sandbox readiness (up to 30 attempts at 2-second intervals), then spawns SSH tunnels. - **On delete**: Any active forwards for the sandbox are automatically stopped before deletion. - **PID tracking**: Forward PIDs are stored in `~/.config/nemoclaw/forwards/-.pid`, shared with the CLI. @@ -141,17 +141,15 @@ The forwarding implementation lives in `navigator-core::forward`, shared between ## What is Not Yet Available -Gator is in its initial phase. The following features are planned but not yet implemented: +The TUI is in its initial phase. The following features are planned but not yet implemented: - **Inference and provider views** — browsing inference routes and provider configurations. - **Help overlay** — the `?` key is shown in the nav bar but does not open a help screen yet. - **Command bar autocomplete** — the command bar accepts text but does not offer suggestions. - **Filtering and search** — no `/` search within views yet. -See the [Gator design plan](plans/gator-tui.md) for the full roadmap, including mockups and future phases. - ## Crate Structure -The TUI lives in `crates/navigator-tui/`, a separate workspace crate. 
The CLI crate (`crates/navigator-cli/`) depends on it and launches it via the `Gator` command variant in the `Commands` enum. This keeps TUI-specific dependencies (ratatui, crossterm) out of the CLI when not in use. +The TUI lives in `crates/navigator-tui/`, a separate workspace crate. The CLI crate (`crates/navigator-cli/`) depends on it and launches it via the `Term` command variant in the `Commands` enum. This keeps TUI-specific dependencies (ratatui, crossterm) out of the CLI when not in use. The `navigator-tui` crate depends on `navigator-core` for protobuf types, the gRPC client, and shared utilities (e.g., `navigator_core::forward` for port forwarding PID management) — it communicates with the gateway over the same gRPC channel the CLI uses. diff --git a/crates/navigator-cli/Cargo.toml b/crates/navigator-cli/Cargo.toml index 8ce2f613..61e39965 100644 --- a/crates/navigator-cli/Cargo.toml +++ b/crates/navigator-cli/Cargo.toml @@ -76,6 +76,7 @@ futures = { workspace = true } rcgen = { version = "0.13", features = ["crypto", "pem"] } reqwest = { workspace = true } serde_json = { workspace = true } +temp-env = "0.3" tempfile = "3" tokio-stream = { workspace = true } url = { workspace = true } diff --git a/crates/navigator-cli/src/bootstrap.rs b/crates/navigator-cli/src/bootstrap.rs index f60343df..fbd1a266 100644 --- a/crates/navigator-cli/src/bootstrap.rs +++ b/crates/navigator-cli/src/bootstrap.rs @@ -97,13 +97,20 @@ fn is_connectivity_error(error: &miette::Report) -> bool { /// Prompt the user to confirm cluster bootstrap. /// -/// Only prompts when stdin is a terminal. In non-interactive mode, returns an -/// error with an actionable message. -pub fn confirm_bootstrap() -> Result { +/// When `override_value` is `Some(true)` or `Some(false)`, the decision is +/// made immediately (from `--bootstrap` / `--no-bootstrap`). Otherwise, +/// prompts interactively when stdin is a terminal, or returns an error in +/// non-interactive mode. 
+pub fn confirm_bootstrap(override_value: Option<bool>) -> Result<bool> { + // Explicit flag takes precedence over interactive detection. + if let Some(value) = override_value { + return Ok(value); + } + if !std::io::stdin().is_terminal() { return Err(miette::miette!( - "Cluster not reachable and bootstrap requires confirmation from an interactive terminal.\n\ - Run 'nemoclaw cluster admin deploy' first, then retry." + "Gateway not reachable and bootstrap requires confirmation from an interactive terminal.\n\ + Pass --bootstrap to auto-confirm, or run 'nemoclaw gateway start' first." )); } diff --git a/crates/navigator-cli/src/completers.rs b/crates/navigator-cli/src/completers.rs index 6a67a02a..1a124db7 100644 --- a/crates/navigator-cli/src/completers.rs +++ b/crates/navigator-cli/src/completers.rs @@ -118,37 +118,45 @@ where } #[cfg(test)] -#[allow(unsafe_code)] mod tests { use super::*; + use temp_env::with_vars; + + fn with_isolated_cli_env<F: FnOnce()>(tmp: &std::path::Path, f: F) { + let tmp = tmp.to_string_lossy().into_owned(); + with_vars( + [ + ("XDG_CONFIG_HOME", Some(tmp.as_str())), + ("NEMOCLAW_CLUSTER", None::<&str>), + ], + f, + ); + } #[test] fn cluster_completer_returns_empty_when_no_config() { let temp = tempfile::tempdir().unwrap(); - // SAFETY: test-only; tests run with --test-threads=1 or are isolated. 
- unsafe { std::env::set_var("XDG_CONFIG_HOME", temp.path()) }; - let result = complete_cluster_names(OsStr::new("")); - unsafe { std::env::remove_var("XDG_CONFIG_HOME") }; - assert!(result.is_empty()); + with_isolated_cli_env(temp.path(), || { + let result = complete_cluster_names(OsStr::new("")); + assert!(result.is_empty()); + }); } #[test] fn sandbox_completer_returns_empty_when_no_active_cluster() { - unsafe { std::env::remove_var("NEMOCLAW_CLUSTER") }; let temp = tempfile::tempdir().unwrap(); - unsafe { std::env::set_var("XDG_CONFIG_HOME", temp.path()) }; - let result = complete_sandbox_names(OsStr::new("")); - unsafe { std::env::remove_var("XDG_CONFIG_HOME") }; - assert!(result.is_empty()); + with_isolated_cli_env(temp.path(), || { + let result = complete_sandbox_names(OsStr::new("")); + assert!(result.is_empty()); + }); } #[test] fn provider_completer_returns_empty_when_no_active_cluster() { - unsafe { std::env::remove_var("NEMOCLAW_CLUSTER") }; let temp = tempfile::tempdir().unwrap(); - unsafe { std::env::set_var("XDG_CONFIG_HOME", temp.path()) }; - let result = complete_provider_names(OsStr::new("")); - unsafe { std::env::remove_var("XDG_CONFIG_HOME") }; - assert!(result.is_empty()); + with_isolated_cli_env(temp.path(), || { + let result = complete_provider_names(OsStr::new("")); + assert!(result.is_empty()); + }); } } diff --git a/crates/navigator-cli/src/main.rs b/crates/navigator-cli/src/main.rs index 8d056490..0d6faad3 100644 --- a/crates/navigator-cli/src/main.rs +++ b/crates/navigator-cli/src/main.rs @@ -44,17 +44,17 @@ fn resolve_cluster(cluster_flag: &Option) -> Result { .or_else(load_active_cluster) .ok_or_else(|| { miette::miette!( - "No active cluster.\n\ - Set one with: nemoclaw cluster use \n\ - Or deploy a new cluster: nemoclaw cluster admin deploy" + "No active gateway.\n\ + Set one with: nemoclaw gateway select \n\ + Or deploy a new gateway: nemoclaw gateway start" ) })?; let metadata = load_cluster_metadata(&name).map_err(|_| { 
miette::miette!( - "Unknown cluster '{name}'.\n\ - Deploy it first: nemoclaw cluster admin deploy --name {name}\n\ - Or list available clusters: nemoclaw cluster list" + "Unknown gateway '{name}'.\n\ + Deploy it first: nemoclaw gateway start --name {name}\n\ + Or list available gateways: nemoclaw gateway select" ) })?; @@ -66,8 +66,8 @@ fn resolve_cluster(cluster_flag: &Option) -> Result { /// Resolve only the cluster name (without requiring metadata to exist). /// -/// Used by admin commands that operate on a cluster by name but may not need -/// the gateway endpoint (e.g., `cluster admin deploy` creates the cluster). +/// Used by gateway commands that operate on a cluster by name but may not need +/// the gateway endpoint (e.g., `gateway start` creates the cluster). fn resolve_cluster_name(cluster_flag: &Option) -> Option { cluster_flag .clone() @@ -118,22 +118,64 @@ struct Cli { #[derive(Subcommand, Debug)] enum Commands { - /// Manage cluster. - Cluster { + /// Manage the gateway lifecycle. + Gateway { #[command(subcommand)] - command: ClusterCommands, + command: GatewayCommands, }, + /// Show gateway status and information. + Status, + /// Manage sandboxes. Sandbox { #[command(subcommand)] command: SandboxCommands, }, + /// Manage port forwarding to a sandbox. + Forward { + #[command(subcommand)] + command: ForwardCommands, + }, + + /// View sandbox logs. + Logs { + /// Sandbox name (defaults to last-used sandbox). + name: Option, + + /// Number of log lines to return. + #[arg(short, default_value_t = 200)] + n: u32, + + /// Stream live logs. + #[arg(long)] + tail: bool, + + /// Only show logs from this duration ago (e.g. 5m, 1h, 30s). + #[arg(long)] + since: Option, + + /// Filter by log source: "gateway", "sandbox", or "all" (default). + /// Can be specified multiple times: --source gateway --source sandbox + #[arg(long, default_value = "all")] + source: Vec, + + /// Minimum log level to display: error, warn, info (default), debug, trace. 
+ #[arg(long, default_value = "")] + level: String, + }, + + /// Manage sandbox policy. + Policy { + #[command(subcommand)] + command: PolicyCommands, + }, + /// Manage inference configuration. Inference { #[command(subcommand)] - command: InferenceCommands, + command: ClusterInferenceCommands, }, /// Manage provider configuration. @@ -142,8 +184,8 @@ enum Commands { command: ProviderCommands, }, - /// Launch the Gator interactive TUI. - Gator, + /// Launch the NemoClaw interactive TUI. + Term, /// Generate shell completions. #[command(after_long_help = COMPLETIONS_HELP)] @@ -187,6 +229,13 @@ enum Commands { #[arg(long)] name: Option, }, + + /// Manage cluster (deprecated: use `gateway`). + #[command(hide = true)] + Cluster { + #[command(subcommand)] + command: ClusterCommands, + }, } #[derive(Clone, Debug, ValueEnum)] @@ -259,6 +308,8 @@ enum CliProviderType { Opencode, Codex, Generic, + Openai, + Anthropic, Nvidia, Gitlab, Github, @@ -272,6 +323,8 @@ impl CliProviderType { Self::Opencode => "opencode", Self::Codex => "codex", Self::Generic => "generic", + Self::Openai => "openai", + Self::Anthropic => "anthropic", Self::Nvidia => "nvidia", Self::Gitlab => "gitlab", Self::Github => "github", @@ -367,33 +420,15 @@ enum ProviderCommands { }, } -#[derive(Subcommand, Debug)] -enum ClusterCommands { - /// Show server status and information. - Status, - - /// Set the active cluster. - Use { - /// Cluster name to make active. - #[arg(add = ArgValueCompleter::new(completers::complete_cluster_names))] - name: String, - }, - - /// List all provisioned clusters. - List, - - /// Manage local development cluster lifecycle. 
- Admin { - #[command(subcommand)] - command: ClusterAdminCommands, - }, -} +// ----------------------------------------------------------------------- +// Gateway commands (replaces the old `cluster` / `cluster admin` groups) +// ----------------------------------------------------------------------- #[derive(Subcommand, Debug)] -enum ClusterAdminCommands { - /// Provision or start a cluster (local or remote). - Deploy { - /// Cluster name. +enum GatewayCommands { + /// Deploy/start the gateway. + Start { + /// Gateway name. #[arg(long, default_value = "nemoclaw")] name: String, @@ -432,11 +467,18 @@ enum ClusterAdminCommands { /// allowing multiple clusters to coexist without port conflicts. #[arg(long, num_args = 0..=1, default_missing_value = "0")] kube_port: Option, + + /// Destroy and recreate the gateway from scratch if one already exists. + /// + /// Without this flag, an interactive prompt asks what to do; in + /// non-interactive mode the existing gateway is reused silently. + #[arg(long)] + recreate: bool, }, - /// Stop a cluster (preserves state). + /// Stop the gateway (preserves state). Stop { - /// Cluster name (defaults to active cluster). + /// Gateway name (defaults to active gateway). #[arg(long)] name: Option, @@ -449,9 +491,9 @@ enum ClusterAdminCommands { ssh_key: Option, }, - /// Destroy a cluster and its state. + /// Destroy the gateway and its state. Destroy { - /// Cluster name (defaults to active cluster). + /// Gateway name (defaults to active gateway). #[arg(long)] name: Option, @@ -464,16 +506,25 @@ enum ClusterAdminCommands { ssh_key: Option, }, - /// Show cluster deployment details. + /// Select the active gateway. + /// + /// When called without a name, lists available gateways to choose from. + Select { + /// Gateway name (omit to list available gateways). + #[arg(add = ArgValueCompleter::new(completers::complete_cluster_names))] + name: Option, + }, + + /// Show gateway deployment details. 
Info { - /// Cluster name (defaults to active cluster). + /// Gateway name (defaults to active gateway). #[arg(long)] name: Option, }, - /// Print or start an SSH tunnel for kubectl access to a remote cluster. + /// Print or start an SSH tunnel for kubectl access to a remote gateway. Tunnel { - /// Cluster name (defaults to active cluster). + /// Gateway name (defaults to active gateway). #[arg(long)] name: Option, @@ -491,6 +542,97 @@ enum ClusterAdminCommands { }, } +// ----------------------------------------------------------------------- +// Hidden backwards-compat: `cluster admin deploy` → `gateway start` +// ----------------------------------------------------------------------- + +#[derive(Subcommand, Debug)] +enum ClusterCommands { + /// Deprecated: use `gateway start`. + #[command(hide = true)] + Admin { + #[command(subcommand)] + command: ClusterAdminCommands, + }, + + /// Manage cluster-level inference configuration. + #[command(hide = true)] + Inference { + #[command(subcommand)] + command: ClusterInferenceCommands, + }, +} + +#[derive(Subcommand, Debug)] +enum ClusterInferenceCommands { + /// Set cluster-level inference provider and model. + Set { + /// Provider name. + #[arg(long, add = ArgValueCompleter::new(completers::complete_provider_names))] + provider: String, + + /// Model identifier to force for generation calls. + #[arg(long)] + model: String, + }, + + /// Update cluster-level inference configuration (partial update). + Update { + /// Provider name (unchanged if omitted). + #[arg(long, add = ArgValueCompleter::new(completers::complete_provider_names))] + provider: Option, + + /// Model identifier (unchanged if omitted). + #[arg(long)] + model: Option, + }, + + /// Get cluster-level inference provider and model. + Get, +} + +#[derive(Subcommand, Debug)] +enum ClusterAdminCommands { + /// Deprecated: use `gateway start`. + Deploy { + /// Cluster name. 
+ #[arg(long, default_value = "nemoclaw")] + name: String, + + /// Write stored kubeconfig into local kubeconfig. + #[arg(long)] + update_kube_config: bool, + + /// Print stored kubeconfig to stdout. + #[arg(long)] + get_kubeconfig: bool, + + /// SSH destination for remote deployment (e.g., user@hostname). + #[arg(long)] + remote: Option, + + /// Path to SSH private key for remote deployment. + #[arg(long, value_hint = ValueHint::FilePath)] + ssh_key: Option, + + /// Host port to map to the gateway (default: 8080). + #[arg(long, default_value_t = navigator_bootstrap::DEFAULT_GATEWAY_PORT)] + port: u16, + + /// Override the gateway host written into cluster metadata. + #[arg(long)] + gateway_host: Option, + + /// Expose the Kubernetes control plane on a host port for kubectl access. + #[arg(long, num_args = 0..=1, default_missing_value = "0")] + kube_port: Option, + + /// Destroy and recreate from scratch if a cluster already exists. + #[arg(long)] + recreate: bool, + }, +} + #[derive(Subcommand, Debug)] enum SandboxCommands { /// Create a sandbox. @@ -512,9 +654,19 @@ enum SandboxCommands { #[arg(long)] from: Option, - /// Sync local files into the sandbox before running. - #[arg(long)] - sync: bool, + /// Upload local files into the sandbox before running. + /// + /// Format: `[:]`. + /// When `SANDBOX_PATH` is omitted, files are uploaded to the container + /// working directory (`/sandbox`). + /// `.gitignore` rules are applied by default; use `--no-git-ignore` to + /// upload everything. + #[arg(long, value_hint = ValueHint::AnyPath)] + upload: Option, + + /// Disable `.gitignore` filtering for `--upload`. + #[arg(long, requires = "upload")] + no_git_ignore: bool, /// Keep the sandbox alive after non-interactive commands. #[arg(long)] @@ -555,6 +707,28 @@ enum SandboxCommands { #[arg(long, overrides_with = "tty")] no_tty: bool, + /// Auto-bootstrap a gateway if none is available. 
+ /// + /// Without this flag, an interactive prompt asks whether to bootstrap; + /// in non-interactive mode the command errors. + #[arg(long, overrides_with = "no_bootstrap")] + bootstrap: bool, + + /// Never bootstrap a gateway automatically; error if none is available. + #[arg(long, overrides_with = "bootstrap")] + no_bootstrap: bool, + + /// Auto-create missing providers from local credentials. + /// + /// Without this flag, an interactive prompt asks per-provider; + /// in non-interactive mode the command errors. + #[arg(long, overrides_with = "no_auto_providers")] + auto_providers: bool, + + /// Never auto-create providers; error if required providers are missing. + #[arg(long, overrides_with = "auto_providers")] + no_auto_providers: bool, + /// Command to run after "--" (defaults to an interactive shell). #[arg(trailing_var_arg = true)] command: Vec, @@ -602,63 +776,35 @@ enum SandboxCommands { name: Option, }, - /// Manage port forwarding to a sandbox. - Forward { - #[command(subcommand)] - command: ForwardCommands, - }, - - /// Sync files to or from a sandbox. - Sync { - /// Sandbox name (defaults to last-used sandbox). + /// Upload local files to a sandbox. + Upload { + /// Sandbox name. #[arg(add = ArgValueCompleter::new(completers::complete_sandbox_names))] - name: Option, - - /// Push local files up to the sandbox. - #[arg(long, conflicts_with = "down", value_name = "LOCAL_PATH", value_hint = ValueHint::AnyPath)] - up: Option, + name: String, - /// Pull sandbox files down to the local machine. - #[arg(long, conflicts_with = "up", value_name = "SANDBOX_PATH")] - down: Option, + /// Local path to upload. + #[arg(value_hint = ValueHint::AnyPath)] + local_path: String, - /// Destination path (sandbox path when pushing, local path when pulling). - /// Defaults to /sandbox for --up or . for --down. - #[arg(value_name = "DEST")] + /// Destination path in the sandbox (defaults to `/sandbox`). dest: Option, - }, - - /// Manage sandbox policy. 
- Policy { - #[command(subcommand)] - command: PolicyCommands, - }, - - /// View sandbox logs. - Logs { - /// Sandbox name (defaults to last-used sandbox). - name: Option, - - /// Number of log lines to return. - #[arg(short, default_value_t = 200)] - n: u32, - /// Stream live logs. + /// Disable `.gitignore` filtering (uploads everything). #[arg(long)] - tail: bool, + no_git_ignore: bool, + }, - /// Only show logs from this duration ago (e.g. 5m, 1h, 30s). - #[arg(long)] - since: Option, + /// Download files from a sandbox. + Download { + /// Sandbox name. + #[arg(add = ArgValueCompleter::new(completers::complete_sandbox_names))] + name: String, - /// Filter by log source: "gateway", "sandbox", or "all" (default). - /// Can be specified multiple times: --source gateway --source sandbox - #[arg(long, default_value = "all")] - source: Vec, + /// Sandbox path to download. + sandbox_path: String, - /// Minimum log level to display: error, warn, info (default), debug, trace. - #[arg(long, default_value = "")] - level: String, + /// Local destination (defaults to `.`). + dest: Option, }, /// Print an SSH config entry for a sandbox. @@ -746,69 +892,6 @@ enum ForwardCommands { List, } -#[derive(Subcommand, Debug)] -enum InferenceCommands { - /// Create an inference route. - Create { - /// Optional route name (auto-generated if omitted). - #[arg(long)] - name: Option, - #[arg(long)] - routing_hint: String, - #[arg(long)] - base_url: String, - /// Supported protocol(s). Repeat flag or pass comma-separated values. - /// - /// If omitted, protocols are auto-detected by probing the base URL. - #[arg(long = "protocol", value_delimiter = ',')] - protocols: Vec, - /// API key for the inference endpoint. Defaults to empty (for local models). - #[arg(long, default_value = "")] - api_key: String, - #[arg(long)] - model_id: String, - #[arg(long)] - disabled: bool, - }, - - /// Update an inference route. - Update { - /// Route name. 
- name: String, - #[arg(long)] - routing_hint: String, - #[arg(long)] - base_url: String, - /// Supported protocol(s). Repeat flag or pass comma-separated values. - /// - /// If omitted, protocols are auto-detected by probing the base URL. - #[arg(long = "protocol", value_delimiter = ',')] - protocols: Vec, - /// API key for the inference endpoint. Defaults to empty (for local models). - #[arg(long, default_value = "")] - api_key: String, - #[arg(long)] - model_id: String, - #[arg(long)] - disabled: bool, - }, - - /// Delete inference routes. - Delete { - /// Route names. - #[arg(required = true, num_args = 1.., value_name = "NAME")] - names: Vec, - }, - - /// List inference routes. - List { - #[arg(long, default_value_t = 100)] - limit: u32, - #[arg(long, default_value_t = 0)] - offset: u32, - }, -} - #[tokio::main] async fn main() -> Result<()> { // Install the rustls crypto provider before completion runs — completers may @@ -838,93 +921,277 @@ async fn main() -> Result<()> { .init(); match cli.command { - Some(Commands::Cluster { command }) => match command { - ClusterCommands::Status => { - let ctx = resolve_cluster(&cli.cluster)?; - let endpoint = &ctx.endpoint; - let tls = tls.with_cluster_name(&ctx.name); - run::cluster_status(&ctx.name, endpoint, &tls).await?; - } - ClusterCommands::Use { name } => { - run::cluster_use(&name)?; - } - ClusterCommands::List => { - run::cluster_list(&cli.cluster)?; - } - ClusterCommands::Admin { command } => match command { - ClusterAdminCommands::Deploy { - name, + // ----------------------------------------------------------- + // Gateway commands (was `cluster` / `cluster admin`) + // ----------------------------------------------------------- + Some(Commands::Gateway { command }) => match command { + GatewayCommands::Start { + name, + update_kube_config, + get_kubeconfig, + remote, + ssh_key, + port, + gateway_host, + kube_port, + recreate, + } => { + run::cluster_admin_deploy( + &name, update_kube_config, get_kubeconfig, 
- remote, - ssh_key, + remote.as_deref(), + ssh_key.as_deref(), port, - gateway_host, + gateway_host.as_deref(), kube_port, - } => { - run::cluster_admin_deploy( - &name, - update_kube_config, - get_kubeconfig, - remote.as_deref(), - ssh_key.as_deref(), - port, - gateway_host.as_deref(), - kube_port, - ) - .await?; + recreate, + ) + .await?; + } + GatewayCommands::Stop { + name, + remote, + ssh_key, + } => { + let name = name + .or_else(|| resolve_cluster_name(&cli.cluster)) + .unwrap_or_else(|| "nemoclaw".to_string()); + run::cluster_admin_stop(&name, remote.as_deref(), ssh_key.as_deref()).await?; + } + GatewayCommands::Destroy { + name, + remote, + ssh_key, + } => { + let name = name + .or_else(|| resolve_cluster_name(&cli.cluster)) + .unwrap_or_else(|| "nemoclaw".to_string()); + run::cluster_admin_destroy(&name, remote.as_deref(), ssh_key.as_deref()).await?; + } + GatewayCommands::Select { name } => { + if let Some(name) = name { + run::cluster_use(&name)?; + } else { + // No name provided — show available gateways. 
+ run::cluster_list(&cli.cluster)?; + eprintln!(); + eprintln!( + "Select a gateway with: {}", + "nemoclaw gateway select ".dimmed() + ); } - ClusterAdminCommands::Stop { - name, - remote, - ssh_key, - } => { - let name = name - .or_else(|| resolve_cluster_name(&cli.cluster)) - .unwrap_or_else(|| "nemoclaw".to_string()); - run::cluster_admin_stop(&name, remote.as_deref(), ssh_key.as_deref()).await?; + } + GatewayCommands::Info { name } => { + let name = name + .or_else(|| resolve_cluster_name(&cli.cluster)) + .unwrap_or_else(|| "nemoclaw".to_string()); + run::cluster_admin_info(&name)?; + } + GatewayCommands::Tunnel { + name, + remote, + ssh_key, + print_command, + } => { + let name = name + .or_else(|| resolve_cluster_name(&cli.cluster)) + .unwrap_or_else(|| "nemoclaw".to_string()); + run::cluster_admin_tunnel( + &name, + remote.as_deref(), + ssh_key.as_deref(), + print_command, + )?; + } + }, + + // ----------------------------------------------------------- + // Top-level status (was `cluster status`) + // ----------------------------------------------------------- + Some(Commands::Status) => { + if let Ok(ctx) = resolve_cluster(&cli.cluster) { + let tls = tls.with_cluster_name(&ctx.name); + run::cluster_status(&ctx.name, &ctx.endpoint, &tls).await?; + } else { + println!("{}", "Gateway Status".cyan().bold()); + println!(); + println!(" {} No gateway configured.", "Status:".dimmed(),); + println!(); + println!( + "Deploy a gateway with: {}", + "nemoclaw gateway start".dimmed() + ); + } + } + + // ----------------------------------------------------------- + // Top-level forward (was `sandbox forward`) + // ----------------------------------------------------------- + Some(Commands::Forward { command: fwd_cmd }) => match fwd_cmd { + ForwardCommands::Stop { port, name } => { + let cluster_name = resolve_cluster_name(&cli.cluster).unwrap_or_default(); + let name = resolve_sandbox_name(name, &cluster_name)?; + if run::stop_forward(&name, port)? 
{ + eprintln!( + "{} Stopped forward of port {port} for sandbox {name}", + "✓".green().bold(), + ); + } else { + eprintln!( + "{} No active forward found for port {port} on sandbox {name}", + "!".yellow(), + ); + } + } + ForwardCommands::List => { + let forwards = run::list_forwards()?; + if forwards.is_empty() { + eprintln!("No active forwards."); + } else { + let name_width = forwards + .iter() + .map(|f| f.sandbox.len()) + .max() + .unwrap_or(7) + .max(7); + println!( + "{: { + let ctx = resolve_cluster(&cli.cluster)?; + let tls = tls.with_cluster_name(&ctx.name); + let name = resolve_sandbox_name(name, &ctx.name)?; + run::sandbox_forward(&ctx.endpoint, &name, port, background, &tls).await?; + if background { + eprintln!( + "{} Forwarding port {port} to sandbox {name} in the background", + "✓".green().bold(), + ); + eprintln!(" Access at: http://127.0.0.1:{port}/"); + eprintln!(" Stop with: nemoclaw forward stop {port} {name}"); + } + } + }, + + // ----------------------------------------------------------- + // Top-level logs (was `sandbox logs`) + // ----------------------------------------------------------- + Some(Commands::Logs { + name, + n, + tail, + since, + source, + level, + }) => { + let ctx = resolve_cluster(&cli.cluster)?; + let tls = tls.with_cluster_name(&ctx.name); + let name = resolve_sandbox_name(name, &ctx.name)?; + run::sandbox_logs( + &ctx.endpoint, + &name, + n, + tail, + since.as_deref(), + &source, + &level, + &tls, + ) + .await?; + } + + // ----------------------------------------------------------- + // Top-level policy (was `sandbox policy`) + // ----------------------------------------------------------- + Some(Commands::Policy { + command: policy_cmd, + }) => { + let ctx = resolve_cluster(&cli.cluster)?; + let tls = tls.with_cluster_name(&ctx.name); + match policy_cmd { + PolicyCommands::Set { name, - remote, - ssh_key, + policy, + wait, + timeout, } => { - let name = name - .or_else(|| resolve_cluster_name(&cli.cluster)) - 
.unwrap_or_else(|| "nemoclaw".to_string()); - run::cluster_admin_destroy(&name, remote.as_deref(), ssh_key.as_deref()) + let name = resolve_sandbox_name(name, &ctx.name)?; + run::sandbox_policy_set(&ctx.endpoint, &name, &policy, wait, timeout, &tls) .await?; } - ClusterAdminCommands::Info { name } => { - let name = name - .or_else(|| resolve_cluster_name(&cli.cluster)) - .unwrap_or_else(|| "nemoclaw".to_string()); - run::cluster_admin_info(&name)?; + PolicyCommands::Get { name, rev, full } => { + let name = resolve_sandbox_name(name, &ctx.name)?; + run::sandbox_policy_get(&ctx.endpoint, &name, rev, full, &tls).await?; } - ClusterAdminCommands::Tunnel { - name, - remote, - ssh_key, - print_command, - } => { - let name = name - .or_else(|| resolve_cluster_name(&cli.cluster)) - .unwrap_or_else(|| "nemoclaw".to_string()); - run::cluster_admin_tunnel( - &name, - remote.as_deref(), - ssh_key.as_deref(), - print_command, - )?; + PolicyCommands::List { name, limit } => { + let name = resolve_sandbox_name(name, &ctx.name)?; + run::sandbox_policy_list(&ctx.endpoint, &name, limit, &tls).await?; } - }, - }, + } + } + + // ----------------------------------------------------------- + // Inference commands + // ----------------------------------------------------------- + Some(Commands::Inference { command }) => { + let ctx = resolve_cluster(&cli.cluster)?; + let endpoint = &ctx.endpoint; + let tls = tls.with_cluster_name(&ctx.name); + match command { + ClusterInferenceCommands::Set { provider, model } => { + run::cluster_inference_set(endpoint, &provider, &model, &tls).await?; + } + ClusterInferenceCommands::Update { provider, model } => { + run::cluster_inference_update( + endpoint, + provider.as_deref(), + model.as_deref(), + &tls, + ) + .await?; + } + ClusterInferenceCommands::Get => { + run::cluster_inference_get(endpoint, &tls).await?; + } + } + } + + // ----------------------------------------------------------- + // Sandbox commands + // 
----------------------------------------------------------- Some(Commands::Sandbox { command }) => { match command { SandboxCommands::Create { name, from, - sync, + upload, + no_git_ignore, keep, remote, ssh_key, @@ -933,6 +1200,10 @@ async fn main() -> Result<()> { forward, tty, no_tty, + bootstrap, + no_bootstrap, + auto_providers, + no_auto_providers, command, } => { // Resolve --tty / --no-tty into an Option override. @@ -944,14 +1215,38 @@ async fn main() -> Result<()> { None // auto-detect }; + // Resolve --bootstrap / --no-bootstrap into an Option. + let bootstrap_override = if no_bootstrap { + Some(false) + } else if bootstrap { + Some(true) + } else { + None // prompt or auto-detect + }; + + // Resolve --auto-providers / --no-auto-providers. + let auto_providers_override = if no_auto_providers { + Some(false) + } else if auto_providers { + Some(true) + } else { + None // prompt or auto-detect + }; + + // Parse --upload spec into (local_path, sandbox_path, git_ignore). + let upload_spec = upload.as_deref().map(|s| { + let (local, remote) = parse_upload_spec(s); + (local, remote, !no_git_ignore) + }); + // For `sandbox create`, a missing cluster is not fatal — the // bootstrap flow inside `sandbox_create` can deploy one. match resolve_cluster(&cli.cluster) { Ok(ctx) => { if remote.is_some() { eprintln!( - "{} --remote ignored: cluster '{}' is already active. \ - To redeploy, use: nemoclaw cluster admin deploy", + "{} --remote ignored: gateway '{}' is already active. 
\ + To redeploy, use: nemoclaw gateway start", "!".yellow(), ctx.name, ); @@ -964,7 +1259,7 @@ async fn main() -> Result<()> { name.as_deref(), from.as_deref(), &ctx.name, - sync, + upload_spec.as_ref(), keep, remote.as_deref(), ssh_key.as_deref(), @@ -973,6 +1268,8 @@ async fn main() -> Result<()> { forward, &command, tty_override, + bootstrap_override, + auto_providers_override, &tls, )) .await?; @@ -982,7 +1279,7 @@ async fn main() -> Result<()> { Box::pin(run::sandbox_create_with_bootstrap( name.as_deref(), from.as_deref(), - sync, + upload_spec.as_ref(), keep, remote.as_deref(), ssh_key.as_deref(), @@ -991,90 +1288,78 @@ async fn main() -> Result<()> { forward, &command, tty_override, + bootstrap_override, + auto_providers_override, )) .await?; } } } - SandboxCommands::Forward { - command: ForwardCommands::Stop { port, name }, + SandboxCommands::Upload { + name, + local_path, + dest, + no_git_ignore, } => { - let cluster_name = resolve_cluster_name(&cli.cluster).unwrap_or_default(); - let name = resolve_sandbox_name(name, &cluster_name)?; - if run::stop_forward(&name, port)? 
{ - eprintln!( - "{} Stopped forward of port {port} for sandbox {name}", - "✓".green().bold(), - ); - } else { - eprintln!( - "{} No active forward found for port {port} on sandbox {name}", - "!".yellow(), - ); + let ctx = resolve_cluster(&cli.cluster)?; + let tls = tls.with_cluster_name(&ctx.name); + let sandbox_dest = dest.as_deref().unwrap_or("/sandbox"); + let local = std::path::Path::new(&local_path); + if !local.exists() { + return Err(miette::miette!( + "local path does not exist: {}", + local.display() + )); + } + eprintln!("Uploading {} -> sandbox:{}", local.display(), sandbox_dest); + if !no_git_ignore + && let Ok(repo_root) = run::git_repo_root() + && let Ok(files) = run::git_sync_files(&repo_root) + && !files.is_empty() + { + run::sandbox_sync_up_files( + &ctx.endpoint, + &name, + &repo_root, + &files, + sandbox_dest, + &tls, + ) + .await?; + eprintln!("{} Upload complete", "✓".green().bold()); + return Ok(()); } + // Fallback: upload without git filtering + run::sandbox_sync_up(&ctx.endpoint, &name, local, sandbox_dest, &tls).await?; + eprintln!("{} Upload complete", "✓".green().bold()); } - SandboxCommands::Forward { - command: ForwardCommands::List, + SandboxCommands::Download { + name, + sandbox_path, + dest, } => { - let forwards = run::list_forwards()?; - if forwards.is_empty() { - eprintln!("No active forwards."); - } else { - let name_width = forwards - .iter() - .map(|f| f.sandbox.len()) - .max() - .unwrap_or(7) - .max(7); // at least as wide as "SANDBOX" - println!( - "{: {}", + sandbox_path, + local_dest.display() + ); + run::sandbox_sync_down(&ctx.endpoint, &name, &sandbox_path, local_dest, &tls) + .await?; + eprintln!("{} Download complete", "✓".green().bold()); } other => { let ctx = resolve_cluster(&cli.cluster)?; let endpoint = &ctx.endpoint; let tls = tls.with_cluster_name(&ctx.name); match other { - SandboxCommands::Create { .. } => { + SandboxCommands::Create { .. } + | SandboxCommands::Upload { .. } + | SandboxCommands::Download { .. 
} => { unreachable!() } - SandboxCommands::Sync { - name, - up, - down, - dest, - } => { - let name = resolve_sandbox_name(name, &ctx.name)?; - run::sandbox_sync_command( - endpoint, - &name, - up.as_deref(), - down.as_deref(), - dest.as_deref(), - &tls, - ) - .await?; - } SandboxCommands::Get { name } => { let name = resolve_sandbox_name(name, &ctx.name)?; run::sandbox_get(endpoint, &name, &tls).await?; @@ -1095,73 +1380,6 @@ async fn main() -> Result<()> { let _ = save_last_sandbox(&ctx.name, &name); run::sandbox_connect(endpoint, &name, &tls).await?; } - SandboxCommands::Forward { command: fwd } => match fwd { - ForwardCommands::Start { - port, - name, - background, - } => { - let name = resolve_sandbox_name(name, &ctx.name)?; - run::sandbox_forward(endpoint, &name, port, background, &tls) - .await?; - if background { - eprintln!( - "{} Forwarding port {port} to sandbox {name} in the background", - "✓".green().bold(), - ); - eprintln!(" Access at: http://127.0.0.1:{port}/"); - eprintln!( - " Stop with: nemoclaw sandbox forward stop {port} {name}", - ); - } - } - ForwardCommands::Stop { .. 
} | ForwardCommands::List => unreachable!(), - }, - SandboxCommands::Policy { - command: policy_cmd, - } => match policy_cmd { - PolicyCommands::Set { - name, - policy, - wait, - timeout, - } => { - let name = resolve_sandbox_name(name, &ctx.name)?; - run::sandbox_policy_set( - endpoint, &name, &policy, wait, timeout, &tls, - ) - .await?; - } - PolicyCommands::Get { name, rev, full } => { - let name = resolve_sandbox_name(name, &ctx.name)?; - run::sandbox_policy_get(endpoint, &name, rev, full, &tls).await?; - } - PolicyCommands::List { name, limit } => { - let name = resolve_sandbox_name(name, &ctx.name)?; - run::sandbox_policy_list(endpoint, &name, limit, &tls).await?; - } - }, - SandboxCommands::Logs { - name, - n, - tail, - since, - source, - level, - } => { - let name = resolve_sandbox_name(name, &ctx.name)?; - run::sandbox_logs( - endpoint, - &name, - n, - tail, - since.as_deref(), - &source, - &level, - &tls, - ) - .await?; - } SandboxCommands::SshConfig { name } => { let name = resolve_sandbox_name(name, &ctx.name)?; run::print_ssh_config(&ctx.name, &name); @@ -1170,64 +1388,6 @@ async fn main() -> Result<()> { } } } - Some(Commands::Inference { command }) => { - let ctx = resolve_cluster(&cli.cluster)?; - let endpoint = &ctx.endpoint; - let tls = tls.with_cluster_name(&ctx.name); - - match command { - InferenceCommands::Create { - name, - routing_hint, - base_url, - protocols, - api_key, - model_id, - disabled, - } => { - run::inference_route_create( - endpoint, - name.as_deref(), - &routing_hint, - &base_url, - &protocols, - &api_key, - &model_id, - !disabled, - &tls, - ) - .await?; - } - InferenceCommands::Update { - name, - routing_hint, - base_url, - protocols, - api_key, - model_id, - disabled, - } => { - run::inference_route_update( - endpoint, - &name, - &routing_hint, - &base_url, - &protocols, - &api_key, - &model_id, - !disabled, - &tls, - ) - .await?; - } - InferenceCommands::Delete { names } => { - run::inference_route_delete(endpoint, &names, 
&tls).await?; - } - InferenceCommands::List { limit, offset } => { - run::inference_route_list(endpoint, limit, offset, &tls).await?; - } - } - } Some(Commands::Provider { command }) => { let ctx = resolve_cluster(&cli.cluster)?; let endpoint = &ctx.endpoint; @@ -1285,7 +1445,7 @@ async fn main() -> Result<()> { } } } - Some(Commands::Gator) => { + Some(Commands::Term) => { let ctx = resolve_cluster(&cli.cluster)?; let tls = tls.with_cluster_name(&ctx.name); let channel = navigator_cli::tls::build_channel(&ctx.endpoint, &tls).await?; @@ -1326,9 +1486,9 @@ async fn main() -> Result<()> { } else { let meta = load_cluster_metadata(&c).map_err(|_| { miette::miette!( - "Unknown cluster '{c}'.\n\ - Deploy it first: nemoclaw cluster admin deploy --name {c}\n\ - Or list available clusters: nemoclaw cluster list" + "Unknown gateway '{c}'.\n\ + Deploy it first: nemoclaw gateway start --name {c}\n\ + Or list available gateways: nemoclaw gateway select" ) })?; meta.gateway_endpoint @@ -1347,6 +1507,66 @@ async fn main() -> Result<()> { } } } + + // ----------------------------------------------------------- + // Hidden backwards-compat: `cluster admin deploy` + // ----------------------------------------------------------- + Some(Commands::Cluster { command }) => match command { + ClusterCommands::Admin { command } => match command { + ClusterAdminCommands::Deploy { + name, + update_kube_config, + get_kubeconfig, + remote, + ssh_key, + port, + gateway_host, + kube_port, + recreate, + } => { + eprintln!( + "{} `nemoclaw cluster admin deploy` is deprecated. 
\ + Use `nemoclaw gateway start` instead.", + "warning:".yellow().bold(), + ); + run::cluster_admin_deploy( + &name, + update_kube_config, + get_kubeconfig, + remote.as_deref(), + ssh_key.as_deref(), + port, + gateway_host.as_deref(), + kube_port, + recreate, + ) + .await?; + } + }, + ClusterCommands::Inference { command } => { + let ctx = resolve_cluster(&cli.cluster)?; + let endpoint = &ctx.endpoint; + let tls = tls.with_cluster_name(&ctx.name); + match command { + ClusterInferenceCommands::Set { provider, model } => { + run::cluster_inference_set(endpoint, &provider, &model, &tls).await?; + } + ClusterInferenceCommands::Update { provider, model } => { + run::cluster_inference_update( + endpoint, + provider.as_deref(), + model.as_deref(), + &tls, + ) + .await?; + } + ClusterInferenceCommands::Get => { + run::cluster_inference_get(endpoint, &tls).await?; + } + } + } + }, + None => { Cli::command().print_help().expect("Failed to print help"); } @@ -1355,6 +1575,22 @@ async fn main() -> Result<()> { Ok(()) } +/// Parse an upload spec like `[:]` into (local_path, optional_sandbox_path). 
+fn parse_upload_spec(spec: &str) -> (String, Option) { + if let Some((local, remote)) = spec.split_once(':') { + ( + local.to_string(), + if remote.is_empty() { + None + } else { + Some(remote.to_string()) + }, + ) + } else { + (spec.to_string(), None) + } +} + #[cfg(test)] mod tests { use super::*; @@ -1422,7 +1658,7 @@ mod tests { #[test] fn completions_policy_flag_falls_back_to_file_paths() { let temp = tempfile::tempdir().expect("failed to create tempdir"); - std::fs::write(temp.path().join("policy.yaml"), "version: 1\n") + fs::write(temp.path().join("policy.yaml"), "version: 1\n") .expect("failed to create policy file"); let mut cmd = Cli::command(); @@ -1449,15 +1685,15 @@ mod tests { #[test] fn completions_other_path_flags_fall_back_to_path_candidates() { let temp = tempfile::tempdir().expect("failed to create tempdir"); - std::fs::write(temp.path().join("id_rsa"), "key").expect("failed to create key file"); - std::fs::write(temp.path().join("Dockerfile"), "FROM scratch\n") + fs::write(temp.path().join("id_rsa"), "key").expect("failed to create key file"); + fs::write(temp.path().join("Dockerfile"), "FROM scratch\n") .expect("failed to create dockerfile"); - std::fs::create_dir(temp.path().join("ctx")).expect("failed to create context directory"); + fs::create_dir(temp.path().join("ctx")).expect("failed to create context directory"); let cases: Vec<(Vec<&str>, usize, &str)> = vec![ ( - vec!["nemoclaw", "cluster", "admin", "deploy", "--ssh-key", "id"], - 5, + vec!["nemoclaw", "gateway", "start", "--ssh-key", "id"], + 4, "id_rsa", ), ( @@ -1466,8 +1702,8 @@ mod tests { "id_rsa", ), ( - vec!["nemoclaw", "sandbox", "sync", "demo", "--up", "Do"], - 5, + vec!["nemoclaw", "sandbox", "upload", "demo", "Do"], + 4, "Dockerfile", ), ]; @@ -1491,26 +1727,26 @@ mod tests { } #[test] - fn sandbox_sync_up_uses_path_value_hint() { + fn sandbox_upload_uses_path_value_hint() { let cmd = Cli::command(); let sandbox = cmd .get_subcommands() .find(|c| c.get_name() == "sandbox") 
.expect("missing sandbox subcommand"); - let sync = sandbox + let upload = sandbox .get_subcommands() - .find(|c| c.get_name() == "sync") - .expect("missing sandbox sync subcommand"); - let up = sync + .find(|c| c.get_name() == "upload") + .expect("missing sandbox upload subcommand"); + let local_path = upload .get_arguments() - .find(|arg| arg.get_id() == "up") - .expect("missing --up argument"); + .find(|arg| arg.get_id() == "local_path") + .expect("missing local_path argument"); - assert_eq!(up.get_value_hint(), ValueHint::AnyPath); + assert_eq!(local_path.get_value_hint(), ValueHint::AnyPath); } #[test] - fn sandbox_sync_up_completion_suggests_local_paths() { + fn sandbox_upload_completion_suggests_local_paths() { let temp = tempfile::tempdir().expect("failed to create tempdir"); fs::write(temp.path().join("sample.txt"), "x").expect("failed to create sample file"); @@ -1518,12 +1754,11 @@ mod tests { let args: Vec = vec![ "nemoclaw".into(), "sandbox".into(), - "sync".into(), + "upload".into(), "demo".into(), - "--up".into(), "sa".into(), ]; - let candidates = clap_complete::engine::complete(&mut cmd, args, 5, Some(temp.path())) + let candidates = clap_complete::engine::complete(&mut cmd, args, 4, Some(temp.path())) .expect("completion engine failed"); let names: Vec = candidates @@ -1532,10 +1767,31 @@ mod tests { .collect(); assert!( names.iter().any(|name| name.contains("sample.txt")), - "expected path completion for --up, got: {names:?}" + "expected path completion for upload local_path, got: {names:?}" ); } + #[test] + fn parse_upload_spec_without_remote() { + let (local, remote) = parse_upload_spec("./src"); + assert_eq!(local, "./src"); + assert_eq!(remote, None); + } + + #[test] + fn parse_upload_spec_with_remote() { + let (local, remote) = parse_upload_spec("./src:/sandbox/src"); + assert_eq!(local, "./src"); + assert_eq!(remote, Some("/sandbox/src".to_string())); + } + + #[test] + fn parse_upload_spec_with_trailing_colon() { + let (local, remote) = 
parse_upload_spec("./src:"); + assert_eq!(local, "./src"); + assert_eq!(remote, None); + } + #[test] fn resolve_sandbox_name_returns_explicit_name() { // When a name is provided, it should be returned regardless of any diff --git a/crates/navigator-cli/src/run.rs b/crates/navigator-cli/src/run.rs index 68d49a23..84829683 100644 --- a/crates/navigator-cli/src/run.rs +++ b/crates/navigator-cli/src/run.rs @@ -22,19 +22,17 @@ use navigator_bootstrap::{ }; use navigator_core::proto::navigator_client::NavigatorClient; use navigator_core::proto::{ - CreateInferenceRouteRequest, CreateProviderRequest, CreateSandboxRequest, - DeleteInferenceRouteRequest, DeleteProviderRequest, DeleteSandboxRequest, GetProviderRequest, - GetSandboxLogsRequest, GetSandboxPolicyStatusRequest, GetSandboxRequest, HealthRequest, - InferenceRoute, InferenceRouteSpec, ListInferenceRoutesRequest, ListProvidersRequest, + CreateProviderRequest, CreateSandboxRequest, DeleteProviderRequest, DeleteSandboxRequest, + GetClusterInferenceRequest, GetProviderRequest, GetSandboxLogsRequest, + GetSandboxPolicyStatusRequest, GetSandboxRequest, HealthRequest, ListProvidersRequest, ListSandboxPoliciesRequest, ListSandboxesRequest, PolicyStatus, Provider, Sandbox, - SandboxPhase, SandboxPolicy, SandboxSpec, SandboxTemplate, UpdateInferenceRouteRequest, + SandboxPhase, SandboxPolicy, SandboxSpec, SandboxTemplate, SetClusterInferenceRequest, UpdateProviderRequest, UpdateSandboxPolicyRequest, WatchSandboxRequest, }; use navigator_providers::{ ProviderRegistry, detect_provider_from_command, normalize_provider_type, }; use owo_colors::OwoColorize; -use reqwest::StatusCode as ReqwestStatusCode; use std::collections::{HashMap, HashSet, VecDeque}; use std::io::{IsTerminal, Write}; use std::path::{Path, PathBuf}; @@ -511,14 +509,14 @@ pub fn cluster_use(name: &str) -> Result<()> { // Verify the cluster exists get_cluster_metadata(name).ok_or_else(|| { miette::miette!( - "No cluster metadata found for '{name}'.\n\ - Deploy a 
cluster first with: nemoclaw cluster admin deploy --name {name}\n\ - Or list available clusters: nemoclaw cluster list" + "No gateway metadata found for '{name}'.\n\ + Deploy a gateway first with: nemoclaw gateway start --name {name}\n\ + Or list available gateways: nemoclaw gateway select" ) })?; save_active_cluster(name)?; - eprintln!("{} Active cluster set to '{name}'", "✓".green().bold()); + eprintln!("{} Active gateway set to '{name}'", "✓".green().bold()); Ok(()) } @@ -528,11 +526,11 @@ pub fn cluster_list(cluster_flag: &Option) -> Result<()> { let active = cluster_flag.clone().or_else(load_active_cluster); if clusters.is_empty() { - println!("No clusters found."); + println!("No gateways found."); println!(); println!( - "Deploy a cluster with: {}", - "nemoclaw cluster admin deploy".dimmed() + "Deploy a gateway with: {}", + "nemoclaw gateway start".dimmed() ); return Ok(()); } @@ -635,7 +633,7 @@ fn prompt_existing_cluster( /// Deploy a cluster with the rich progress panel (interactive) or simple /// logging (non-interactive). Returns the [`ClusterHandle`] on success. /// -/// This is the shared deploy UX used by both `cluster admin deploy` and +/// This is the shared deploy UX used by both `gateway start` and /// the auto-bootstrap path in `sandbox create`. pub(crate) async fn deploy_cluster_with_panel( options: DeployOptions, @@ -716,6 +714,7 @@ pub async fn cluster_admin_deploy( port: u16, gateway_host: Option<&str>, kube_port: Option, + recreate: bool, ) -> Result<()> { let location = if remote.is_some() { "remote" } else { "local" }; @@ -741,8 +740,9 @@ pub async fn cluster_admin_deploy( let interactive = std::io::stderr().is_terminal(); - // Check for existing cluster and prompt user if found - if interactive { + // Check for existing cluster and prompt user if found. + // --recreate skips the prompt and always destroys. 
+ { let remote_opts = remote.map(|dest| { let mut opts = RemoteOptions::new(dest); if let Some(key) = ssh_key { @@ -753,8 +753,15 @@ pub async fn cluster_admin_deploy( if let Some(info) = navigator_bootstrap::check_existing_deployment(name, remote_opts.as_ref()).await? { - let recreate = prompt_existing_cluster(name, &info)?; - if recreate { + let should_recreate = if recreate { + true + } else if interactive { + prompt_existing_cluster(name, &info)? + } else { + false // non-interactive without --recreate: silently reuse + }; + + if should_recreate { eprintln!("• Destroying existing cluster..."); let handle = navigator_bootstrap::cluster_handle(name, remote_opts.as_ref()).await?; @@ -866,8 +873,8 @@ pub async fn cluster_admin_destroy( pub fn cluster_admin_info(name: &str) -> Result<()> { let metadata = get_cluster_metadata(name).ok_or_else(|| { miette::miette!( - "No cluster metadata found for '{name}'.\n\ - Deploy a cluster first with: nemoclaw cluster admin deploy --name {name}" + "No gateway metadata found for '{name}'.\n\ + Deploy a gateway first with: nemoclaw gateway start --name {name}" ) })?; @@ -902,7 +909,7 @@ pub fn cluster_admin_info(name: &str) -> Result<()> { if let (Some(host), Some(kube_port)) = (&metadata.remote_host, metadata.kube_port) { println!(); println!("{}", "SSH tunnel for kubectl access:".dimmed()); - println!(" nemoclaw cluster admin tunnel --name {name}"); + println!(" nemoclaw gateway tunnel --name {name}"); println!("Or manually:"); println!(" ssh -L {kube_port}:127.0.0.1:6443 {host}"); } @@ -969,7 +976,7 @@ pub fn cluster_admin_tunnel( pub async fn sandbox_create_with_bootstrap( name: Option<&str>, from: Option<&str>, - sync: bool, + upload: Option<&(String, Option, bool)>, keep: bool, remote: Option<&str>, ssh_key: Option<&str>, @@ -978,12 +985,14 @@ pub async fn sandbox_create_with_bootstrap( forward: Option, command: &[String], tty_override: Option, + bootstrap_override: Option, + auto_providers_override: Option, ) -> Result<()> 
{ - if !crate::bootstrap::confirm_bootstrap()? { + if !crate::bootstrap::confirm_bootstrap(bootstrap_override)? { return Err(miette::miette!( - "No active cluster.\n\ - Set one with: nemoclaw cluster use \n\ - Or deploy a new cluster: nemoclaw cluster admin deploy" + "No active gateway.\n\ + Set one with: nemoclaw gateway select \n\ + Or deploy a new gateway: nemoclaw gateway start" )); } let (tls, server) = crate::bootstrap::run_bootstrap(remote, ssh_key).await?; @@ -994,7 +1003,7 @@ pub async fn sandbox_create_with_bootstrap( name, from, cluster_name, - sync, + upload, keep, remote, ssh_key, @@ -1003,6 +1012,8 @@ pub async fn sandbox_create_with_bootstrap( forward, command, tty_override, + bootstrap_override, + auto_providers_override, &tls, ) .await @@ -1015,7 +1026,7 @@ pub async fn sandbox_create( name: Option<&str>, from: Option<&str>, cluster_name: &str, - sync: bool, + upload: Option<&(String, Option, bool)>, keep: bool, remote: Option<&str>, ssh_key: Option<&str>, @@ -1024,6 +1035,8 @@ pub async fn sandbox_create( forward: Option, command: &[String], tty_override: Option, + bootstrap_override: Option, + auto_providers_override: Option, tls: &TlsOptions, ) -> Result<()> { // Try connecting to the cluster. If it fails due to an unreachable cluster, @@ -1034,7 +1047,7 @@ pub async fn sandbox_create( if !crate::bootstrap::should_attempt_bootstrap(&err, tls) { return Err(err); } - if !crate::bootstrap::confirm_bootstrap()? { + if !crate::bootstrap::confirm_bootstrap(bootstrap_override)? 
{ return Err(err); } let (new_tls, new_server) = crate::bootstrap::run_bootstrap(remote, ssh_key).await?; @@ -1065,8 +1078,13 @@ pub async fn sandbox_create( }; let inferred_types: Vec = inferred_provider_type(command).into_iter().collect(); - let configured_providers = - ensure_required_providers(&mut client, providers, &inferred_types).await?; + let configured_providers = ensure_required_providers( + &mut client, + providers, + &inferred_types, + auto_providers_override, + ) + .await?; let mut policy = load_sandbox_policy(policy)?; @@ -1247,16 +1265,29 @@ pub async fn sandbox_create( drop(stream); drop(client); - if sync { - let repo_root = git_repo_root()?; - let files = git_sync_files(&repo_root)?; - if !files.is_empty() { + if let Some((local_path, sandbox_path, git_ignore)) = upload { + let dest = sandbox_path.as_deref().unwrap_or("/sandbox"); + let local = Path::new(local_path); + if *git_ignore + && let Ok(repo_root) = git_repo_root() + && let Ok(files) = git_sync_files(&repo_root) + && !files.is_empty() + { sandbox_sync_up_files( &effective_server, &sandbox_name, &repo_root, &files, - "/sandbox", + dest, + &effective_tls, + ) + .await?; + } else if local.exists() { + sandbox_sync_up( + &effective_server, + &sandbox_name, + local, + dest, &effective_tls, ) .await?; @@ -1280,7 +1311,7 @@ pub async fn sandbox_create( "✓".green().bold(), ); eprintln!("Access at: http://127.0.0.1:{port}/"); - eprintln!("Stop with: nemoclaw sandbox forward stop {port} {sandbox_name}",); + eprintln!("Stop with: nemoclaw forward stop {port} {sandbox_name}",); } if command.is_empty() { @@ -1752,6 +1783,7 @@ async fn ensure_required_providers( client: &mut NavigatorClient, explicit_names: &[String], inferred_types: &[String], + auto_providers_override: Option, ) -> Result> { if explicit_names.is_empty() && inferred_types.is_empty() { return Ok(Vec::new()); @@ -1811,9 +1843,23 @@ async fn ensure_required_providers( .collect::>(); if !missing.is_empty() { - if 
!std::io::stdin().is_terminal() { + // --no-auto-providers: skip all missing providers silently. + if auto_providers_override == Some(false) { + for provider_type in &missing { + eprintln!( + "{} Skipping provider '{provider_type}' (--no-auto-providers)", + "!".yellow(), + ); + } + return Ok(configured_names); + } + + // No override and non-interactive: error. + if auto_providers_override.is_none() && !std::io::stdin().is_terminal() { return Err(miette::miette!( - "missing required providers: {}. Create them first with `nemoclaw provider create --type --name --from-existing`, or set them up manually from inside the sandbox", + "missing required providers: {}. Create them first with \ + `nemoclaw provider create --type --name --from-existing`, \ + pass --auto-providers to auto-create, or set them up manually from inside the sandbox", missing.join(", ") )); } @@ -1821,11 +1867,17 @@ async fn ensure_required_providers( let registry = ProviderRegistry::new(); for provider_type in missing { eprintln!("Missing provider: {provider_type}"); - let should_create = Confirm::new() - .with_prompt("Create from local credentials?") - .default(true) - .interact() - .into_diagnostic()?; + + // --auto-providers: auto-confirm all. + let should_create = if auto_providers_override == Some(true) { + true + } else { + Confirm::new() + .with_prompt("Create from local credentials?") + .default(true) + .interact() + .into_diagnostic()? 
+ }; if !should_create { eprintln!("{} Skipping provider '{provider_type}'", "!".yellow(),); @@ -2219,377 +2271,88 @@ pub async fn provider_delete(server: &str, names: &[String], tls: &TlsOptions) - Ok(()) } -#[allow(clippy::too_many_arguments)] -pub async fn inference_route_create( +pub async fn cluster_inference_set( server: &str, - name: Option<&str>, - routing_hint: &str, - base_url: &str, - protocols: &[String], - api_key: &str, + provider_name: &str, model_id: &str, - enabled: bool, tls: &TlsOptions, ) -> Result<()> { - let spinner = inference_route_spinner("Preparing inference route..."); - let (resolved_protocols, auto_detected) = - match resolve_route_protocols(protocols, base_url, api_key, model_id, Some(&spinner)).await - { - Ok(result) => result, - Err(err) => { - spinner.finish_and_clear(); - return Err(err); - } - }; - - spinner.set_message("Creating inference route...".to_string()); - - let mut client = match grpc_inference_client(server, tls).await { - Ok(client) => client, - Err(err) => { - spinner.finish_and_clear(); - return Err(err); - } - }; - + let mut client = grpc_inference_client(server, tls).await?; let response = client - .create_inference_route(CreateInferenceRouteRequest { - name: name.unwrap_or_default().to_string(), - route: Some(InferenceRouteSpec { - routing_hint: routing_hint.to_string(), - base_url: base_url.to_string(), - protocols: resolved_protocols.clone(), - api_key: api_key.to_string(), - model_id: model_id.to_string(), - enabled, - }), + .set_cluster_inference(SetClusterInferenceRequest { + provider_name: provider_name.to_string(), + model_id: model_id.to_string(), }) .await - .into_diagnostic(); - - let response = match response { - Ok(response) => response, - Err(err) => { - spinner.finish_and_clear(); - return Err(err); - } - }; - - spinner.finish_and_clear(); + .into_diagnostic()?; - if let Some(route) = response.into_inner().route { - println!("{} Created route {}", "✓".green().bold(), route.name); - if auto_detected { 
- println!( - " {} {}", - "Detected protocols:".dimmed(), - resolved_protocols.join(", ") - ); - } - } + let configured = response.into_inner(); + println!("{}", "Cluster inference configured:".cyan().bold()); + println!(); + println!(" {} {}", "Provider:".dimmed(), configured.provider_name); + println!(" {} {}", "Model:".dimmed(), configured.model_id); + println!(" {} {}", "Version:".dimmed(), configured.version); Ok(()) } -#[allow(clippy::too_many_arguments)] -pub async fn inference_route_update( +pub async fn cluster_inference_update( server: &str, - name: &str, - routing_hint: &str, - base_url: &str, - protocols: &[String], - api_key: &str, - model_id: &str, - enabled: bool, + provider_name: Option<&str>, + model_id: Option<&str>, tls: &TlsOptions, ) -> Result<()> { - let spinner = inference_route_spinner("Preparing inference route update..."); - let (resolved_protocols, auto_detected) = - match resolve_route_protocols(protocols, base_url, api_key, model_id, Some(&spinner)).await - { - Ok(result) => result, - Err(err) => { - spinner.finish_and_clear(); - return Err(err); - } - }; + if provider_name.is_none() && model_id.is_none() { + return Err(miette::miette!( + "at least one of --provider or --model must be specified" + )); + } - spinner.set_message(format!("Updating inference route {name}...")); + let mut client = grpc_inference_client(server, tls).await?; - let mut client = match grpc_inference_client(server, tls).await { - Ok(client) => client, - Err(err) => { - spinner.finish_and_clear(); - return Err(err); - } - }; + // Fetch current config to use as base for the partial update. + let current = client + .get_cluster_inference(GetClusterInferenceRequest {}) + .await + .into_diagnostic()? 
+ .into_inner(); + + let provider = provider_name.unwrap_or(¤t.provider_name); + let model = model_id.unwrap_or(¤t.model_id); let response = client - .update_inference_route(UpdateInferenceRouteRequest { - name: name.to_string(), - route: Some(InferenceRouteSpec { - routing_hint: routing_hint.to_string(), - base_url: base_url.to_string(), - protocols: resolved_protocols.clone(), - api_key: api_key.to_string(), - model_id: model_id.to_string(), - enabled, - }), + .set_cluster_inference(SetClusterInferenceRequest { + provider_name: provider.to_string(), + model_id: model.to_string(), }) .await - .into_diagnostic(); - - let response = match response { - Ok(response) => response, - Err(err) => { - spinner.finish_and_clear(); - return Err(err); - } - }; - - spinner.finish_and_clear(); - - if let Some(route) = response.into_inner().route { - println!("{} Updated route {}", "✓".green().bold(), route.name); - if auto_detected { - println!( - " {} {}", - "Detected protocols:".dimmed(), - resolved_protocols.join(", ") - ); - } - } - Ok(()) -} + .into_diagnostic()?; -pub async fn inference_route_delete( - server: &str, - names: &[String], - tls: &TlsOptions, -) -> Result<()> { - let mut client = grpc_inference_client(server, tls).await?; - for name in names { - let response = client - .delete_inference_route(DeleteInferenceRouteRequest { name: name.clone() }) - .await - .into_diagnostic()?; - if response.into_inner().deleted { - println!("{} Deleted route {name}", "✓".green().bold()); - } else { - println!("{} Route {name} not found", "!".yellow()); - } - } + let configured = response.into_inner(); + println!("{}", "Cluster inference updated:".cyan().bold()); + println!(); + println!(" {} {}", "Provider:".dimmed(), configured.provider_name); + println!(" {} {}", "Model:".dimmed(), configured.model_id); + println!(" {} {}", "Version:".dimmed(), configured.version); Ok(()) } -pub async fn inference_route_list( - server: &str, - limit: u32, - offset: u32, - tls: &TlsOptions, -) 
-> Result<()> { +pub async fn cluster_inference_get(server: &str, tls: &TlsOptions) -> Result<()> { let mut client = grpc_inference_client(server, tls).await?; let response = client - .list_inference_routes(ListInferenceRoutesRequest { limit, offset }) + .get_cluster_inference(GetClusterInferenceRequest {}) .await .into_diagnostic()?; - let routes = response.into_inner().routes; - - if routes.is_empty() { - println!("No inference routes found"); - return Ok(()); - } - - println!( - "{:<12} {:<16} {:<40} {:<30} {:<30} {:<8}", - "NAME".bold(), - "HINT".bold(), - "BASE URL".bold(), - "PROTOCOLS".bold(), - "MODEL".bold(), - "ENABLED".bold() - ); - for route in routes { - print_route_row(&route); - } + let configured = response.into_inner(); + println!("{}", "Cluster inference:".cyan().bold()); + println!(); + println!(" {} {}", "Provider:".dimmed(), configured.provider_name); + println!(" {} {}", "Model:".dimmed(), configured.model_id); + println!(" {} {}", "Version:".dimmed(), configured.version); Ok(()) } -fn print_route_row(route: &InferenceRoute) { - let Some(spec) = route.spec.as_ref() else { - println!( - "{:<12} {:<16} {:<40} {:<30} {:<30} {:<8}", - route.name, "", "", "", "", "false" - ); - return; - }; - - let protocols = route_protocols(spec); - let protocol_display = if protocols.is_empty() { - "".to_string() - } else { - protocols.join(",") - }; - - println!( - "{:<12} {:<16} {:<40} {:<30} {:<30} {:<8}", - route.name, spec.routing_hint, spec.base_url, protocol_display, spec.model_id, spec.enabled - ); -} - -fn route_protocols(spec: &InferenceRouteSpec) -> Vec { - navigator_core::inference::normalize_protocols(&spec.protocols) -} - -fn inference_route_spinner(initial_message: &str) -> ProgressBar { - let spinner = ProgressBar::new_spinner(); - spinner.set_style( - ProgressStyle::with_template("{spinner:.cyan} {msg}") - .unwrap_or_else(|_| ProgressStyle::default_spinner()), - ); - spinner.enable_steady_tick(Duration::from_millis(120)); - 
spinner.set_message(initial_message.to_string()); - spinner -} - -fn protocol_probe_url(base_url: &str, endpoint_path: &str) -> String { - let base = base_url.trim_end_matches('/'); - if base.ends_with("/v1") && endpoint_path.starts_with("/v1/") { - format!("{base}{}", &endpoint_path[3..]) - } else { - format!("{base}{endpoint_path}") - } -} - -fn is_supported_probe_status(status: ReqwestStatusCode) -> bool { - !matches!(status.as_u16(), 404 | 501) -} - -/// Auto-detect which inference protocols a route endpoint supports. -/// -/// **Note:** This sends real HTTP POST requests (with `max_tokens: 1`) to the -/// endpoint to probe for protocol support. This may consume a small amount of -/// API credits on production endpoints. -async fn detect_route_protocols( - base_url: &str, - api_key: &str, - model_id: &str, - spinner: Option<&ProgressBar>, -) -> Result> { - let client = reqwest::Client::builder() - .timeout(Duration::from_secs(8)) - .build() - .into_diagnostic() - .wrap_err("failed to build protocol detection HTTP client")?; - - let probes = [ - ( - "openai_chat_completions", - "/v1/chat/completions", - serde_json::json!({ - "model": model_id, - "messages": [{"role": "user", "content": "ping"}], - "max_tokens": 1, - }), - ), - ( - "openai_completions", - "/v1/completions", - serde_json::json!({ - "model": model_id, - "prompt": "ping", - "max_tokens": 1, - }), - ), - ( - "anthropic_messages", - "/v1/messages", - serde_json::json!({ - "model": model_id, - "max_tokens": 1, - "messages": [{"role": "user", "content": "ping"}], - }), - ), - ]; - - let mut detected = Vec::new(); - let mut transport_errors = Vec::new(); - - let probe_count = probes.len(); - for (index, (protocol, path, body)) in probes.into_iter().enumerate() { - if let Some(spinner) = spinner { - spinner.set_message(format!( - "Detecting protocols ({}/{}): POST {}", - index + 1, - probe_count, - path - )); - } - - let url = protocol_probe_url(base_url, path); - - let mut request = 
client.post(url).header("content-type", "application/json"); - - if protocol == "anthropic_messages" { - request = request - .header("x-api-key", api_key) - .header("anthropic-version", "2023-06-01"); - } else { - request = request.bearer_auth(api_key); - } - - match request.json(&body).send().await { - Ok(response) => { - if is_supported_probe_status(response.status()) { - detected.push(protocol.to_string()); - } - } - Err(err) => { - transport_errors.push(format!("{protocol}: {err}")); - } - } - } - - if detected.is_empty() { - if transport_errors.is_empty() { - return Err(miette::miette!( - "could not detect any supported protocols for {base_url}; pass --protocol manually" - )); - } - - return Err(miette::miette!( - "could not detect any supported protocols for {base_url}; first probe error: {}; pass --protocol manually", - transport_errors[0] - )); - } - - Ok(detected) -} - -async fn resolve_route_protocols( - protocols: &[String], - base_url: &str, - api_key: &str, - model_id: &str, - spinner: Option<&ProgressBar>, -) -> Result<(Vec, bool)> { - let normalized = navigator_core::inference::normalize_protocols(protocols); - if !normalized.is_empty() { - if let Some(spinner) = spinner { - spinner.set_message("Using explicitly provided protocols...".to_string()); - } - return Ok((normalized, false)); - } - - if let Some(spinner) = spinner { - spinner.set_message(format!("Detecting supported protocols from {base_url}...")); - } - - let detected = detect_route_protocols(base_url, api_key, model_id, spinner).await?; - Ok((detected, true)) -} - -fn git_repo_root() -> Result { +pub fn git_repo_root() -> Result { let output = Command::new("git") .args(["rev-parse", "--show-toplevel"]) .output() @@ -2613,7 +2376,7 @@ fn git_repo_root() -> Result { Ok(PathBuf::from(root)) } -fn git_sync_files(repo_root: &Path) -> Result> { +pub fn git_sync_files(repo_root: &Path) -> Result> { let output = Command::new("git") .args(["ls-files", "-co", "--exclude-standard", "-z"]) 
.current_dir(repo_root) @@ -3024,7 +2787,7 @@ fn print_log_line(log: &navigator_core::proto::SandboxLogLine) { #[cfg(test)] mod tests { - use super::{inferred_provider_type, parse_credential_pairs, resolve_route_protocols}; + use super::{inferred_provider_type, parse_credential_pairs}; struct EnvVarGuard { key: &'static str, @@ -3105,44 +2868,6 @@ mod tests { )); } - #[tokio::test] - async fn resolve_route_protocols_skips_autodetect_when_protocols_are_provided() { - let (protocols, autodetected) = resolve_route_protocols( - &[ - " OpenAI_Chat_Completions ".to_string(), - "openai_chat_completions".to_string(), - "anthropic_messages".to_string(), - ], - "not-a-valid-url", - "dummy-key", - "dummy-model", - None, - ) - .await - .expect("manual protocols should bypass auto-detection"); - - assert!(!autodetected); - assert_eq!( - protocols, - vec![ - "openai_chat_completions".to_string(), - "anthropic_messages".to_string(), - ] - ); - } - - #[tokio::test] - async fn resolve_route_protocols_errors_when_autodetect_fails() { - let err = resolve_route_protocols(&[], "not-a-valid-url", "dummy-key", "dummy-model", None) - .await - .expect_err("missing protocols should require auto-detection"); - - assert!( - err.to_string() - .contains("could not detect any supported protocols") - ); - } - #[test] fn inferred_provider_type_returns_type_for_known_command() { let result = inferred_provider_type(&["claude".to_string(), "--help".to_string()]); diff --git a/crates/navigator-core/src/inference.rs b/crates/navigator-core/src/inference.rs index a01e4b15..709260ac 100644 --- a/crates/navigator-core/src/inference.rs +++ b/crates/navigator-core/src/inference.rs @@ -3,6 +3,125 @@ use std::collections::HashSet; +// --------------------------------------------------------------------------- +// Auth header abstraction +// --------------------------------------------------------------------------- + +/// How to inject an API key on outgoing inference requests. 
+/// +/// Defined in `navigator-core` so both `navigator-router` (which applies it) +/// and `navigator-server` / `navigator-sandbox` (which resolve it from +/// provider metadata) can share the same type. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AuthHeader { + /// `Authorization: Bearer ` + Bearer, + /// Custom header name (e.g. `x-api-key` for Anthropic). + Custom(&'static str), +} + +// --------------------------------------------------------------------------- +// Inference provider profiles +// --------------------------------------------------------------------------- + +/// Static metadata describing how to talk to a specific inference provider's API. +/// +/// This is the single source of truth for provider-specific inference knowledge: +/// default endpoint, supported protocols, credential key lookup order, auth +/// header style, and default headers. +/// +/// This is separate from [`navigator_providers::ProviderPlugin`] which handles +/// credential *discovery* (scanning env vars). `InferenceProviderProfile` handles +/// how to *use* discovered credentials to make inference API calls. +pub struct InferenceProviderProfile { + pub provider_type: &'static str, + pub default_base_url: &'static str, + pub protocols: &'static [&'static str], + /// Credential map key names to search for the API key, in priority order. + pub credential_key_names: &'static [&'static str], + /// Config map key names to search for a base URL override, in priority order. + pub base_url_config_keys: &'static [&'static str], + /// Auth header style for outgoing requests. + pub auth: AuthHeader, + /// Default headers injected on every outgoing request. 
+ pub default_headers: &'static [(&'static str, &'static str)], +} + +const OPENAI_PROTOCOLS: &[&str] = &[ + "openai_chat_completions", + "openai_completions", + "openai_responses", + "model_discovery", +]; + +const ANTHROPIC_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; + +static OPENAI_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "openai", + default_base_url: "https://api.openai.com/v1", + protocols: OPENAI_PROTOCOLS, + credential_key_names: &["OPENAI_API_KEY"], + base_url_config_keys: &["OPENAI_BASE_URL"], + auth: AuthHeader::Bearer, + default_headers: &[], +}; + +static ANTHROPIC_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "anthropic", + default_base_url: "https://api.anthropic.com/v1", + protocols: ANTHROPIC_PROTOCOLS, + credential_key_names: &["ANTHROPIC_API_KEY"], + base_url_config_keys: &["ANTHROPIC_BASE_URL"], + auth: AuthHeader::Custom("x-api-key"), + default_headers: &[("anthropic-version", "2023-06-01")], +}; + +static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "nvidia", + default_base_url: "https://integrate.api.nvidia.com/v1", + protocols: OPENAI_PROTOCOLS, + credential_key_names: &["NVIDIA_API_KEY"], + base_url_config_keys: &["NVIDIA_BASE_URL"], + auth: AuthHeader::Bearer, + default_headers: &[], +}; + +/// Look up the inference provider profile for a given provider type. +/// +/// Returns `None` for provider types that don't support inference routing +/// (e.g. `github`, `gitlab`, `outlook`). +pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProfile> { + match provider_type.trim().to_ascii_lowercase().as_str() { + "openai" => Some(&OPENAI_PROFILE), + "anthropic" => Some(&ANTHROPIC_PROFILE), + "nvidia" => Some(&NVIDIA_PROFILE), + _ => None, + } +} + +/// Derive the [`AuthHeader`] and default headers for a provider type string. 
+/// +/// This is a convenience wrapper around [`profile_for`] for callers that only +/// need the auth/header information (e.g. the sandbox bundle-to-route +/// conversion). +pub fn auth_for_provider_type(provider_type: &str) -> (AuthHeader, Vec<(String, String)>) { + match profile_for(provider_type) { + Some(profile) => { + let headers = profile + .default_headers + .iter() + .map(|(k, v)| ((*k).to_string(), (*v).to_string())) + .collect(); + (profile.auth.clone(), headers) + } + None => (AuthHeader::Bearer, Vec::new()), + } +} + +// --------------------------------------------------------------------------- +// Protocol normalization +// --------------------------------------------------------------------------- + /// Normalize a list of protocol strings: trim, lowercase, deduplicate, skip empty. pub fn normalize_protocols(protocols: &[String]) -> Vec { let mut normalized = Vec::new(); @@ -51,4 +170,40 @@ mod tests { let result = normalize_protocols(&[]); assert!(result.is_empty()); } + + #[test] + fn profile_for_known_types() { + assert!(profile_for("openai").is_some()); + assert!(profile_for("anthropic").is_some()); + assert!(profile_for("nvidia").is_some()); + assert!(profile_for("OpenAI").is_some()); // case insensitive + } + + #[test] + fn profile_for_unknown_types() { + assert!(profile_for("github").is_none()); + assert!(profile_for("gitlab").is_none()); + assert!(profile_for("unknown").is_none()); + } + + #[test] + fn auth_for_anthropic_uses_custom_header() { + let (auth, headers) = auth_for_provider_type("anthropic"); + assert_eq!(auth, AuthHeader::Custom("x-api-key")); + assert!(headers.iter().any(|(k, _)| k == "anthropic-version")); + } + + #[test] + fn auth_for_openai_uses_bearer() { + let (auth, headers) = auth_for_provider_type("openai"); + assert_eq!(auth, AuthHeader::Bearer); + assert!(headers.is_empty()); + } + + #[test] + fn auth_for_unknown_defaults_to_bearer() { + let (auth, headers) = auth_for_provider_type("unknown"); + assert_eq!(auth, 
AuthHeader::Bearer); + assert!(headers.is_empty()); + } } diff --git a/crates/navigator-core/src/proto/navigator.inference.v1.rs b/crates/navigator-core/src/proto/navigator.inference.v1.rs deleted file mode 100644 index 90a928f5..00000000 --- a/crates/navigator-core/src/proto/navigator.inference.v1.rs +++ /dev/null @@ -1,791 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -// This file is @generated by prost-build. -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct InferenceRouteSpec { - #[prost(string, tag = "1")] - pub routing_hint: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub base_url: ::prost::alloc::string::String, - #[prost(string, tag = "3")] - pub protocol: ::prost::alloc::string::String, - #[prost(string, tag = "4")] - pub api_key: ::prost::alloc::string::String, - #[prost(string, tag = "5")] - pub model_id: ::prost::alloc::string::String, - #[prost(bool, tag = "6")] - pub enabled: bool, -} -/// Inference route model stored by Navigator. -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct InferenceRoute { - #[prost(string, tag = "1")] - pub id: ::prost::alloc::string::String, - #[prost(message, optional, tag = "2")] - pub spec: ::core::option::Option, - /// Human-friendly name, unique per object type. Auto-generated if not provided. - #[prost(string, tag = "3")] - pub name: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateInferenceRouteRequest { - #[prost(message, optional, tag = "1")] - pub route: ::core::option::Option, - /// Optional name. If empty, a random 6-char name is generated. - #[prost(string, tag = "2")] - pub name: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct UpdateInferenceRouteRequest { - /// Route name (canonical lookup key). 
- #[prost(string, tag = "1")] - pub name: ::prost::alloc::string::String, - #[prost(message, optional, tag = "2")] - pub route: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct DeleteInferenceRouteRequest { - /// Route name (canonical lookup key). - #[prost(string, tag = "1")] - pub name: ::prost::alloc::string::String, -} -#[derive(Clone, Copy, PartialEq, ::prost::Message)] -pub struct DeleteInferenceRouteResponse { - #[prost(bool, tag = "1")] - pub deleted: bool, -} -#[derive(Clone, Copy, PartialEq, ::prost::Message)] -pub struct ListInferenceRoutesRequest { - #[prost(uint32, tag = "1")] - pub limit: u32, - #[prost(uint32, tag = "2")] - pub offset: u32, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ListInferenceRoutesResponse { - #[prost(message, repeated, tag = "1")] - pub routes: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct InferenceRouteResponse { - #[prost(message, optional, tag = "1")] - pub route: ::core::option::Option, -} -/// Completion request. -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CompletionRequest { - /// Routing hint tells the router what kind of inference to use (e.g. "frontier", "local"). - /// The router resolves this to a specific model and backend. - #[prost(string, tag = "1")] - pub routing_hint: ::prost::alloc::string::String, - /// Chat messages for the completion. - #[prost(message, repeated, tag = "2")] - pub messages: ::prost::alloc::vec::Vec, - /// Sampling temperature. - #[prost(float, optional, tag = "3")] - pub temperature: ::core::option::Option, - /// Maximum number of tokens to generate. - #[prost(int32, optional, tag = "4")] - pub max_tokens: ::core::option::Option, - /// Nucleus sampling parameter. - #[prost(float, optional, tag = "5")] - pub top_p: ::core::option::Option, -} -/// A single chat message. -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ChatMessage { - /// Message role (e.g. 
"system", "user", "assistant"). - #[prost(string, tag = "1")] - pub role: ::prost::alloc::string::String, - /// Message content. - #[prost(string, tag = "2")] - pub content: ::prost::alloc::string::String, - /// Optional model reasoning content when provided by upstream. - #[prost(string, optional, tag = "3")] - pub reasoning_content: ::core::option::Option<::prost::alloc::string::String>, -} -/// Completion response. -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CompletionResponse { - /// Unique completion ID. - #[prost(string, tag = "1")] - pub id: ::prost::alloc::string::String, - /// The actual model that served the request (resolved by router). - #[prost(string, tag = "2")] - pub model: ::prost::alloc::string::String, - /// Creation timestamp (unix seconds). - #[prost(int64, tag = "3")] - pub created: i64, - /// Completion choices. - #[prost(message, repeated, tag = "4")] - pub choices: ::prost::alloc::vec::Vec, - /// Token usage statistics. - #[prost(message, optional, tag = "5")] - pub usage: ::core::option::Option, -} -/// A single completion choice. -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CompletionChoice { - /// Choice index. - #[prost(int32, tag = "1")] - pub index: i32, - /// The generated message. - #[prost(message, optional, tag = "2")] - pub message: ::core::option::Option, - /// Reason the generation stopped (e.g. "stop", "length"). - #[prost(string, tag = "3")] - pub finish_reason: ::prost::alloc::string::String, -} -/// Token usage statistics. -#[derive(Clone, Copy, PartialEq, ::prost::Message)] -pub struct CompletionUsage { - /// Number of tokens in the prompt. - #[prost(int32, tag = "1")] - pub prompt_tokens: i32, - /// Number of tokens in the completion. - #[prost(int32, tag = "2")] - pub completion_tokens: i32, - /// Total tokens used. - #[prost(int32, tag = "3")] - pub total_tokens: i32, -} -/// Generated client implementations. 
-pub mod inference_client { - #![allow( - unused_variables, - dead_code, - missing_docs, - clippy::wildcard_imports, - clippy::let_unit_value, - )] - use tonic::codegen::*; - use tonic::codegen::http::Uri; - /// Inference service provides LLM completion capabilities. - #[derive(Debug, Clone)] - pub struct InferenceClient { - inner: tonic::client::Grpc, - } - impl InferenceClient { - /// Attempt to create a new client by connecting to a given endpoint. - pub async fn connect(dst: D) -> Result - where - D: TryInto, - D::Error: Into, - { - let conn = tonic::transport::Endpoint::new(dst)?.connect().await?; - Ok(Self::new(conn)) - } - } - impl InferenceClient - where - T: tonic::client::GrpcService, - T::Error: Into, - T::ResponseBody: Body + std::marker::Send + 'static, - ::Error: Into + std::marker::Send, - { - pub fn new(inner: T) -> Self { - let inner = tonic::client::Grpc::new(inner); - Self { inner } - } - pub fn with_origin(inner: T, origin: Uri) -> Self { - let inner = tonic::client::Grpc::with_origin(inner, origin); - Self { inner } - } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InferenceClient> - where - F: tonic::service::Interceptor, - T::ResponseBody: Default, - T: tonic::codegen::Service< - http::Request, - Response = http::Response< - >::ResponseBody, - >, - >, - , - >>::Error: Into + std::marker::Send + std::marker::Sync, - { - InferenceClient::new(InterceptedService::new(inner, interceptor)) - } - /// Compress requests with the given encoding. - /// - /// This requires the server to support it otherwise it might respond with an - /// error. - #[must_use] - pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.inner = self.inner.send_compressed(encoding); - self - } - /// Enable decompressing responses. 
- #[must_use] - pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.inner = self.inner.accept_compressed(encoding); - self - } - /// Limits the maximum size of a decoded message. - /// - /// Default: `4MB` - #[must_use] - pub fn max_decoding_message_size(mut self, limit: usize) -> Self { - self.inner = self.inner.max_decoding_message_size(limit); - self - } - /// Limits the maximum size of an encoded message. - /// - /// Default: `usize::MAX` - #[must_use] - pub fn max_encoding_message_size(mut self, limit: usize) -> Self { - self.inner = self.inner.max_encoding_message_size(limit); - self - } - /// Send a completion request routed by the server's inference router. - pub async fn completion( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/navigator.inference.v1.Inference/Completion", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert( - GrpcMethod::new("navigator.inference.v1.Inference", "Completion"), - ); - self.inner.unary(req, path, codec).await - } - /// Create a new inference route. 
- pub async fn create_inference_route( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/navigator.inference.v1.Inference/CreateInferenceRoute", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert( - GrpcMethod::new( - "navigator.inference.v1.Inference", - "CreateInferenceRoute", - ), - ); - self.inner.unary(req, path, codec).await - } - /// Update an existing inference route. - pub async fn update_inference_route( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/navigator.inference.v1.Inference/UpdateInferenceRoute", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert( - GrpcMethod::new( - "navigator.inference.v1.Inference", - "UpdateInferenceRoute", - ), - ); - self.inner.unary(req, path, codec).await - } - /// Delete an inference route. 
- pub async fn delete_inference_route( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/navigator.inference.v1.Inference/DeleteInferenceRoute", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert( - GrpcMethod::new( - "navigator.inference.v1.Inference", - "DeleteInferenceRoute", - ), - ); - self.inner.unary(req, path, codec).await - } - /// List inference routes. - pub async fn list_inference_routes( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/navigator.inference.v1.Inference/ListInferenceRoutes", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert( - GrpcMethod::new( - "navigator.inference.v1.Inference", - "ListInferenceRoutes", - ), - ); - self.inner.unary(req, path, codec).await - } - } -} -/// Generated server implementations. -pub mod inference_server { - #![allow( - unused_variables, - dead_code, - missing_docs, - clippy::wildcard_imports, - clippy::let_unit_value, - )] - use tonic::codegen::*; - /// Generated trait containing gRPC methods that should be implemented for use with InferenceServer. - #[async_trait] - pub trait Inference: std::marker::Send + std::marker::Sync + 'static { - /// Send a completion request routed by the server's inference router. 
- async fn completion( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - /// Create a new inference route. - async fn create_inference_route( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - /// Update an existing inference route. - async fn update_inference_route( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - /// Delete an inference route. - async fn delete_inference_route( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - /// List inference routes. - async fn list_inference_routes( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; - } - /// Inference service provides LLM completion capabilities. - #[derive(Debug)] - pub struct InferenceServer { - inner: Arc, - accept_compression_encodings: EnabledCompressionEncodings, - send_compression_encodings: EnabledCompressionEncodings, - max_decoding_message_size: Option, - max_encoding_message_size: Option, - } - impl InferenceServer { - pub fn new(inner: T) -> Self { - Self::from_arc(Arc::new(inner)) - } - pub fn from_arc(inner: Arc) -> Self { - Self { - inner, - accept_compression_encodings: Default::default(), - send_compression_encodings: Default::default(), - max_decoding_message_size: None, - max_encoding_message_size: None, - } - } - pub fn with_interceptor( - inner: T, - interceptor: F, - ) -> InterceptedService - where - F: tonic::service::Interceptor, - { - InterceptedService::new(Self::new(inner), interceptor) - } - /// Enable decompressing requests with the given encoding. - #[must_use] - pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.accept_compression_encodings.enable(encoding); - self - } - /// Compress responses with the given encoding, if the client supports it. 
- #[must_use] - pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { - self.send_compression_encodings.enable(encoding); - self - } - /// Limits the maximum size of a decoded message. - /// - /// Default: `4MB` - #[must_use] - pub fn max_decoding_message_size(mut self, limit: usize) -> Self { - self.max_decoding_message_size = Some(limit); - self - } - /// Limits the maximum size of an encoded message. - /// - /// Default: `usize::MAX` - #[must_use] - pub fn max_encoding_message_size(mut self, limit: usize) -> Self { - self.max_encoding_message_size = Some(limit); - self - } - } - impl tonic::codegen::Service> for InferenceServer - where - T: Inference, - B: Body + std::marker::Send + 'static, - B::Error: Into + std::marker::Send + 'static, - { - type Response = http::Response; - type Error = std::convert::Infallible; - type Future = BoxFuture; - fn poll_ready( - &mut self, - _cx: &mut Context<'_>, - ) -> Poll> { - Poll::Ready(Ok(())) - } - fn call(&mut self, req: http::Request) -> Self::Future { - match req.uri().path() { - "/navigator.inference.v1.Inference/Completion" => { - #[allow(non_camel_case_types)] - struct CompletionSvc(pub Arc); - impl< - T: Inference, - > tonic::server::UnaryService - for CompletionSvc { - type Response = super::CompletionResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::completion(&inner, request).await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = CompletionSvc(inner); - let codec = tonic::codec::ProstCodec::default(); - let mut 
grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/navigator.inference.v1.Inference/CreateInferenceRoute" => { - #[allow(non_camel_case_types)] - struct CreateInferenceRouteSvc(pub Arc); - impl< - T: Inference, - > tonic::server::UnaryService - for CreateInferenceRouteSvc { - type Response = super::InferenceRouteResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::create_inference_route(&inner, request) - .await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = CreateInferenceRouteSvc(inner); - let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/navigator.inference.v1.Inference/UpdateInferenceRoute" => { - #[allow(non_camel_case_types)] - struct UpdateInferenceRouteSvc(pub Arc); - impl< - T: Inference, - > tonic::server::UnaryService - for UpdateInferenceRouteSvc { - type Response = super::InferenceRouteResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: 
tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::update_inference_route(&inner, request) - .await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = UpdateInferenceRouteSvc(inner); - let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/navigator.inference.v1.Inference/DeleteInferenceRoute" => { - #[allow(non_camel_case_types)] - struct DeleteInferenceRouteSvc(pub Arc); - impl< - T: Inference, - > tonic::server::UnaryService - for DeleteInferenceRouteSvc { - type Response = super::DeleteInferenceRouteResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::delete_inference_route(&inner, request) - .await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = DeleteInferenceRouteSvc(inner); - let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - 
.apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - "/navigator.inference.v1.Inference/ListInferenceRoutes" => { - #[allow(non_camel_case_types)] - struct ListInferenceRoutesSvc(pub Arc); - impl< - T: Inference, - > tonic::server::UnaryService - for ListInferenceRoutesSvc { - type Response = super::ListInferenceRoutesResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::list_inference_routes(&inner, request) - .await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = ListInferenceRoutesSvc(inner); - let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } - _ => { - Box::pin(async move { - let mut response = http::Response::new(empty_body()); - let headers = response.headers_mut(); - headers - .insert( - tonic::Status::GRPC_STATUS, - (tonic::Code::Unimplemented as i32).into(), - ); - headers - .insert( - http::header::CONTENT_TYPE, - tonic::metadata::GRPC_CONTENT_TYPE, - ); - Ok(response) - }) - } - } - } - } - impl Clone for InferenceServer { - fn clone(&self) -> Self { 
- let inner = self.inner.clone(); - Self { - inner, - accept_compression_encodings: self.accept_compression_encodings, - send_compression_encodings: self.send_compression_encodings, - max_decoding_message_size: self.max_decoding_message_size, - max_encoding_message_size: self.max_encoding_message_size, - } - } - } - /// Generated gRPC service name - pub const SERVICE_NAME: &str = "navigator.inference.v1.Inference"; - impl tonic::server::NamedService for InferenceServer { - const NAME: &'static str = SERVICE_NAME; - } -} diff --git a/crates/navigator-policy/src/lib.rs b/crates/navigator-policy/src/lib.rs index ba0ab2f3..24bcc7b1 100644 --- a/crates/navigator-policy/src/lib.rs +++ b/crates/navigator-policy/src/lib.rs @@ -10,11 +10,13 @@ //! these types, ensuring round-trip fidelity. use std::collections::{BTreeMap, HashMap}; +use std::fmt; +use std::path::Path; use miette::{IntoDiagnostic, Result, WrapErr}; use navigator_core::proto::{ - self, FilesystemPolicy, InferenceApiPattern, L7Allow, L7Rule, LandlockPolicy, NetworkBinary, - NetworkEndpoint, NetworkPolicyRule, ProcessPolicy, SandboxPolicy, + FilesystemPolicy, L7Allow, L7Rule, LandlockPolicy, NetworkBinary, NetworkEndpoint, + NetworkPolicyRule, ProcessPolicy, SandboxPolicy, }; use serde::{Deserialize, Serialize}; @@ -27,8 +29,6 @@ use serde::{Deserialize, Serialize}; struct PolicyFile { version: u32, #[serde(default, skip_serializing_if = "Option::is_none")] - inference: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] filesystem_policy: Option, #[serde(default, skip_serializing_if = "Option::is_none")] landlock: Option, @@ -65,28 +65,6 @@ struct ProcessDef { run_as_group: String, } -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct InferenceDef { - #[serde(default, skip_serializing_if = "Vec::is_empty")] - allowed_routes: Vec, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - api_patterns: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] 
-#[serde(deny_unknown_fields)] -struct InferenceApiPatternDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - method: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - path_glob: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - protocol: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - kind: String, -} - #[derive(Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] struct NetworkPolicyRuleDef { @@ -212,19 +190,6 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { run_as_group: p.run_as_group, }), network_policies, - inference: raw.inference.map(|inf| proto::InferencePolicy { - allowed_routes: inf.allowed_routes, - api_patterns: inf - .api_patterns - .into_iter() - .map(|p| InferenceApiPattern { - method: p.method, - path_glob: p.path_glob, - protocol: p.protocol, - kind: p.kind, - }) - .collect(), - }), } } @@ -233,20 +198,6 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { // --------------------------------------------------------------------------- fn from_proto(policy: &SandboxPolicy) -> PolicyFile { - let inference = policy.inference.as_ref().map(|inf| InferenceDef { - allowed_routes: inf.allowed_routes.clone(), - api_patterns: inf - .api_patterns - .iter() - .map(|p| InferenceApiPatternDef { - method: p.method.clone(), - path_glob: p.path_glob.clone(), - protocol: p.protocol.clone(), - kind: p.kind.clone(), - }) - .collect(), - }); - let filesystem_policy = policy.filesystem.as_ref().map(|fs| FilesystemDef { include_workdir: fs.include_workdir, read_only: fs.read_only.clone(), @@ -316,7 +267,6 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { PolicyFile { version: policy.version, - inference, filesystem_policy, landlock, process, @@ -359,12 +309,12 @@ pub fn serialize_sandbox_policy(policy: &SandboxPolicy) -> Result { /// default. 
pub fn load_sandbox_policy(cli_path: Option<&str>) -> Result> { let contents = if let Some(p) = cli_path { - let path = std::path::Path::new(p); + let path = Path::new(p); std::fs::read_to_string(path) .into_diagnostic() .wrap_err_with(|| format!("failed to read sandbox policy from {}", path.display()))? } else if let Ok(policy_path) = std::env::var("NEMOCLAW_SANDBOX_POLICY") { - let path = std::path::Path::new(&policy_path); + let path = Path::new(&policy_path); std::fs::read_to_string(path) .into_diagnostic() .wrap_err_with(|| format!("failed to read sandbox policy from {}", path.display()))? @@ -410,7 +360,6 @@ pub fn restrictive_default_policy() -> SandboxPolicy { run_as_group: "sandbox".into(), }), network_policies: HashMap::new(), - inference: None, } } @@ -425,6 +374,195 @@ pub fn clear_process_identity(policy: &mut SandboxPolicy) { } } +// --------------------------------------------------------------------------- +// Policy safety validation +// --------------------------------------------------------------------------- + +/// Maximum number of filesystem paths (`read_only` + `read_write` combined). +const MAX_FILESYSTEM_PATHS: usize = 256; + +/// Maximum length of any single filesystem path string. +const MAX_PATH_LENGTH: usize = 4096; + +/// A safety violation found in a sandbox policy. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PolicyViolation { + /// `run_as_user` or `run_as_group` is "root" or "0". + RootProcessIdentity { field: &'static str, value: String }, + /// A filesystem path contains `..` components. + PathTraversal { path: String }, + /// A filesystem path is not absolute (does not start with `/`). + RelativePath { path: String }, + /// A read-write filesystem path is overly broad (e.g. `/`). + OverlyBroadPath { path: String }, + /// A filesystem path exceeds the maximum allowed length. + FieldTooLong { path: String, length: usize }, + /// Too many filesystem paths in the policy. 
+ TooManyPaths { count: usize }, +} + +impl fmt::Display for PolicyViolation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::RootProcessIdentity { field, value } => { + write!(f, "{field} cannot be '{value}' (root is not allowed)") + } + Self::PathTraversal { path } => { + write!(f, "path contains '..' traversal component: {path}") + } + Self::RelativePath { path } => { + write!(f, "path must be absolute (start with '/'): {path}") + } + Self::OverlyBroadPath { path } => { + write!(f, "read-write path is overly broad: {path}") + } + Self::FieldTooLong { path, length } => { + write!( + f, + "path exceeds maximum length ({length} > {MAX_PATH_LENGTH}): {path}" + ) + } + Self::TooManyPaths { count } => { + write!( + f, + "too many filesystem paths ({count} > {MAX_FILESYSTEM_PATHS})" + ) + } + } + } +} + +/// Validate that a sandbox policy does not contain unsafe content. +/// +/// Returns `Ok(())` if the policy is safe, or `Err(violations)` listing all +/// safety violations found. Callers decide how to handle violations (hard +/// error vs. logged warning). 
+/// +/// Checks performed: +/// - `run_as_user` / `run_as_group` must not be "root" or "0" +/// - Filesystem paths must be absolute (start with `/`) +/// - Filesystem paths must not contain `..` components +/// - Read-write paths must not be overly broad (just `/`) +/// - Individual path lengths must not exceed [`MAX_PATH_LENGTH`] +/// - Total path count must not exceed [`MAX_FILESYSTEM_PATHS`] +pub fn validate_sandbox_policy( + policy: &SandboxPolicy, +) -> std::result::Result<(), Vec> { + let mut violations = Vec::new(); + + // Check process identity + if let Some(ref process) = policy.process { + if is_root_identity(&process.run_as_user) { + violations.push(PolicyViolation::RootProcessIdentity { + field: "run_as_user", + value: process.run_as_user.clone(), + }); + } + if is_root_identity(&process.run_as_group) { + violations.push(PolicyViolation::RootProcessIdentity { + field: "run_as_group", + value: process.run_as_group.clone(), + }); + } + } + + // Check filesystem paths + if let Some(ref fs) = policy.filesystem { + let total_paths = fs.read_only.len() + fs.read_write.len(); + if total_paths > MAX_FILESYSTEM_PATHS { + violations.push(PolicyViolation::TooManyPaths { count: total_paths }); + } + + for path_str in fs.read_only.iter().chain(fs.read_write.iter()) { + if path_str.len() > MAX_PATH_LENGTH { + violations.push(PolicyViolation::FieldTooLong { + path: truncate_for_display(path_str), + length: path_str.len(), + }); + continue; + } + + let path = Path::new(path_str); + + if !path.has_root() { + violations.push(PolicyViolation::RelativePath { + path: path_str.clone(), + }); + } + + if path + .components() + .any(|c| matches!(c, std::path::Component::ParentDir)) + { + violations.push(PolicyViolation::PathTraversal { + path: path_str.clone(), + }); + } + } + + // Only reject "/" as read-write (overly broad) + for path_str in &fs.read_write { + let normalized = path_str.trim_end_matches('/'); + if normalized.is_empty() { + // Path is "/" or "///" etc. 
+ violations.push(PolicyViolation::OverlyBroadPath { + path: path_str.clone(), + }); + } + } + } + + if violations.is_empty() { + Ok(()) + } else { + Err(violations) + } +} + +/// Check if a user/group identity string refers to root. +fn is_root_identity(value: &str) -> bool { + if value.is_empty() { + return false; + } + let trimmed = value.trim(); + trimmed == "root" || trimmed == "0" +} + +/// Truncate a string for safe inclusion in error messages. +fn truncate_for_display(s: &str) -> String { + if s.len() <= 80 { + s.to_string() + } else { + format!("{}...", &s[..77]) + } +} + +/// Normalize a filesystem path by collapsing redundant separators +/// and removing trailing slashes, without requiring the path to exist on disk. +/// +/// This is a lexical normalization only — it does NOT resolve symlinks or +/// check the filesystem. +pub fn normalize_path(path: &str) -> String { + use std::path::Component; + + let p = Path::new(path); + let mut normalized = std::path::PathBuf::new(); + for component in p.components() { + match component { + Component::Prefix(prefix) => normalized.push(prefix.as_os_str()), + #[allow(clippy::path_buf_push_overwrite)] + Component::RootDir => normalized.push("/"), + Component::CurDir => {} // skip "." + Component::ParentDir => { + // Keep ".." — validation will catch it separately + normalized.push(".."); + } + Component::Normal(c) => normalized.push(c), + } + } + normalized.to_string_lossy().to_string() +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -498,35 +636,6 @@ network_policies: assert_eq!(proto2.network_policies["my_api"].name, "my-custom-api-name"); } - /// Verify that `api_patterns` on inference survives the round-trip. 
- #[test] - fn round_trip_preserves_api_patterns() { - let yaml = r#" -version: 1 -inference: - allowed_routes: - - local - api_patterns: - - method: POST - path_glob: "/v1/chat/completions" - protocol: openai_chat_completions - kind: chat_completion -"#; - let proto1 = parse_sandbox_policy(yaml).expect("parse failed"); - assert_eq!(proto1.inference.as_ref().unwrap().api_patterns.len(), 1); - - let yaml_out = serialize_sandbox_policy(&proto1).expect("serialize failed"); - let proto2 = parse_sandbox_policy(&yaml_out).expect("re-parse failed"); - - let patterns1 = &proto1.inference.as_ref().unwrap().api_patterns; - let patterns2 = &proto2.inference.as_ref().unwrap().api_patterns; - assert_eq!(patterns1.len(), patterns2.len()); - assert_eq!(patterns1[0].method, patterns2[0].method); - assert_eq!(patterns1[0].path_glob, patterns2[0].path_glob); - assert_eq!(patterns1[0].protocol, patterns2[0].protocol); - assert_eq!(patterns1[0].kind, patterns2[0].kind); - } - #[test] fn restrictive_default_has_no_network_policies() { let policy = restrictive_default_policy(); @@ -536,12 +645,6 @@ inference: ); } - #[test] - fn restrictive_default_has_no_inference() { - let policy = restrictive_default_policy(); - assert!(policy.inference.is_none()); - } - #[test] fn restrictive_default_has_filesystem_policy() { let policy = restrictive_default_policy(); @@ -589,7 +692,6 @@ inference: assert_eq!(policy.version, 1); assert!(policy.network_policies.is_empty()); assert!(policy.filesystem.is_none()); - assert!(policy.inference.is_none()); } #[test] @@ -635,4 +737,168 @@ network_policies: fn container_policy_path_is_expected() { assert_eq!(CONTAINER_POLICY_PATH, "/etc/navigator/policy.yaml"); } + + // ---- Policy validation tests ---- + + #[test] + fn validate_rejects_root_run_as_user() { + let mut policy = restrictive_default_policy(); + policy.process = Some(ProcessPolicy { + run_as_user: "root".into(), + run_as_group: "sandbox".into(), + }); + let violations = 
validate_sandbox_policy(&policy).unwrap_err(); + assert!(violations.iter().any(|v| matches!( + v, + PolicyViolation::RootProcessIdentity { + field: "run_as_user", + .. + } + ))); + } + + #[test] + fn validate_rejects_uid_zero() { + let mut policy = restrictive_default_policy(); + policy.process = Some(ProcessPolicy { + run_as_user: "0".into(), + run_as_group: "0".into(), + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert_eq!(violations.len(), 2); + } + + #[test] + fn validate_rejects_path_traversal() { + let mut policy = restrictive_default_policy(); + policy.filesystem = Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["/usr/../etc/shadow".into()], + read_write: vec!["/tmp".into()], + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::PathTraversal { .. })) + ); + } + + #[test] + fn validate_rejects_relative_paths() { + let mut policy = restrictive_default_policy(); + policy.filesystem = Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["usr/lib".into()], + read_write: vec!["/tmp".into()], + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::RelativePath { .. })) + ); + } + + #[test] + fn validate_rejects_overly_broad_read_write_path() { + let mut policy = restrictive_default_policy(); + policy.filesystem = Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["/usr".into()], + read_write: vec!["/".into()], + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::OverlyBroadPath { .. 
})) + ); + } + + #[test] + fn validate_accepts_valid_policy() { + let policy = restrictive_default_policy(); + assert!(validate_sandbox_policy(&policy).is_ok()); + } + + #[test] + fn validate_accepts_empty_process() { + let policy = SandboxPolicy { + version: 1, + process: None, + filesystem: None, + landlock: None, + network_policies: HashMap::new(), + }; + assert!(validate_sandbox_policy(&policy).is_ok()); + } + + #[test] + fn validate_accepts_empty_run_as_user() { + let mut policy = restrictive_default_policy(); + policy.process = Some(ProcessPolicy { + run_as_user: String::new(), + run_as_group: String::new(), + }); + assert!(validate_sandbox_policy(&policy).is_ok()); + } + + #[test] + fn validate_rejects_too_many_paths() { + let mut policy = restrictive_default_policy(); + let many_paths: Vec = (0..300).map(|i| format!("/path/{i}")).collect(); + policy.filesystem = Some(FilesystemPolicy { + include_workdir: true, + read_only: many_paths, + read_write: vec!["/tmp".into()], + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::TooManyPaths { .. })) + ); + } + + #[test] + fn validate_rejects_path_too_long() { + let mut policy = restrictive_default_policy(); + let long_path = format!("/{}", "a".repeat(5000)); + policy.filesystem = Some(FilesystemPolicy { + include_workdir: true, + read_only: vec![long_path], + read_write: vec!["/tmp".into()], + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::FieldTooLong { .. })) + ); + } + + #[test] + fn normalize_path_collapses_separators() { + assert_eq!(normalize_path("/usr//lib"), "/usr/lib"); + assert_eq!(normalize_path("/usr/./lib"), "/usr/lib"); + assert_eq!(normalize_path("/tmp/"), "/tmp"); + } + + #[test] + fn normalize_path_preserves_parent_dir() { + // normalize_path preserves ".." 
— validation catches it separately + assert_eq!(normalize_path("/usr/../etc"), "/usr/../etc"); + } + + #[test] + fn policy_violation_display() { + let v = PolicyViolation::RootProcessIdentity { + field: "run_as_user", + value: "root".into(), + }; + let s = format!("{v}"); + assert!(s.contains("root")); + assert!(s.contains("run_as_user")); + } } diff --git a/crates/navigator-providers/src/lib.rs b/crates/navigator-providers/src/lib.rs index 67d9aa5a..7a28f025 100644 --- a/crates/navigator-providers/src/lib.rs +++ b/crates/navigator-providers/src/lib.rs @@ -79,6 +79,8 @@ impl ProviderRegistry { registry.register(providers::codex::CodexProvider); registry.register(providers::opencode::OpencodeProvider); registry.register(providers::generic::GenericProvider); + registry.register(providers::openai::OpenaiProvider); + registry.register(providers::anthropic::AnthropicProvider); registry.register(providers::nvidia::NvidiaProvider); registry.register(providers::gitlab::GitlabProvider); registry.register(providers::github::GithubProvider); @@ -128,6 +130,8 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "codex" => Some("codex"), "opencode" => Some("opencode"), "generic" => Some("generic"), + "openai" => Some("openai"), + "anthropic" => Some("anthropic"), "nvidia" => Some("nvidia"), "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), @@ -157,6 +161,8 @@ mod tests { assert_eq!(normalize_provider_type("gh"), Some("github")); assert_eq!(normalize_provider_type("CLAUDE"), Some("claude")); assert_eq!(normalize_provider_type("generic"), Some("generic")); + assert_eq!(normalize_provider_type("openai"), Some("openai")); + assert_eq!(normalize_provider_type("anthropic"), Some("anthropic")); assert_eq!(normalize_provider_type("nvidia"), Some("nvidia")); assert_eq!(normalize_provider_type("unknown"), None); } diff --git a/crates/navigator-providers/src/providers/anthropic.rs b/crates/navigator-providers/src/providers/anthropic.rs new file 
mode 100644 index 00000000..f4851dad --- /dev/null +++ b/crates/navigator-providers/src/providers/anthropic.rs @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, +}; + +pub struct AnthropicProvider; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "anthropic", + credential_env_vars: &["ANTHROPIC_API_KEY"], +}; + +impl ProviderPlugin for AnthropicProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result, ProviderError> { + discover_with_spec(&SPEC, &RealDiscoveryContext) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } +} + +#[cfg(test)] +mod tests { + use super::SPEC; + use crate::discover_with_spec; + use crate::test_helpers::MockDiscoveryContext; + + #[test] + fn discovers_anthropic_env_credentials() { + let ctx = MockDiscoveryContext::new().with_env("ANTHROPIC_API_KEY", "sk-ant-test"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + assert_eq!( + discovered.credentials.get("ANTHROPIC_API_KEY"), + Some(&"sk-ant-test".to_string()) + ); + } +} diff --git a/crates/navigator-providers/src/providers/mod.rs b/crates/navigator-providers/src/providers/mod.rs index 277e9342..8ab52ed9 100644 --- a/crates/navigator-providers/src/providers/mod.rs +++ b/crates/navigator-providers/src/providers/mod.rs @@ -1,11 +1,13 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 +pub mod anthropic; pub mod claude; pub mod codex; pub mod generic; pub mod github; pub mod gitlab; pub mod nvidia; +pub mod openai; pub mod opencode; pub mod outlook; diff --git a/crates/navigator-providers/src/providers/openai.rs b/crates/navigator-providers/src/providers/openai.rs new file mode 100644 index 00000000..0dbe3941 --- /dev/null +++ b/crates/navigator-providers/src/providers/openai.rs @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec, +}; + +pub struct OpenaiProvider; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "openai", + credential_env_vars: &["OPENAI_API_KEY"], +}; + +impl ProviderPlugin for OpenaiProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result, ProviderError> { + discover_with_spec(&SPEC, &RealDiscoveryContext) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } +} + +#[cfg(test)] +mod tests { + use super::SPEC; + use crate::discover_with_spec; + use crate::test_helpers::MockDiscoveryContext; + + #[test] + fn discovers_openai_env_credentials() { + let ctx = MockDiscoveryContext::new().with_env("OPENAI_API_KEY", "sk-openai-test"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + assert_eq!( + discovered.credentials.get("OPENAI_API_KEY"), + Some(&"sk-openai-test".to_string()) + ); + } +} diff --git a/crates/navigator-router/README.md b/crates/navigator-router/README.md index 75f7ff2f..6d57d675 100644 --- a/crates/navigator-router/README.md +++ b/crates/navigator-router/README.md @@ -24,8 +24,8 @@ Current split: - `navigator-server`: - authenticates request origin - - enforces sandbox policy (`allowed_routes`) - - 
loads enabled, policy-allowed route candidates from the entity store + - resolves cluster-managed inference route candidates from providers + - loads enabled route candidates from the entity store - `navigator-router`: - picks a route from candidates (`proxy_with_candidates`) - forwards the HTTP request upstream and returns the raw response diff --git a/crates/navigator-router/src/backend.rs b/crates/navigator-router/src/backend.rs index 5a8e57a0..23c4cd02 100644 --- a/crates/navigator-router/src/backend.rs +++ b/crates/navigator-router/src/backend.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::RouterError; -use crate::config::ResolvedRoute; +use crate::config::{AuthHeader, ResolvedRoute}; /// Response from a proxied HTTP request to a backend. #[derive(Debug)] @@ -15,20 +15,19 @@ pub struct ProxyResponse { /// Forward a raw HTTP request to the backend configured in `route`. /// /// Rewrites the auth header with the route's API key (using the -/// protocol-appropriate mechanism) and the `Host` header to match the -/// backend endpoint. The original path is appended to the route's -/// endpoint URL. +/// route's configured [`AuthHeader`] mechanism) and the `Host` header +/// to match the backend endpoint. The original path is appended to +/// the route's endpoint URL. pub async fn proxy_to_backend( client: &reqwest::Client, route: &ResolvedRoute, - source_protocol: &str, + _source_protocol: &str, method: &str, path: &str, headers: Vec<(String, String)>, body: bytes::Bytes, ) -> Result { - let base = route.endpoint.trim_end_matches('/'); - let url = format!("{base}{path}"); + let url = build_backend_url(&route.endpoint, path); let reqwest_method: reqwest::Method = method .parse() @@ -36,23 +35,48 @@ pub async fn proxy_to_backend( let mut builder = client.request(reqwest_method, &url); - // Set the route's API key using the protocol-appropriate header. - // Anthropic uses `x-api-key`; OpenAI and others use `Authorization: Bearer`. 
- if source_protocol.starts_with("anthropic") { - builder = builder.header("x-api-key", &route.api_key); - } else { - builder = builder.bearer_auth(&route.api_key); + // Inject API key using the route's configured auth mechanism. + match &route.auth { + AuthHeader::Bearer => { + builder = builder.bearer_auth(&route.api_key); + } + AuthHeader::Custom(header_name) => { + builder = builder.header(*header_name, &route.api_key); + } } - // Forward non-sensitive headers (skip auth and host — we rewrite those) + // Collect header names we need to strip (auth, host, and any default header + // names that will be set from route defaults). + let strip_headers: Vec = { + let mut s = vec![ + "authorization".to_string(), + "x-api-key".to_string(), + "host".to_string(), + ]; + for (name, _) in &route.default_headers { + s.push(name.to_ascii_lowercase()); + } + s + }; + + // Forward non-sensitive headers (skip auth, host, and any we'll override) for (name, value) in &headers { let name_lc = name.to_ascii_lowercase(); - if name_lc == "authorization" || name_lc == "x-api-key" || name_lc == "host" { + if strip_headers.contains(&name_lc) { continue; } builder = builder.header(name.as_str(), value.as_str()); } + // Apply route-level default headers (e.g. anthropic-version) unless + // the client already sent them. + for (name, value) in &route.default_headers { + let already_sent = headers.iter().any(|(h, _)| h.eq_ignore_ascii_case(name)); + if !already_sent { + builder = builder.header(name.as_str(), value.as_str()); + } + } + // Set the "model" field in the JSON body to the route's configured model so the // backend receives the correct model ID regardless of what the client sent. 
let body = match serde_json::from_slice::(&body) { @@ -96,3 +120,41 @@ pub async fn proxy_to_backend( body: resp_body, }) } + +fn build_backend_url(endpoint: &str, path: &str) -> String { + let base = endpoint.trim_end_matches('/'); + if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) { + return format!("{base}{}", &path[3..]); + } + + format!("{base}{path}") +} + +#[cfg(test)] +mod tests { + use super::build_backend_url; + + #[test] + fn build_backend_url_dedupes_v1_prefix() { + assert_eq!( + build_backend_url("https://api.openai.com/v1", "/v1/chat/completions"), + "https://api.openai.com/v1/chat/completions" + ); + } + + #[test] + fn build_backend_url_preserves_non_versioned_base() { + assert_eq!( + build_backend_url("https://api.anthropic.com", "/v1/messages"), + "https://api.anthropic.com/v1/messages" + ); + } + + #[test] + fn build_backend_url_handles_exact_v1_path() { + assert_eq!( + build_backend_url("https://api.openai.com/v1", "/v1"), + "https://api.openai.com/v1" + ); + } +} diff --git a/crates/navigator-router/src/config.rs b/crates/navigator-router/src/config.rs index 3fab9f67..530ebc63 100644 --- a/crates/navigator-router/src/config.rs +++ b/crates/navigator-router/src/config.rs @@ -4,6 +4,8 @@ use serde::Deserialize; use std::path::Path; +pub use navigator_core::inference::AuthHeader; + use crate::RouterError; #[derive(Debug, Clone, Deserialize)] @@ -13,10 +15,12 @@ pub struct RouterConfig { #[derive(Debug, Clone, Deserialize)] pub struct RouteConfig { - pub routing_hint: String, + pub name: String, pub endpoint: String, pub model: String, #[serde(default)] + pub provider_type: Option, + #[serde(default)] pub protocols: Vec, #[serde(default)] pub api_key: Option, @@ -24,23 +28,32 @@ pub struct RouteConfig { pub api_key_env: Option, } +/// A fully-resolved route ready for the router to forward requests. 
+/// +/// The router is provider-agnostic — all provider-specific decisions +/// (auth header style, default headers, base URL) are made by the +/// caller during resolution. #[derive(Clone)] pub struct ResolvedRoute { - pub routing_hint: String, pub endpoint: String, pub model: String, pub api_key: String, pub protocols: Vec, + /// How to inject the API key on outgoing requests. + pub auth: AuthHeader, + /// Extra headers injected on every request (e.g. `anthropic-version`). + pub default_headers: Vec<(String, String)>, } impl std::fmt::Debug for ResolvedRoute { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ResolvedRoute") - .field("routing_hint", &self.routing_hint) .field("endpoint", &self.endpoint) .field("model", &self.model) .field("api_key", &"[REDACTED]") .field("protocols", &self.protocols) + .field("auth", &self.auth) + .field("default_headers", &self.default_headers) .finish() } } @@ -84,13 +97,13 @@ impl RouteConfig { return std::env::var(env_var).map_err(|_| { RouterError::Internal(format!( "environment variable {env_var} not set for route '{}'", - self.routing_hint + self.name )) }); } Err(RouterError::Internal(format!( "route '{}' has neither api_key nor api_key_env", - self.routing_hint + self.name ))) } @@ -99,20 +112,31 @@ impl RouteConfig { if protocols.is_empty() { return Err(RouterError::Internal(format!( "route '{}' has no protocols", - self.routing_hint + self.name ))); } + let (auth, default_headers) = auth_from_provider_type(self.provider_type.as_deref()); + Ok(ResolvedRoute { - routing_hint: self.routing_hint.clone(), endpoint: self.endpoint.clone(), model: self.model.clone(), api_key: self.resolve_api_key()?, protocols, + auth, + default_headers, }) } } +/// Derive auth header style and default headers from a provider type string. +/// +/// Delegates to [`navigator_core::inference::auth_for_provider_type`] which +/// uses the centralized `InferenceProviderProfile` registry. 
+fn auth_from_provider_type(provider_type: Option<&str>) -> (AuthHeader, Vec<(String, String)>) { + navigator_core::inference::auth_for_provider_type(provider_type.unwrap_or("")) +} + #[cfg(test)] mod tests { use super::*; @@ -122,12 +146,12 @@ mod tests { fn load_from_file_valid_yaml_round_trip() { let yaml = r#" routes: - - routing_hint: local + - name: inference.local endpoint: http://localhost:8000/v1 model: llama-3 protocols: [openai_chat_completions] api_key: sk-test-key - - routing_hint: frontier + - name: inference.local endpoint: https://api.openai.com/v1 model: gpt-4o protocols: [openai_chat_completions, anthropic_messages] @@ -138,8 +162,8 @@ routes: let config = RouterConfig::load_from_file(f.path()).unwrap(); assert_eq!(config.routes.len(), 2); - assert_eq!(config.routes[0].routing_hint, "local"); - assert_eq!(config.routes[1].routing_hint, "frontier"); + assert_eq!(config.routes[0].name, "inference.local"); + assert_eq!(config.routes[1].name, "inference.local"); let resolved = config.resolve_routes().unwrap(); assert_eq!(resolved.len(), 2); @@ -167,7 +191,7 @@ routes: fn load_from_file_missing_api_key_returns_error() { let yaml = r#" routes: - - routing_hint: local + - name: inference.local endpoint: http://localhost:8000/v1 model: llama-3 protocols: [openai_chat_completions] @@ -191,7 +215,7 @@ routes: fn load_from_file_api_key_env_resolves_from_environment() { let yaml = r#" routes: - - routing_hint: local + - name: inference.local endpoint: http://localhost:8000/v1 model: llama-3 protocols: [openai_chat_completions] @@ -221,11 +245,12 @@ routes: #[test] fn resolved_route_debug_redacts_api_key() { let route = ResolvedRoute { - routing_hint: "local".to_string(), endpoint: "https://api.example.com/v1".to_string(), model: "test-model".to_string(), api_key: "sk-super-secret-key-12345".to_string(), protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), }; let debug_output = format!("{route:?}"); 
assert!( @@ -237,4 +262,25 @@ routes: "Debug output should show [REDACTED] for api_key: {debug_output}" ); } + + #[test] + fn auth_from_anthropic_provider_uses_custom_header() { + let (auth, headers) = auth_from_provider_type(Some("anthropic")); + assert_eq!(auth, AuthHeader::Custom("x-api-key")); + assert!(headers.iter().any(|(k, _)| k == "anthropic-version")); + } + + #[test] + fn auth_from_openai_provider_uses_bearer() { + let (auth, headers) = auth_from_provider_type(Some("openai")); + assert_eq!(auth, AuthHeader::Bearer); + assert!(headers.is_empty()); + } + + #[test] + fn auth_from_none_defaults_to_bearer() { + let (auth, headers) = auth_from_provider_type(None); + assert_eq!(auth, AuthHeader::Bearer); + assert!(headers.is_empty()); + } } diff --git a/crates/navigator-router/src/lib.rs b/crates/navigator-router/src/lib.rs index 3e69d541..835f148d 100644 --- a/crates/navigator-router/src/lib.rs +++ b/crates/navigator-router/src/lib.rs @@ -13,7 +13,7 @@ use tracing::info; #[derive(Debug, thiserror::Error)] pub enum RouterError { - #[error("route not found for routing_hint '{0}'")] + #[error("route not found for route '{0}'")] RouteNotFound(String), #[error("no compatible route for protocol '{0}'")] NoCompatibleRoute(String), @@ -72,7 +72,6 @@ impl Router { .ok_or_else(|| RouterError::NoCompatibleRoute(source_protocol.to_string()))?; info!( - routing_hint = %route.routing_hint, protocols = %route.protocols.join(","), endpoint = %route.endpoint, method = %method, @@ -81,7 +80,7 @@ impl Router { ); if mock::is_mock_route(route) { - info!(routing_hint = %route.routing_hint, "returning mock response"); + info!(endpoint = %route.endpoint, "returning mock response"); return Ok(mock::mock_response(route, &normalized_source)); } @@ -105,32 +104,22 @@ mod tests { fn test_config() -> RouterConfig { RouterConfig { - routes: vec![ - RouteConfig { - routing_hint: "local".to_string(), - endpoint: "http://localhost:8000/v1".to_string(), - model: 
"meta/llama-3.1-8b-instruct".to_string(), - protocols: vec!["openai_chat_completions".to_string()], - api_key: Some("test-key".to_string()), - api_key_env: None, - }, - RouteConfig { - routing_hint: "frontier".to_string(), - endpoint: "http://localhost:8000/v1".to_string(), - model: "meta/llama-3.1-70b-instruct".to_string(), - protocols: vec!["openai_chat_completions".to_string()], - api_key: Some("test-key".to_string()), - api_key_env: None, - }, - ], + routes: vec![RouteConfig { + name: "inference.local".to_string(), + endpoint: "http://localhost:8000/v1".to_string(), + model: "meta/llama-3.1-8b-instruct".to_string(), + provider_type: None, + protocols: vec!["openai_chat_completions".to_string()], + api_key: Some("test-key".to_string()), + api_key_env: None, + }], } } #[test] fn router_resolves_routes_from_config() { let router = Router::from_config(&test_config()).unwrap(); - assert_eq!(router.routes.len(), 2); - assert_eq!(router.routes[0].routing_hint, "local"); + assert_eq!(router.routes.len(), 1); assert_eq!(router.routes[0].protocols, vec!["openai_chat_completions"]); } @@ -138,9 +127,10 @@ mod tests { fn config_missing_api_key_returns_error() { let config = RouterConfig { routes: vec![RouteConfig { - routing_hint: "test".to_string(), + name: "inference.local".to_string(), endpoint: "http://localhost".to_string(), model: "test-model".to_string(), + provider_type: None, protocols: vec!["openai_chat_completions".to_string()], api_key: None, api_key_env: None, diff --git a/crates/navigator-router/src/mock.rs b/crates/navigator-router/src/mock.rs index 13215441..06a2959b 100644 --- a/crates/navigator-router/src/mock.rs +++ b/crates/navigator-router/src/mock.rs @@ -19,7 +19,6 @@ pub fn is_mock_route(route: &ResolvedRoute) -> bool { /// Anthropic response, etc. The route's `model` field is echoed in the response. 
pub fn mock_response(route: &ResolvedRoute, source_protocol: &str) -> ProxyResponse { tracing::warn!( - routing_hint = %route.routing_hint, endpoint = %route.endpoint, "Serving mock response — mock:// routes should only be used in development/testing" ); @@ -125,11 +124,12 @@ mod tests { fn make_route(endpoint: &str, protocols: &[&str], model: &str) -> ResolvedRoute { ResolvedRoute { - routing_hint: "test".to_string(), endpoint: endpoint.to_string(), model: model.to_string(), api_key: "key".to_string(), protocols: protocols.iter().map(ToString::to_string).collect(), + auth: crate::config::AuthHeader::Bearer, + default_headers: Vec::new(), } } diff --git a/crates/navigator-router/tests/backend_integration.rs b/crates/navigator-router/tests/backend_integration.rs index 11c51915..c8ee6c57 100644 --- a/crates/navigator-router/tests/backend_integration.rs +++ b/crates/navigator-router/tests/backend_integration.rs @@ -2,17 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 use navigator_router::Router; -use navigator_router::config::{ResolvedRoute, RouteConfig, RouterConfig}; +use navigator_router::config::{AuthHeader, ResolvedRoute, RouteConfig, RouterConfig}; use wiremock::matchers::{bearer_token, body_partial_json, header, method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; fn mock_candidates(base_url: &str) -> Vec { vec![ResolvedRoute { - routing_hint: "local".to_string(), endpoint: base_url.to_string(), model: "meta/llama-3.1-8b-instruct".to_string(), api_key: "test-api-key".to_string(), protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), }] } @@ -108,11 +109,12 @@ async fn proxy_upstream_401_returns_error() { async fn proxy_no_compatible_route_returns_error() { let router = Router::new().unwrap(); let candidates = vec![ResolvedRoute { - routing_hint: "local".to_string(), endpoint: "http://localhost:1234".to_string(), model: "test".to_string(), api_key: "key".to_string(), protocols: 
vec!["anthropic_messages".to_string()], + auth: AuthHeader::Custom("x-api-key"), + default_headers: Vec::new(), }]; let err = router @@ -167,11 +169,12 @@ async fn proxy_strips_auth_header() { async fn proxy_mock_route_returns_canned_response() { let router = Router::new().unwrap(); let candidates = vec![ResolvedRoute { - routing_hint: "local".to_string(), endpoint: "mock://test".to_string(), model: "mock/test-model".to_string(), api_key: "unused".to_string(), protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), }]; let body = serde_json::to_vec(&serde_json::json!({ @@ -279,7 +282,7 @@ async fn proxy_inserts_model_when_absent_from_body() { } #[tokio::test] -async fn proxy_uses_x_api_key_for_anthropic_protocol() { +async fn proxy_uses_x_api_key_for_anthropic_route() { let mock_server = MockServer::start().await; Mock::given(method("POST")) @@ -299,11 +302,12 @@ async fn proxy_uses_x_api_key_for_anthropic_protocol() { let router = Router::new().unwrap(); let candidates = vec![ResolvedRoute { - routing_hint: "frontier".to_string(), endpoint: mock_server.uri(), model: "claude-sonnet-4-20250514".to_string(), api_key: "test-anthropic-key".to_string(), protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Custom("x-api-key"), + default_headers: vec![("anthropic-version".to_string(), "2023-06-01".to_string())], }]; let body = serde_json::to_vec(&serde_json::json!({ @@ -355,11 +359,12 @@ async fn proxy_anthropic_does_not_send_bearer_auth() { let router = Router::new().unwrap(); let candidates = vec![ResolvedRoute { - routing_hint: "frontier".to_string(), endpoint: mock_server.uri(), model: "claude-sonnet-4-20250514".to_string(), api_key: "anthropic-key".to_string(), protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Custom("x-api-key"), + default_headers: vec![("anthropic-version".to_string(), "2023-06-01".to_string())], }]; let response = router @@ -381,9 +386,10 @@ async fn 
proxy_anthropic_does_not_send_bearer_auth() { fn config_resolves_routes_with_protocol() { let config = RouterConfig { routes: vec![RouteConfig { - routing_hint: "local".to_string(), + name: "inference.local".to_string(), endpoint: "http://localhost:8000".to_string(), model: "test-model".to_string(), + provider_type: None, protocols: vec!["openai_chat_completions".to_string()], api_key: Some("key".to_string()), api_key_env: None, diff --git a/crates/navigator-sandbox/data/sandbox-policy.rego b/crates/navigator-sandbox/data/sandbox-policy.rego index ef2c4d1d..92d2a9a0 100644 --- a/crates/navigator-sandbox/data/sandbox-policy.rego +++ b/crates/navigator-sandbox/data/sandbox-policy.rego @@ -59,7 +59,6 @@ deny_reason := "network connections not allowed by policy" if { input.exec not network_policy_for_request count(data.network_policies) == 0 - count(object.get(data, "inference", {}).allowed_routes) == 0 } # --- Matched policy name (for audit logging) --- @@ -141,13 +140,11 @@ binary_allowed(policy, exec) if { glob.match(b.path, ["/"], p) } -# --- Network action (allow / inspect_for_inference / deny) --- +# --- Network action (allow / deny) --- # # These rules are mutually exclusive by construction: # - "allow" requires `network_policy_for_request` (binary+endpoint matched) -# - "inspect_for_inference" requires `not network_policy_for_request` -# They can never both be true, so OPA's complete-rule conflict semantics -# are satisfied without an explicit `else`. +# - default is "deny" when no policy matches. default network_action := "deny" @@ -156,14 +153,6 @@ network_action := "allow" if { network_policy_for_request } -# Binary not explicitly allowed + inference configured → inspect. -# Fires whether the endpoint is declared in a policy or not — the key condition -# is that THIS binary is not allowed for this endpoint. 
-network_action := "inspect_for_inference" if { - not network_policy_for_request - count(data.inference.allowed_routes) > 0 -} - # =========================================================================== # L7 request evaluation (queried per-request within a tunnel) # =========================================================================== diff --git a/crates/navigator-sandbox/src/grpc_client.rs b/crates/navigator-sandbox/src/grpc_client.rs index 167abd42..75525743 100644 --- a/crates/navigator-sandbox/src/grpc_client.rs +++ b/crates/navigator-sandbox/src/grpc_client.rs @@ -9,7 +9,7 @@ use std::time::Duration; use miette::{IntoDiagnostic, Result, WrapErr}; use navigator_core::proto::{ - GetSandboxInferenceBundleRequest, GetSandboxInferenceBundleResponse, GetSandboxPolicyRequest, + GetInferenceBundleRequest, GetInferenceBundleResponse, GetSandboxPolicyRequest, GetSandboxProviderEnvironmentRequest, PolicyStatus, ReportPolicyStatusRequest, SandboxPolicy as ProtoSandboxPolicy, UpdateSandboxPolicyRequest, inference_client::InferenceClient, navigator_client::NavigatorClient, @@ -160,6 +160,16 @@ pub async fn discover_and_sync_policy( }) } +/// Sync an enriched policy back to the gateway. +/// +/// Used by the supervisor to push baseline-path-enriched policies so the +/// gateway stores the effective policy users see via `nemoclaw sandbox get`. +pub async fn sync_policy(endpoint: &str, sandbox: &str, policy: &ProtoSandboxPolicy) -> Result<()> { + debug!(endpoint = %endpoint, sandbox = %sandbox, "Syncing enriched policy to gateway"); + let mut client = connect(endpoint).await?; + sync_policy_with_client(&mut client, sandbox, policy).await +} + /// Fetch provider environment variables for a sandbox from NemoClaw server via gRPC. /// /// Returns a map of environment variable names to values derived from provider @@ -264,20 +274,15 @@ impl CachedNavigatorClient { } } -/// Fetch the pre-filtered inference route bundle for a sandbox. 
-pub async fn fetch_inference_bundle( - endpoint: &str, - sandbox_id: &str, -) -> Result { - debug!(endpoint = %endpoint, sandbox_id = %sandbox_id, "Fetching inference route bundle"); +/// Fetch the resolved inference route bundle from the server. +pub async fn fetch_inference_bundle(endpoint: &str) -> Result { + debug!(endpoint = %endpoint, "Fetching inference route bundle"); let channel = connect_channel(endpoint).await?; let mut client = InferenceClient::new(channel); let response = client - .get_sandbox_inference_bundle(GetSandboxInferenceBundleRequest { - sandbox_id: sandbox_id.to_string(), - }) + .get_inference_bundle(GetInferenceBundleRequest {}) .await .into_diagnostic()?; diff --git a/crates/navigator-sandbox/src/l7/inference.rs b/crates/navigator-sandbox/src/l7/inference.rs index 255ae5ad..adc7437f 100644 --- a/crates/navigator-sandbox/src/l7/inference.rs +++ b/crates/navigator-sandbox/src/l7/inference.rs @@ -3,10 +3,9 @@ //! Inference API pattern detection and gateway rerouting. //! -//! When the proxy intercepts a connection (action = `InspectForInference`), -//! this module detects whether the HTTP request is a known inference API call -//! and reroutes it through the gateway's `ProxyInference` gRPC endpoint -//! instead of forwarding to the original upstream. +//! For requests targeting `inference.local`, this module detects whether the +//! HTTP request is a known inference API call and routes it through the local +//! sandbox router. /// An inference API pattern for detecting inference calls in intercepted traffic. 
#[derive(Debug, Clone)] @@ -44,6 +43,18 @@ pub fn default_patterns() -> Vec { protocol: "anthropic_messages".to_string(), kind: "messages".to_string(), }, + InferenceApiPattern { + method: "GET".to_string(), + path_glob: "/v1/models".to_string(), + protocol: "model_discovery".to_string(), + kind: "models_list".to_string(), + }, + InferenceApiPattern { + method: "GET".to_string(), + path_glob: "/v1/models/*".to_string(), + protocol: "model_discovery".to_string(), + kind: "models_get".to_string(), + }, ] } @@ -55,9 +66,20 @@ pub fn detect_inference_pattern<'a>( ) -> Option<&'a InferenceApiPattern> { // Strip query string for matching let path_only = path.split('?').next().unwrap_or(path); - patterns - .iter() - .find(|p| method.eq_ignore_ascii_case(&p.method) && path_only == p.path_glob) + patterns.iter().find(|p| { + if !method.eq_ignore_ascii_case(&p.method) { + return false; + } + + if let Some(prefix) = p.path_glob.strip_suffix("/*") { + return path_only == prefix + || path_only + .strip_prefix(prefix) + .is_some_and(|suffix| suffix.starts_with('/')); + } + + path_only == p.path_glob + }) } /// A parsed HTTP request from the intercepted tunnel. 
@@ -273,10 +295,19 @@ mod tests { } #[test] - fn no_match_for_unknown_path() { + fn detect_get_models() { let patterns = default_patterns(); - let result = detect_inference_pattern("POST", "/v1/models", &patterns); - assert!(result.is_none()); + let result = detect_inference_pattern("GET", "/v1/models", &patterns); + assert!(result.is_some()); + assert_eq!(result.unwrap().protocol, "model_discovery"); + } + + #[test] + fn detect_get_model_details() { + let patterns = default_patterns(); + let result = detect_inference_pattern("GET", "/v1/models/gpt-4.1", &patterns); + assert!(result.is_some()); + assert_eq!(result.unwrap().protocol, "model_discovery"); } #[test] diff --git a/crates/navigator-sandbox/src/lib.rs b/crates/navigator-sandbox/src/lib.rs index 5b65a7f9..fd69d51e 100644 --- a/crates/navigator-sandbox/src/lib.rs +++ b/crates/navigator-sandbox/src/lib.rs @@ -137,7 +137,7 @@ pub async fn run_sandbox( // Load policy and initialize OPA engine let navigator_endpoint_for_proxy = navigator_endpoint.clone(); - let (mut policy, opa_engine) = load_policy( + let (policy, opa_engine) = load_policy( sandbox_id.clone(), sandbox, navigator_endpoint.clone(), @@ -180,8 +180,9 @@ pub async fn run_sandbox( let tls_dir = std::path::Path::new("/etc/navigator-tls"); match write_ca_files(&ca, tls_dir) { Ok(paths) => { - // Make the TLS directory readable under Landlock - policy.filesystem.read_only.push(tls_dir.to_path_buf()); + // /etc/navigator-tls is subsumed by the /etc baseline + // path injected by enrich_*_baseline_paths(), so no + // explicit Landlock entry is needed here. let upstream_config = build_upstream_client_config(); let cert_cache = CertCache::new(ca); @@ -562,13 +563,13 @@ async fn build_inference_context( .map_err(|e| miette::miette!("failed to resolve routes from {path}: {e}"))? 
} InferenceRouteSource::Cluster => { - let (Some(id), Some(endpoint)) = (sandbox_id, navigator_endpoint) else { + let (Some(_id), Some(endpoint)) = (sandbox_id, navigator_endpoint) else { return Ok(None); }; // Cluster mode: fetch bundle from gateway - info!(sandbox_id = %id, endpoint = %endpoint, "Fetching inference route bundle from gateway"); - match grpc_client::fetch_inference_bundle(endpoint, id).await { + info!(endpoint = %endpoint, "Fetching inference route bundle from gateway"); + match grpc_client::fetch_inference_bundle(endpoint).await { Ok(bundle) => { info!( route_count = bundle.routes.len(), @@ -578,12 +579,12 @@ async fn build_inference_context( bundle_to_resolved_routes(&bundle) } Err(e) => { - // Distinguish "no inference policy" (expected) from server errors. - // gRPC PermissionDenied/NotFound means inference is not configured + // Distinguish expected "not configured" states from server errors. + // gRPC PermissionDenied/NotFound means inference bundle is unavailable // for this sandbox — skip gracefully. Other errors are unexpected. 
let msg = e.to_string(); if msg.contains("permission denied") || msg.contains("not found") { - info!(error = %e, "Sandbox has no inference policy, inference routing disabled"); + info!(error = %e, "Inference bundle unavailable, routing disabled"); return Ok(None); } warn!(error = %e, "Failed to fetch inference bundle, inference routing disabled"); @@ -618,9 +619,9 @@ async fn build_inference_context( // Spawn background route cache refresh for cluster mode if matches!(source, InferenceRouteSource::Cluster) - && let (Some(id), Some(endpoint)) = (sandbox_id, navigator_endpoint) + && let (Some(_id), Some(endpoint)) = (sandbox_id, navigator_endpoint) { - spawn_route_refresh(ctx.route_cache(), id.to_string(), endpoint.to_string()); + spawn_route_refresh(ctx.route_cache(), endpoint.to_string()); } Ok(Some(ctx)) @@ -628,17 +629,22 @@ async fn build_inference_context( /// Convert a proto bundle response into resolved routes for the router. fn bundle_to_resolved_routes( - bundle: &navigator_core::proto::GetSandboxInferenceBundleResponse, + bundle: &navigator_core::proto::GetInferenceBundleResponse, ) -> Vec { bundle .routes .iter() - .map(|r| navigator_router::config::ResolvedRoute { - routing_hint: r.routing_hint.clone(), - endpoint: r.base_url.clone(), - model: r.model_id.clone(), - api_key: r.api_key.clone(), - protocols: r.protocols.clone(), + .map(|r| { + let (auth, default_headers) = + navigator_core::inference::auth_for_provider_type(&r.provider_type); + navigator_router::config::ResolvedRoute { + endpoint: r.base_url.clone(), + model: r.model_id.clone(), + api_key: r.api_key.clone(), + protocols: r.protocols.clone(), + auth, + default_headers, + } }) .collect() } @@ -646,7 +652,6 @@ fn bundle_to_resolved_routes( /// Spawn a background task that periodically refreshes the route cache from the gateway. 
fn spawn_route_refresh( cache: Arc>>, - sandbox_id: String, endpoint: String, ) { tokio::spawn(async move { @@ -658,7 +663,7 @@ fn spawn_route_refresh( loop { tick.tick().await; - match grpc_client::fetch_inference_bundle(&endpoint, &sandbox_id).await { + match grpc_client::fetch_inference_bundle(&endpoint).await { Ok(bundle) => { let routes = bundle_to_resolved_routes(&bundle); debug!( @@ -676,6 +681,87 @@ fn spawn_route_refresh( }); } +// ============================================================================ +// Baseline filesystem path enrichment +// ============================================================================ + +/// Minimum read-only paths required for a proxy-mode sandbox child process to +/// function: dynamic linker, shared libraries, DNS resolution, CA certs, +/// Python venv, and navigator logs. +const PROXY_BASELINE_READ_ONLY: &[&str] = &["/usr", "/lib", "/etc", "/app", "/var/log"]; + +/// Minimum read-write paths required for a proxy-mode sandbox child process: +/// user working directory and temporary files. +const PROXY_BASELINE_READ_WRITE: &[&str] = &["/sandbox", "/tmp"]; + +/// Ensure a proto `SandboxPolicy` includes the baseline filesystem paths +/// required for proxy-mode sandboxes. Paths are only added if missing; +/// user-specified paths are never removed. +/// +/// Returns `true` if the policy was modified (caller may want to sync back). +fn enrich_proto_baseline_paths(proto: &mut navigator_core::proto::SandboxPolicy) -> bool { + // Only enrich if network_policies are present (proxy mode indicator). 
+ if proto.network_policies.is_empty() { + return false; + } + + let fs = proto + .filesystem + .get_or_insert_with(|| navigator_core::proto::FilesystemPolicy { + include_workdir: true, + ..Default::default() + }); + + let mut modified = false; + for &path in PROXY_BASELINE_READ_ONLY { + if !fs.read_only.iter().any(|p| p.as_str() == path) { + fs.read_only.push(path.to_string()); + modified = true; + } + } + for &path in PROXY_BASELINE_READ_WRITE { + if !fs.read_write.iter().any(|p| p.as_str() == path) { + fs.read_write.push(path.to_string()); + modified = true; + } + } + + if modified { + info!("Enriched policy with baseline filesystem paths for proxy mode"); + } + + modified +} + +/// Ensure a `SandboxPolicy` (Rust type) includes the baseline filesystem +/// paths required for proxy-mode sandboxes. Used for the local-file code +/// path where no proto is available. +fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) { + if !matches!(policy.network.mode, NetworkMode::Proxy) { + return; + } + + let mut modified = false; + for &path in PROXY_BASELINE_READ_ONLY { + let p = std::path::PathBuf::from(path); + if !policy.filesystem.read_only.contains(&p) { + policy.filesystem.read_only.push(p); + modified = true; + } + } + for &path in PROXY_BASELINE_READ_WRITE { + let p = std::path::PathBuf::from(path); + if !policy.filesystem.read_write.contains(&p) { + policy.filesystem.read_write.push(p); + modified = true; + } + } + + if modified { + info!("Enriched policy with baseline filesystem paths for proxy mode"); + } +} + /// Load sandbox policy from local files or gRPC. 
/// /// Priority: @@ -702,7 +788,7 @@ async fn load_policy( std::path::Path::new(data_file), )?; let config = engine.query_sandbox_config()?; - let policy = SandboxPolicy { + let mut policy = SandboxPolicy { version: 1, filesystem: config.filesystem, network: NetworkPolicy { @@ -712,6 +798,7 @@ async fn load_policy( landlock: config.landlock, process: config.process, }; + enrich_sandbox_baseline_paths(&mut policy); return Ok((policy, Some(Arc::new(engine)))); } @@ -724,14 +811,17 @@ async fn load_policy( ); let proto_policy = grpc_client::fetch_policy(endpoint, id).await?; - let proto_policy = match proto_policy { + let mut proto_policy = match proto_policy { Some(p) => p, None => { // No policy configured on the server. Discover from disk or // fall back to the restrictive default, then sync to the // gateway so it becomes the authoritative baseline. info!("Server returned no policy; attempting local discovery"); - let discovered = discover_policy_from_disk_or_default(); + let mut discovered = discover_policy_from_disk_or_default(); + // Enrich before syncing so the gateway baseline includes + // baseline paths from the start. + enrich_proto_baseline_paths(&mut discovered); let sandbox = sandbox.as_deref().ok_or_else(|| { miette::miette!( "Cannot sync discovered policy: sandbox not available.\n\ @@ -745,21 +835,28 @@ async fn load_policy( } }; + // Ensure baseline filesystem paths are present for proxy-mode + // sandboxes. If the policy was enriched, sync the updated version + // back to the gateway so users can see the effective policy. + let enriched = enrich_proto_baseline_paths(&mut proto_policy); + if enriched { + if let Some(sandbox_name) = sandbox.as_deref() { + if let Err(e) = + grpc_client::sync_policy(endpoint, sandbox_name, &proto_policy).await + { + warn!( + error = %e, + "Failed to sync enriched policy back to gateway (non-fatal)" + ); + } + } + } + // Build OPA engine from baked-in rules + typed proto data. 
- // The engine is needed when network policies exist OR inference routing - // is configured (inference routing uses OPA to decide inspect_for_inference). - let has_network_policies = !proto_policy.network_policies.is_empty(); - let has_inference = proto_policy - .inference - .as_ref() - .is_some_and(|inf| !inf.allowed_routes.is_empty()); - let opa_engine = if has_network_policies || has_inference { - info!("Creating OPA engine from proto policy data"); - Some(Arc::new(OpaEngine::from_proto(&proto_policy)?)) - } else { - info!("No network policies or inference config in proto, skipping OPA engine"); - None - }; + // In cluster mode, proxy networking is always enabled so OPA is + // always required for allow/deny decisions. + info!("Creating OPA engine from proto policy data"); + let opa_engine = Some(Arc::new(OpaEngine::from_proto(&proto_policy)?)); let policy = SandboxPolicy::try_from(proto_policy)?; return Ok((policy, opa_engine)); @@ -784,7 +881,9 @@ fn discover_policy_from_disk_or_default() -> navigator_core::proto::SandboxPolic /// Try to read a sandbox policy YAML from `path`, falling back to the /// hardcoded restrictive default if the file is missing or invalid. fn discover_policy_from_path(path: &std::path::Path) -> navigator_core::proto::SandboxPolicy { - use navigator_policy::{parse_sandbox_policy, restrictive_default_policy}; + use navigator_policy::{ + parse_sandbox_policy, restrictive_default_policy, validate_sandbox_policy, + }; match std::fs::read_to_string(path) { Ok(yaml) => { @@ -793,7 +892,20 @@ fn discover_policy_from_path(path: &std::path::Path) -> navigator_core::proto::S "Loaded sandbox policy from container disk" ); match parse_sandbox_policy(&yaml) { - Ok(policy) => policy, + Ok(policy) => { + // Validate the disk-loaded policy for safety. 
+ if let Err(violations) = validate_sandbox_policy(&policy) { + let messages: Vec = + violations.iter().map(ToString::to_string).collect(); + warn!( + path = %path.display(), + violations = %messages.join("; "), + "Disk policy contains unsafe content, using restrictive default" + ); + return restrictive_default_policy(); + } + policy + } Err(e) => { warn!( path = %path.display(), @@ -967,10 +1079,10 @@ mod tests { #[test] fn bundle_to_resolved_routes_converts_all_fields() { - let bundle = navigator_core::proto::GetSandboxInferenceBundleResponse { + let bundle = navigator_core::proto::GetInferenceBundleResponse { routes: vec![ - navigator_core::proto::SandboxResolvedRoute { - routing_hint: "frontier".to_string(), + navigator_core::proto::ResolvedRoute { + name: "frontier".to_string(), base_url: "https://api.example.com/v1".to_string(), api_key: "sk-test-key".to_string(), model_id: "gpt-4".to_string(), @@ -978,13 +1090,15 @@ mod tests { "openai_chat_completions".to_string(), "openai_responses".to_string(), ], + provider_type: "openai".to_string(), }, - navigator_core::proto::SandboxResolvedRoute { - routing_hint: "local".to_string(), + navigator_core::proto::ResolvedRoute { + name: "local".to_string(), base_url: "http://vllm:8000/v1".to_string(), api_key: "local-key".to_string(), model_id: "llama-3".to_string(), protocols: vec!["openai_chat_completions".to_string()], + provider_type: String::new(), }, ], revision: "abc123".to_string(), @@ -994,21 +1108,27 @@ mod tests { let routes = bundle_to_resolved_routes(&bundle); assert_eq!(routes.len(), 2); - assert_eq!(routes[0].routing_hint, "frontier"); assert_eq!(routes[0].endpoint, "https://api.example.com/v1"); assert_eq!(routes[0].model, "gpt-4"); assert_eq!(routes[0].api_key, "sk-test-key"); + assert_eq!( + routes[0].auth, + navigator_core::inference::AuthHeader::Bearer + ); assert_eq!( routes[0].protocols, vec!["openai_chat_completions", "openai_responses"] ); - assert_eq!(routes[1].routing_hint, "local"); 
assert_eq!(routes[1].endpoint, "http://vllm:8000/v1"); + assert_eq!( + routes[1].auth, + navigator_core::inference::AuthHeader::Bearer + ); } #[test] fn bundle_to_resolved_routes_handles_empty_bundle() { - let bundle = navigator_core::proto::GetSandboxInferenceBundleResponse { + let bundle = navigator_core::proto::GetInferenceBundleResponse { routes: vec![], revision: "empty".to_string(), generated_at_ms: 0, @@ -1026,7 +1146,7 @@ mod tests { let yaml = r#" routes: - - routing_hint: local + - name: inference.local endpoint: http://localhost:8000/v1 model: llama-3 protocols: [openai_chat_completions] @@ -1044,7 +1164,7 @@ routes: let cache = ctx.route_cache(); let routes = cache.read().await; assert_eq!(routes.len(), 1); - assert_eq!(routes[0].routing_hint, "local"); + assert_eq!(routes[0].endpoint, "http://localhost:8000/v1"); } #[tokio::test] @@ -1081,7 +1201,7 @@ routes: let yaml = r#" routes: - - routing_hint: file-route + - name: inference.local endpoint: http://localhost:9999/v1 model: file-model protocols: [openai_chat_completions] @@ -1099,7 +1219,7 @@ routes: let ctx = ctx.expect("context should be Some"); let cache = ctx.route_cache(); let routes = cache.read().await; - assert_eq!(routes[0].routing_hint, "file-route"); + assert_eq!(routes[0].endpoint, "http://localhost:9999/v1"); } #[test] @@ -1151,7 +1271,6 @@ routes: let policy = discover_policy_from_path(path); // Restrictive default has no network policies. assert!(policy.network_policies.is_empty()); - assert!(policy.inference.is_none()); // But does have filesystem and process policies. 
assert!(policy.filesystem.is_some()); assert!(policy.process.is_some()); @@ -1201,12 +1320,40 @@ network_policies: assert!(policy.filesystem.is_some()); } + #[test] + fn discover_policy_from_unsafe_yaml_falls_back_to_default() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("policy.yaml"); + std::fs::write( + &path, + r#" +version: 1 +process: + run_as_user: root + run_as_group: root +filesystem_policy: + include_workdir: true + read_only: + - /usr + read_write: + - /tmp +"#, + ) + .unwrap(); + + let policy = discover_policy_from_path(&path); + // Falls back to restrictive default because of root user. + let proc = policy.process.unwrap(); + assert_eq!(proc.run_as_user, "sandbox"); + assert_eq!(proc.run_as_group, "sandbox"); + } + #[test] fn discover_policy_restrictive_default_blocks_network() { - // Verify that the restrictive default results in NetworkMode::Block - // when converted to the sandbox-local SandboxPolicy type. + // In cluster mode we keep proxy mode enabled so `inference.local` + // can always be routed through proxy/OPA controls. let proto = navigator_policy::restrictive_default_policy(); let local_policy = SandboxPolicy::try_from(proto).expect("conversion should succeed"); - assert!(matches!(local_policy.network.mode, NetworkMode::Block)); + assert!(matches!(local_policy.network.mode, NetworkMode::Proxy)); } } diff --git a/crates/navigator-sandbox/src/opa.rs b/crates/navigator-sandbox/src/opa.rs index 94eae6b1..2cd1f942 100644 --- a/crates/navigator-sandbox/src/opa.rs +++ b/crates/navigator-sandbox/src/opa.rs @@ -28,13 +28,9 @@ pub struct PolicyDecision { /// Network action returned by OPA `network_action` rule. 
/// /// - `Allow`: endpoint + binary explicitly matched in a network policy -/// - `InspectForInference`: no policy match but inference routing is configured — -/// TLS-terminate and check if the request is an inference call to reroute -/// through the gateway -/// - `Deny`: no matching policy and no inference routing configured +/// - `Deny`: no matching policy #[derive(Debug, Clone, PartialEq, Eq)] pub enum NetworkAction { - InspectForInference { matched_policy: Option }, Allow { matched_policy: Option }, Deny { reason: String }, } @@ -216,7 +212,7 @@ impl OpaEngine { /// Evaluate a network access request and return a routing action. /// /// Uses the OPA `network_action` rule which returns one of: - /// `"inspect_for_inference"`, `"allow"`, or `"deny"`. + /// `"allow"` or `"deny"`. pub fn evaluate_network_action(&self, input: &NetworkInput) -> Result { let ancestor_strs: Vec = input .ancestors @@ -264,7 +260,6 @@ impl OpaEngine { }; match action_str.as_str() { - "inspect_for_inference" => Ok(NetworkAction::InspectForInference { matched_policy }), "allow" => Ok(NetworkAction::Allow { matched_policy }), _ => { let reason_val = engine @@ -548,7 +543,6 @@ fn preprocess_yaml_data(yaml_str: &str) -> Result { /// - `data.landlock` /// - `data.process` /// - `data.network_policies` -/// - `data.inference` fn proto_to_opa_data_json(proto: &ProtoSandboxPolicy) -> String { let filesystem_policy = proto.filesystem.as_ref().map_or_else( || { @@ -647,17 +641,11 @@ fn proto_to_opa_data_json(proto: &ProtoSandboxPolicy) -> String { }) .collect(); - let inference = proto.inference.as_ref().map_or_else( - || serde_json::json!({"allowed_routes": []}), - |inf| serde_json::json!({"allowed_routes": inf.allowed_routes}), - ); - serde_json::json!({ "filesystem_policy": filesystem_policy, "landlock": landlock, "process": process, "network_policies": network_policies, - "inference": inference, }) .to_string() } @@ -732,7 +720,6 @@ mod tests { run_as_group: "sandbox".to_string(), }), 
network_policies, - inference: None, } } @@ -1524,7 +1511,7 @@ process: } // ======================================================================== - // network_action (inference routing) tests + // network_action tests // ======================================================================== const INFERENCE_TEST_DATA: &str = r#" @@ -1541,9 +1528,6 @@ network_policies: - { host: gitlab.com, port: 443 } binaries: - { path: /usr/bin/glab } -inference: - allowed_routes: - - local filesystem_policy: include_workdir: true read_only: [] @@ -1563,8 +1547,6 @@ network_policies: - { host: gitlab.com, port: 443 } binaries: - { path: /usr/bin/glab } -inference: - allowed_routes: [] filesystem_policy: include_workdir: true read_only: [] @@ -1607,7 +1589,7 @@ process: } #[test] - fn unknown_endpoint_with_inference_returns_inspect() { + fn unknown_endpoint_returns_deny() { let engine = inference_engine(); let input = NetworkInput { host: "api.openai.com".into(), @@ -1618,12 +1600,10 @@ process: cmdline_paths: vec![], }; let action = engine.evaluate_network_action(&input).unwrap(); - assert_eq!( - action, - NetworkAction::InspectForInference { - matched_policy: None - }, - ); + match &action { + NetworkAction::Deny { .. } => {} + other => panic!("Expected Deny, got: {other:?}"), + } } #[test] @@ -1645,9 +1625,9 @@ process: } #[test] - fn endpoint_in_policy_binary_not_allowed_with_inference_returns_inspect() { + fn endpoint_in_policy_binary_not_allowed_returns_deny() { // api.anthropic.com is declared but python3 is not in the binary list. - // With inference configured, this falls through to inference interception. + // With binary allow/deny, this is denied. 
let engine = inference_engine(); let input = NetworkInput { host: "api.anthropic.com".into(), @@ -1658,12 +1638,10 @@ process: cmdline_paths: vec![], }; let action = engine.evaluate_network_action(&input).unwrap(); - assert_eq!( - action, - NetworkAction::InspectForInference { - matched_policy: None - }, - ); + match &action { + NetworkAction::Deny { .. } => {} + other => panic!("Expected Deny, got: {other:?}"), + } } #[test] @@ -1706,8 +1684,7 @@ process: } #[test] - fn from_proto_unknown_endpoint_no_inference_returns_deny() { - // test_proto() has inference: None → defaults to empty allowed_routes + fn from_proto_unknown_endpoint_returns_deny() { let proto = test_proto(); let engine = OpaEngine::from_proto(&proto).expect("engine from proto"); let input = NetworkInput { @@ -1725,32 +1702,6 @@ process: } } - #[test] - fn from_proto_with_inference_unknown_endpoint_returns_inspect() { - use navigator_core::proto::InferencePolicy; - let mut proto = test_proto(); - proto.inference = Some(InferencePolicy { - allowed_routes: vec!["local".to_string()], - ..Default::default() - }); - let engine = OpaEngine::from_proto(&proto).expect("engine from proto"); - let input = NetworkInput { - host: "api.openai.com".into(), - port: 443, - binary_path: PathBuf::from("/usr/bin/python3"), - binary_sha256: "unused".into(), - ancestors: vec![], - cmdline_paths: vec![], - }; - let action = engine.evaluate_network_action(&input).unwrap(); - assert_eq!( - action, - NetworkAction::InspectForInference { - matched_policy: None - }, - ); - } - #[test] fn network_action_with_dev_policy() { let engine = test_engine(); @@ -1971,7 +1922,6 @@ process: run_as_group: "".to_string(), }), network_policies, - inference: None, }; let engine = OpaEngine::from_proto(&proto).expect("Failed to create engine from proto"); diff --git a/crates/navigator-sandbox/src/policy.rs b/crates/navigator-sandbox/src/policy.rs index 5a2c2306..8f27f46d 100644 --- a/crates/navigator-sandbox/src/policy.rs +++ 
b/crates/navigator-sandbox/src/policy.rs @@ -99,21 +99,11 @@ impl TryFrom for SandboxPolicy { type Error = miette::Report; fn try_from(proto: ProtoSandboxPolicy) -> Result { - // Derive network mode: use proxy mode when network policies exist - // OR inference routing is configured (inference needs the proxy to - // intercept and reroute connections). Otherwise block all network. - let has_network_policies = !proto.network_policies.is_empty(); - let has_inference = proto - .inference - .as_ref() - .is_some_and(|inf| !inf.allowed_routes.is_empty()); - let network = if has_network_policies || has_inference { - NetworkPolicy { - mode: NetworkMode::Proxy, - proxy: Some(ProxyPolicy { http_addr: None }), - } - } else { - NetworkPolicy::default() + // In cluster mode we always run with proxy networking so all egress + // can be evaluated by OPA and `inference.local` is always addressable. + let network = NetworkPolicy { + mode: NetworkMode::Proxy, + proxy: Some(ProxyPolicy { http_addr: None }), }; Ok(Self { @@ -132,8 +122,16 @@ impl TryFrom for SandboxPolicy { impl From for FilesystemPolicy { fn from(proto: ProtoFilesystemPolicy) -> Self { Self { - read_only: proto.read_only.into_iter().map(PathBuf::from).collect(), - read_write: proto.read_write.into_iter().map(PathBuf::from).collect(), + read_only: proto + .read_only + .into_iter() + .map(|p| PathBuf::from(navigator_policy::normalize_path(&p))) + .collect(), + read_write: proto + .read_write + .into_iter() + .map(|p| PathBuf::from(navigator_policy::normalize_path(&p))) + .collect(), include_workdir: proto.include_workdir, } } diff --git a/crates/navigator-sandbox/src/process.rs b/crates/navigator-sandbox/src/process.rs index a38b53b1..74469b83 100644 --- a/crates/navigator-sandbox/src/process.rs +++ b/crates/navigator-sandbox/src/process.rs @@ -408,8 +408,39 @@ pub fn drop_privileges(policy: &SandboxPolicy) -> Result<()> { nix::unistd::setgid(group.gid).into_diagnostic()?; + // Verify effective GID actually changed 
(defense-in-depth, CWE-250 / CERT POS37-C) + let effective_gid = nix::unistd::getegid(); + if effective_gid != group.gid { + return Err(miette::miette!( + "Privilege drop verification failed: expected effective GID {}, got {}", + group.gid, + effective_gid + )); + } + if user_name.is_some() { nix::unistd::setuid(user.uid).into_diagnostic()?; + + // Verify effective UID actually changed (defense-in-depth, CWE-250 / CERT POS37-C) + let effective_uid = nix::unistd::geteuid(); + if effective_uid != user.uid { + return Err(miette::miette!( + "Privilege drop verification failed: expected effective UID {}, got {}", + user.uid, + effective_uid + )); + } + + // Verify root cannot be re-acquired (CERT POS37-C hardening). + // If we dropped from root, setuid(0) must fail; success means privileges + // were not fully relinquished. + if nix::unistd::setuid(nix::unistd::Uid::from_raw(0)).is_ok() && user.uid.as_raw() != 0 { + return Err(miette::miette!( + "Privilege drop verification failed: process can still re-acquire root (UID 0) \ + after switching to UID {}", + user.uid + )); + } } Ok(()) @@ -464,3 +495,92 @@ impl From for ProcessStatus { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::policy::{ + FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, SandboxPolicy, + }; + + /// Helper to create a minimal `SandboxPolicy` with the given process policy. 
+ fn policy_with_process(process: ProcessPolicy) -> SandboxPolicy { + SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy::default(), + network: NetworkPolicy::default(), + landlock: LandlockPolicy::default(), + process, + } + } + + #[test] + fn drop_privileges_noop_when_no_user_or_group() { + let policy = policy_with_process(ProcessPolicy { + run_as_user: None, + run_as_group: None, + }); + assert!(drop_privileges(&policy).is_ok()); + } + + #[test] + fn drop_privileges_noop_when_empty_strings() { + let policy = policy_with_process(ProcessPolicy { + run_as_user: Some(String::new()), + run_as_group: Some(String::new()), + }); + assert!(drop_privileges(&policy).is_ok()); + } + + #[test] + fn drop_privileges_succeeds_for_current_user() { + // Resolve the current user's name so we can ask drop_privileges to + // "switch" to the user we're already running as. This exercises the + // full verification path (getegid/geteuid checks) without needing root. + let current_user = User::from_uid(nix::unistd::geteuid()) + .expect("getpwuid") + .expect("current user entry"); + let current_group = Group::from_gid(nix::unistd::getegid()) + .expect("getgrgid") + .expect("current group entry"); + + let policy = policy_with_process(ProcessPolicy { + run_as_user: Some(current_user.name), + run_as_group: Some(current_group.name), + }); + + assert!(drop_privileges(&policy).is_ok()); + } + + #[test] + fn drop_privileges_fails_for_nonexistent_user() { + let policy = policy_with_process(ProcessPolicy { + run_as_user: Some("__nonexistent_test_user_42__".to_string()), + run_as_group: None, + }); + + let result = drop_privileges(&policy); + assert!(result.is_err()); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("not found"), + "expected 'not found' in error: {msg}" + ); + } + + #[test] + fn drop_privileges_fails_for_nonexistent_group() { + let policy = policy_with_process(ProcessPolicy { + run_as_user: None, + run_as_group: 
Some("__nonexistent_test_group_42__".to_string()), + }); + + let result = drop_privileges(&policy); + assert!(result.is_err()); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("not found"), + "expected 'not found' in error: {msg}" + ); + } +} diff --git a/crates/navigator-sandbox/src/proxy.rs b/crates/navigator-sandbox/src/proxy.rs index b57bcd61..ea562db9 100644 --- a/crates/navigator-sandbox/src/proxy.rs +++ b/crates/navigator-sandbox/src/proxy.rs @@ -18,6 +18,7 @@ use tokio::task::JoinHandle; use tracing::{debug, info, warn}; const MAX_HEADER_BYTES: usize = 8192; +const INFERENCE_LOCAL_HOST: &str = "inference.local"; /// Result of a proxy CONNECT policy decision. struct ConnectDecision { @@ -197,19 +198,38 @@ async fn handle_tcp_connection( let target = parts.next().unwrap_or(""); if method != "CONNECT" { - let target_host = extract_host_from_uri(target); - info!( - method = %method, - target_host = %target_host, - "Non-CONNECT proxy request denied" - ); - respond(&mut client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?; - return Ok(()); + return handle_forward_proxy( + method, + target, + &buf[..], + used, + &mut client, + opa_engine, + identity_cache, + entrypoint_pid, + ) + .await; } let (host, port) = parse_target(target)?; let host_lc = host.to_ascii_lowercase(); + if host_lc == INFERENCE_LOCAL_HOST { + respond(&mut client, b"HTTP/1.1 200 Connection Established\r\n\r\n").await?; + let outcome = handle_inference_interception( + client, + INFERENCE_LOCAL_HOST, + port, + tls_state.as_ref(), + inference_ctx.as_ref(), + ) + .await?; + if let InferenceOutcome::Denied { reason } = outcome { + info!(action = "deny", reason = %reason, host = INFERENCE_LOCAL_HOST, "Inference interception denied"); + } + return Ok(()); + } + let peer_addr = client.peer_addr().into_diagnostic()?; let local_addr = client.local_addr().into_diagnostic()?; @@ -225,11 +245,6 @@ async fn handle_tcp_connection( // Extract action string and matched policy for logging 
let (action_str, matched_policy, deny_reason) = match &decision.action { - NetworkAction::InspectForInference { matched_policy } => ( - "inspect_for_inference", - matched_policy.clone(), - String::new(), - ), NetworkAction::Allow { matched_policy } => ("allow", matched_policy.clone(), String::new()), NetworkAction::Deny { reason } => ("deny", None, reason.clone()), }; @@ -286,50 +301,6 @@ async fn handle_tcp_connection( return Ok(()); } - // InspectForInference: intercept the connection, don't connect upstream. - // TLS-terminate the client side, parse HTTP requests, and reroute inference - // calls through the gateway's ProxyInference gRPC endpoint. - if matches!(decision.action, NetworkAction::InspectForInference { .. }) { - respond(&mut client, b"HTTP/1.1 200 Connection Established\r\n\r\n").await?; - let outcome = handle_inference_interception( - client, - &host_lc, - port, - tls_state.as_ref(), - inference_ctx.as_ref(), - ) - .await; - - let deny_reason = match &outcome { - Ok(InferenceOutcome::Routed) => None, - Ok(InferenceOutcome::Denied { reason }) => Some(reason.clone()), - Err(e) => Some(format!("{e}")), - }; - - if let Some(reason) = deny_reason { - info!( - src_addr = %peer_addr.ip(), - src_port = peer_addr.port(), - proxy_addr = %local_addr, - dst_host = %host_lc, - dst_port = port, - binary = %binary_str, - binary_pid = %pid_str, - ancestors = %ancestors_str, - cmdline = %cmdline_str, - action = "deny", - engine = "opa", - policy = %policy_str, - reason = %reason, - "CONNECT", - ); - } - - // Propagate errors; Ok(Routed|Denied) are both terminal - outcome?; - return Ok(()); - } - // Query allowed_ips from the matched endpoint config (if any). // When present, the SSRF check validates resolved IPs against this // allowlist instead of blanket-blocking all private IPs. 
@@ -688,7 +659,7 @@ async fn handle_inference_interception( let Some(ctx) = inference_ctx else { return Ok(InferenceOutcome::Denied { - reason: "connection not allowed by policy".to_string(), + reason: "cluster inference context not configured".to_string(), }); }; @@ -786,10 +757,14 @@ async fn route_inference_request( ) -> Result { use crate::l7::inference::{detect_inference_pattern, format_http_response}; - if let Some(pattern) = detect_inference_pattern(&request.method, &request.path, &ctx.patterns) { + let normalized_path = normalize_inference_path(&request.path); + + if let Some(pattern) = + detect_inference_pattern(&request.method, &normalized_path, &ctx.patterns) + { info!( method = %request.method, - path = %request.path, + path = %normalized_path, protocol = %pattern.protocol, kind = %pattern.kind, "Intercepted inference request, routing locally" @@ -801,7 +776,10 @@ async fn route_inference_request( let routes = ctx.routes.read().await; if routes.is_empty() { - let body = serde_json::json!({"error": "inference endpoint detected without matching inference route"}); + let body = serde_json::json!({ + "error": "cluster inference is not configured", + "hint": "run: nemoclaw cluster inference set --help" + }); let body_bytes = body.to_string(); let response = format_http_response( 503, @@ -817,7 +795,7 @@ async fn route_inference_request( .proxy_with_candidates( &pattern.protocol, &request.method, - &request.path, + &normalized_path, filtered_headers, bytes::Bytes::from(request.body.clone()), &routes, @@ -848,7 +826,7 @@ async fn route_inference_request( // Not an inference request — deny info!( method = %request.method, - path = %request.path, + path = %normalized_path, "connection not allowed by policy" ); let body = serde_json::json!({"error": "connection not allowed by policy"}); @@ -866,10 +844,9 @@ async fn route_inference_request( fn router_error_to_http(err: &navigator_router::RouterError) -> (u16, String) { use navigator_router::RouterError; match err 
{ - RouterError::RouteNotFound(hint) => ( - 400, - format!("no route configured for routing_hint '{hint}'"), - ), + RouterError::RouteNotFound(hint) => { + (400, format!("no route configured for route '{hint}'")) + } RouterError::NoCompatibleRoute(protocol) => ( 400, format!("no compatible route for source protocol '{protocol}'"), @@ -941,7 +918,7 @@ fn query_l7_config( host: &str, port: u16, ) -> Option { - // Only query if action is Allow (not Deny or InspectForInference) + // Only query if action is Allow (not Deny) let has_policy = match &decision.action { NetworkAction::Allow { matched_policy } => matched_policy.is_some(), _ => false, @@ -1165,11 +1142,23 @@ fn query_allowed_ips( } } +fn normalize_inference_path(path: &str) -> String { + if let Some(scheme_idx) = path.find("://") { + let after_scheme = &path[scheme_idx + 3..]; + if let Some(path_start) = after_scheme.find('/') { + return after_scheme[path_start..].to_string(); + } + return "/".to_string(); + } + path.to_string() +} + /// Extract the hostname from an absolute-form URI used in plain HTTP proxy requests. /// /// For example, `"http://example.com/path"` yields `"example.com"` and /// `"http://example.com:8080/path"` yields `"example.com"`. Returns `"unknown"` /// if the URI cannot be parsed. +#[cfg(test)] fn extract_host_from_uri(uri: &str) -> String { // Absolute-form URIs look like "http://host[:port]/path" // Strip the scheme prefix, then extract the authority (host[:port]) before the first '/'. @@ -1192,6 +1181,405 @@ fn extract_host_from_uri(uri: &str) -> String { } } +/// Parse an absolute-form proxy request URI into its components. +/// +/// For example, `"http://10.86.8.223:8000/screenshot/"` yields +/// `("http", "10.86.8.223", 8000, "/screenshot/")`. 
+/// +/// Handles: +/// - Default port 80 for `http`, 443 for `https` +/// - IPv6 bracket notation (`[::1]`) +/// - Missing path (defaults to `/`) +/// - Query strings (preserved in path) +fn parse_proxy_uri(uri: &str) -> Result<(String, String, u16, String)> { + // Extract scheme + let (scheme, rest) = uri + .split_once("://") + .ok_or_else(|| miette::miette!("Missing scheme in proxy URI: {uri}"))?; + let scheme = scheme.to_ascii_lowercase(); + + // Split authority from path + let (authority, path) = if rest.starts_with('[') { + // IPv6: [::1]:port/path + let bracket_end = rest + .find(']') + .ok_or_else(|| miette::miette!("Unclosed IPv6 bracket in URI: {uri}"))?; + let after_bracket = &rest[bracket_end + 1..]; + if let Some(slash_pos) = after_bracket.find('/') { + ( + &rest[..bracket_end + 1 + slash_pos], + &after_bracket[slash_pos..], + ) + } else { + (&rest[..], "/") + } + } else if let Some(slash_pos) = rest.find('/') { + (&rest[..slash_pos], &rest[slash_pos..]) + } else { + (rest, "/") + }; + + // Parse host and port from authority + let (host, port) = if authority.starts_with('[') { + // IPv6: [::1]:port or [::1] + let bracket_end = authority + .find(']') + .ok_or_else(|| miette::miette!("Unclosed IPv6 bracket: {uri}"))?; + let host = &authority[1..bracket_end]; // strip brackets + let port_str = &authority[bracket_end + 1..]; + let port = if let Some(port_str) = port_str.strip_prefix(':') { + port_str + .parse::() + .map_err(|_| miette::miette!("Invalid port in URI: {uri}"))? 
+ } else { + match scheme.as_str() { + "https" => 443, + _ => 80, + } + }; + (host.to_string(), port) + } else if let Some((h, p)) = authority.rsplit_once(':') { + let port = p + .parse::() + .map_err(|_| miette::miette!("Invalid port in URI: {uri}"))?; + (h.to_string(), port) + } else { + let port = match scheme.as_str() { + "https" => 443, + _ => 80, + }; + (authority.to_string(), port) + }; + + if host.is_empty() { + return Err(miette::miette!("Empty host in URI: {uri}")); + } + + let path = if path.is_empty() { "/" } else { path }; + + Ok((scheme, host, port, path.to_string())) +} + +/// Rewrite an absolute-form HTTP proxy request to origin-form for upstream. +/// +/// Transforms `GET http://host:port/path HTTP/1.1` into `GET /path HTTP/1.1`, +/// strips proxy hop-by-hop headers, injects `Connection: close` and `Via`. +/// +/// Returns the rewritten request bytes (headers + any overflow body bytes). +fn rewrite_forward_request(raw: &[u8], used: usize, path: &str) -> Vec { + let header_end = raw[..used] + .windows(4) + .position(|w| w == b"\r\n\r\n") + .map_or(used, |p| p + 4); + + let header_str = String::from_utf8_lossy(&raw[..header_end]); + let mut lines = header_str.split("\r\n").collect::>(); + + // Rewrite request line: METHOD absolute-uri HTTP/1.1 → METHOD path HTTP/1.1 + if let Some(first_line) = lines.first_mut() { + let parts: Vec<&str> = first_line.splitn(3, ' ').collect(); + if parts.len() == 3 { + let new_line = format!("{} {} {}", parts[0], path, parts[2]); + *first_line = Box::leak(new_line.into_boxed_str()); // safe: short-lived + } + } + + // Rebuild headers, stripping hop-by-hop and adding proxy headers + let mut output = Vec::with_capacity(header_end + 128); + let mut has_connection = false; + let mut has_via = false; + + for (i, line) in lines.iter().enumerate() { + if i == 0 { + // Request line — already rewritten + output.extend_from_slice(line.as_bytes()); + output.extend_from_slice(b"\r\n"); + continue; + } + if line.is_empty() { + // 
End of headers + break; + } + + let lower = line.to_ascii_lowercase(); + + // Strip proxy hop-by-hop headers + if lower.starts_with("proxy-connection:") + || lower.starts_with("proxy-authorization:") + || lower.starts_with("proxy-authenticate:") + { + continue; + } + + // Replace Connection header + if lower.starts_with("connection:") { + has_connection = true; + output.extend_from_slice(b"Connection: close\r\n"); + continue; + } + + // Pass through other headers + output.extend_from_slice(line.as_bytes()); + output.extend_from_slice(b"\r\n"); + + if lower.starts_with("via:") { + has_via = true; + } + } + + // Inject missing headers + if !has_connection { + output.extend_from_slice(b"Connection: close\r\n"); + } + if !has_via { + output.extend_from_slice(b"Via: 1.1 navigator-sandbox\r\n"); + } + + // End of headers + output.extend_from_slice(b"\r\n"); + + // Append any overflow body bytes from the original buffer + if header_end < used { + output.extend_from_slice(&raw[header_end..used]); + } + + output +} + +/// Handle a plain HTTP forward proxy request (non-CONNECT). +/// +/// Restricted to private IP endpoints with explicit `allowed_ips` policy. +/// Rewrites the absolute-form request to origin-form, connects upstream, +/// and relays the response using `copy_bidirectional` for streaming support. +async fn handle_forward_proxy( + method: &str, + target_uri: &str, + buf: &[u8], + used: usize, + client: &mut TcpStream, + opa_engine: Arc, + identity_cache: Arc, + entrypoint_pid: Arc, +) -> Result<()> { + // 1. Parse the absolute-form URI + let (scheme, host, port, path) = match parse_proxy_uri(target_uri) { + Ok(parsed) => parsed, + Err(e) => { + warn!(target_uri = %target_uri, error = %e, "FORWARD parse error"); + respond(client, b"HTTP/1.1 400 Bad Request\r\n\r\n").await?; + return Ok(()); + } + }; + let host_lc = host.to_ascii_lowercase(); + + // 2. 
Reject HTTPS — must use CONNECT for TLS + if scheme == "https" { + info!( + dst_host = %host_lc, + dst_port = port, + "FORWARD rejected: HTTPS requires CONNECT" + ); + respond( + client, + b"HTTP/1.1 400 Bad Request\r\nContent-Length: 27\r\n\r\nUse CONNECT for HTTPS URLs", + ) + .await?; + return Ok(()); + } + + // 3. Evaluate OPA policy (same identity binding as CONNECT) + let peer_addr = client.peer_addr().into_diagnostic()?; + let local_addr = client.local_addr().into_diagnostic()?; + + let decision = evaluate_opa_tcp( + peer_addr, + &opa_engine, + &identity_cache, + &entrypoint_pid, + &host_lc, + port, + ); + + // Build log context + let binary_str = decision + .binary + .as_ref() + .map_or_else(|| "-".to_string(), |p| p.display().to_string()); + let pid_str = decision + .binary_pid + .map_or_else(|| "-".to_string(), |p| p.to_string()); + let ancestors_str = if decision.ancestors.is_empty() { + "-".to_string() + } else { + decision + .ancestors + .iter() + .map(|p| p.display().to_string()) + .collect::>() + .join(" -> ") + }; + let cmdline_str = if decision.cmdline_paths.is_empty() { + "-".to_string() + } else { + decision + .cmdline_paths + .iter() + .map(|p| p.display().to_string()) + .collect::>() + .join(", ") + }; + + // 4. Only proceed on explicit Allow — reject Deny + let matched_policy = match &decision.action { + NetworkAction::Allow { matched_policy } => matched_policy.clone(), + NetworkAction::Deny { reason } => { + info!( + src_addr = %peer_addr.ip(), + src_port = peer_addr.port(), + proxy_addr = %local_addr, + dst_host = %host_lc, + dst_port = port, + method = %method, + path = %path, + binary = %binary_str, + binary_pid = %pid_str, + ancestors = %ancestors_str, + cmdline = %cmdline_str, + action = "deny", + engine = "opa", + policy = "-", + reason = %reason, + "FORWARD", + ); + respond(client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?; + return Ok(()); + } + }; + let policy_str = matched_policy.as_deref().unwrap_or("-"); + + // 5. 
Require allowed_ips (forward proxy only works with explicit SSRF override) + let raw_allowed_ips = query_allowed_ips(&opa_engine, &decision, &host_lc, port); + if raw_allowed_ips.is_empty() { + info!( + src_addr = %peer_addr.ip(), + src_port = peer_addr.port(), + proxy_addr = %local_addr, + dst_host = %host_lc, + dst_port = port, + method = %method, + path = %path, + binary = %binary_str, + binary_pid = %pid_str, + ancestors = %ancestors_str, + cmdline = %cmdline_str, + action = "deny", + engine = "opa", + policy = %policy_str, + reason = "forward proxy requires allowed_ips on endpoint", + "FORWARD", + ); + respond(client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?; + return Ok(()); + } + + // 6. Resolve DNS and validate against allowed_ips + let addrs = match parse_allowed_ips(&raw_allowed_ips) { + Ok(nets) => match resolve_and_check_allowed_ips(&host, port, &nets).await { + Ok(addrs) => addrs, + Err(reason) => { + warn!( + dst_host = %host_lc, + dst_port = port, + reason = %reason, + "FORWARD blocked: allowed_ips check failed" + ); + respond(client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?; + return Ok(()); + } + }, + Err(reason) => { + warn!( + dst_host = %host_lc, + dst_port = port, + reason = %reason, + "FORWARD blocked: invalid allowed_ips in policy" + ); + respond(client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?; + return Ok(()); + } + }; + + // 7. 
Private-IP gate: forward proxy only to RFC 1918 private addresses + if !addrs.iter().all(|a| is_internal_ip(a.ip())) { + info!( + src_addr = %peer_addr.ip(), + src_port = peer_addr.port(), + proxy_addr = %local_addr, + dst_host = %host_lc, + dst_port = port, + method = %method, + path = %path, + binary = %binary_str, + binary_pid = %pid_str, + ancestors = %ancestors_str, + cmdline = %cmdline_str, + action = "deny", + engine = "opa", + policy = %policy_str, + reason = "forward proxy restricted to private IP endpoints", + "FORWARD", + ); + respond(client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?; + return Ok(()); + } + + // 8. Connect upstream + let mut upstream = match TcpStream::connect(addrs.as_slice()).await { + Ok(s) => s, + Err(e) => { + warn!( + dst_host = %host_lc, + dst_port = port, + error = %e, + "FORWARD upstream connect failed" + ); + respond(client, b"HTTP/1.1 502 Bad Gateway\r\n\r\n").await?; + return Ok(()); + } + }; + + // Log success + info!( + src_addr = %peer_addr.ip(), + src_port = peer_addr.port(), + proxy_addr = %local_addr, + dst_host = %host_lc, + dst_port = port, + method = %method, + path = %path, + binary = %binary_str, + binary_pid = %pid_str, + ancestors = %ancestors_str, + cmdline = %cmdline_str, + action = "allow", + engine = "opa", + policy = %policy_str, + reason = "", + "FORWARD", + ); + + // 9. Rewrite request and forward to upstream + let rewritten = rewrite_forward_request(buf, used, &path); + upstream.write_all(&rewritten).await.into_diagnostic()?; + + // 10. 
Relay remaining traffic bidirectionally (supports streaming) + let _ = tokio::io::copy_bidirectional(client, &mut upstream) + .await + .into_diagnostic()?; + + Ok(()) +} + fn parse_target(target: &str) -> Result<(String, u16)> { let (host, port_str) = target .split_once(':') @@ -1712,4 +2100,133 @@ mod tests { let result = extract_host_from_uri("not-a-uri"); assert!(!result.is_empty()); } + + // --- parse_proxy_uri tests --- + + #[test] + fn test_parse_proxy_uri_standard() { + let (scheme, host, port, path) = + parse_proxy_uri("http://10.86.8.223:8000/screenshot/").unwrap(); + assert_eq!(scheme, "http"); + assert_eq!(host, "10.86.8.223"); + assert_eq!(port, 8000); + assert_eq!(path, "/screenshot/"); + } + + #[test] + fn test_parse_proxy_uri_default_port() { + let (scheme, host, port, path) = parse_proxy_uri("http://example.com/path").unwrap(); + assert_eq!(scheme, "http"); + assert_eq!(host, "example.com"); + assert_eq!(port, 80); + assert_eq!(path, "/path"); + } + + #[test] + fn test_parse_proxy_uri_https_default_port() { + let (scheme, host, port, path) = + parse_proxy_uri("https://api.example.com/v1/chat").unwrap(); + assert_eq!(scheme, "https"); + assert_eq!(host, "api.example.com"); + assert_eq!(port, 443); + assert_eq!(path, "/v1/chat"); + } + + #[test] + fn test_parse_proxy_uri_missing_path() { + let (_, host, port, path) = parse_proxy_uri("http://10.0.0.1:9090").unwrap(); + assert_eq!(host, "10.0.0.1"); + assert_eq!(port, 9090); + assert_eq!(path, "/"); + } + + #[test] + fn test_parse_proxy_uri_with_query() { + let (_, _, _, path) = parse_proxy_uri("http://host:80/api?key=val&foo=bar").unwrap(); + assert_eq!(path, "/api?key=val&foo=bar"); + } + + #[test] + fn test_parse_proxy_uri_ipv6() { + let (_, host, port, path) = parse_proxy_uri("http://[::1]:8080/test").unwrap(); + assert_eq!(host, "::1"); + assert_eq!(port, 8080); + assert_eq!(path, "/test"); + } + + #[test] + fn test_parse_proxy_uri_ipv6_default_port() { + let (_, host, port, path) = 
parse_proxy_uri("http://[fe80::1]/path").unwrap(); + assert_eq!(host, "fe80::1"); + assert_eq!(port, 80); + assert_eq!(path, "/path"); + } + + #[test] + fn test_parse_proxy_uri_missing_scheme() { + let result = parse_proxy_uri("example.com/path"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_proxy_uri_empty_host() { + let result = parse_proxy_uri("http:///path"); + assert!(result.is_err()); + } + + // --- rewrite_forward_request tests --- + + #[test] + fn test_rewrite_get_request() { + let raw = + b"GET http://10.0.0.1:8000/api HTTP/1.1\r\nHost: 10.0.0.1:8000\r\nAccept: */*\r\n\r\n"; + let result = rewrite_forward_request(raw, raw.len(), "/api"); + let result_str = String::from_utf8_lossy(&result); + assert!(result_str.starts_with("GET /api HTTP/1.1\r\n")); + assert!(result_str.contains("Host: 10.0.0.1:8000")); + assert!(result_str.contains("Connection: close")); + assert!(result_str.contains("Via: 1.1 navigator-sandbox")); + } + + #[test] + fn test_rewrite_strips_proxy_headers() { + let raw = b"GET http://host/p HTTP/1.1\r\nHost: host\r\nProxy-Authorization: Basic abc\r\nProxy-Connection: keep-alive\r\nAccept: */*\r\n\r\n"; + let result = rewrite_forward_request(raw, raw.len(), "/p"); + let result_str = String::from_utf8_lossy(&result); + assert!( + !result_str + .to_ascii_lowercase() + .contains("proxy-authorization") + ); + assert!(!result_str.to_ascii_lowercase().contains("proxy-connection")); + assert!(result_str.contains("Accept: */*")); + } + + #[test] + fn test_rewrite_replaces_connection_header() { + let raw = b"GET http://host/p HTTP/1.1\r\nHost: host\r\nConnection: keep-alive\r\n\r\n"; + let result = rewrite_forward_request(raw, raw.len(), "/p"); + let result_str = String::from_utf8_lossy(&result); + assert!(result_str.contains("Connection: close")); + assert!(!result_str.contains("keep-alive")); + } + + #[test] + fn test_rewrite_preserves_body_overflow() { + let raw = b"POST http://host/api HTTP/1.1\r\nHost: host\r\nContent-Length: 
13\r\n\r\n{\"key\":\"val\"}"; + let result = rewrite_forward_request(raw, raw.len(), "/api"); + let result_str = String::from_utf8_lossy(&result); + assert!(result_str.contains("{\"key\":\"val\"}")); + assert!(result_str.contains("POST /api HTTP/1.1")); + } + + #[test] + fn test_rewrite_preserves_existing_via() { + let raw = b"GET http://host/p HTTP/1.1\r\nHost: host\r\nVia: 1.0 upstream\r\n\r\n"; + let result = rewrite_forward_request(raw, raw.len(), "/p"); + let result_str = String::from_utf8_lossy(&result); + assert!(result_str.contains("Via: 1.0 upstream")); + // Should not add a second Via header + assert!(!result_str.contains("Via: 1.1 navigator-sandbox")); + } } diff --git a/crates/navigator-sandbox/src/sandbox/linux/landlock.rs b/crates/navigator-sandbox/src/sandbox/linux/landlock.rs index d669be74..2b9873b5 100644 --- a/crates/navigator-sandbox/src/sandbox/linux/landlock.rs +++ b/crates/navigator-sandbox/src/sandbox/linux/landlock.rs @@ -30,7 +30,7 @@ pub fn apply(policy: &SandboxPolicy, workdir: Option<&str>) -> Result<()> { } let result: Result<()> = (|| { - let abi = ABI::V1; + let abi = ABI::V2; let access_all = AccessFs::from_all(abi); let access_read = AccessFs::from_read(abi); diff --git a/crates/navigator-sandbox/src/ssh.rs b/crates/navigator-sandbox/src/ssh.rs index 99d143d7..ab75eea4 100644 --- a/crates/navigator-sandbox/src/ssh.rs +++ b/crates/navigator-sandbox/src/ssh.rs @@ -645,6 +645,12 @@ fn spawn_pty_shell( pty.term.as_str() }; + // Inherit PATH from the container (set via Dockerfile ENV) so that + // sandbox sessions see the same tool layout without hardcoding paths. + // Tool-specific env vars (VIRTUAL_ENV, UV_PYTHON_INSTALL_DIR, etc.) are + // set in /sandbox/.bashrc by the Dockerfile and sourced via login shell. 
+ let path = std::env::var("PATH").unwrap_or_else(|_| "/usr/local/bin:/usr/bin:/bin".into()); + cmd.env_clear() .stdin(stdin) .stdout(stdout) @@ -653,7 +659,7 @@ fn spawn_pty_shell( .env("HOME", "/sandbox") .env("USER", "sandbox") .env("SHELL", "/bin/bash") - .env("PATH", "/app/.venv/bin:/usr/local/bin:/usr/bin:/bin") + .env("PATH", &path) .env("TERM", term); // Set proxy environment variables so cooperative tools (curl, wget, etc.) @@ -797,11 +803,16 @@ fn spawn_pipe_exec( }, |command| { let mut c = Command::new("/bin/bash"); - c.arg("-c").arg(command); + // Use login shell (-l) so that .profile/.bashrc are sourced and + // tool-specific env vars (VIRTUAL_ENV, UV_PYTHON_INSTALL_DIR, etc.) + // are available without hardcoding them here. + c.arg("-lc").arg(command); c }, ); + let path = std::env::var("PATH").unwrap_or_else(|_| "/usr/local/bin:/usr/bin:/bin".into()); + cmd.env_clear() .stdin(std::process::Stdio::piped()) .stdout(std::process::Stdio::piped()) @@ -810,7 +821,7 @@ fn spawn_pipe_exec( .env("HOME", "/sandbox") .env("USER", "sandbox") .env("SHELL", "/bin/bash") - .env("PATH", "/app/.venv/bin:/usr/local/bin:/usr/bin:/bin") + .env("PATH", &path) .env("TERM", "dumb"); if let Some(ref url) = proxy_url { diff --git a/crates/navigator-sandbox/testdata/sandbox-policy.yaml b/crates/navigator-sandbox/testdata/sandbox-policy.yaml index 55ceaad2..6f0011ce 100644 --- a/crates/navigator-sandbox/testdata/sandbox-policy.yaml +++ b/crates/navigator-sandbox/testdata/sandbox-policy.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # Minimal sandbox policy fixture for OPA engine tests. -# Covers the network policies, filesystem, process, and inference config +# Covers the network policies, filesystem, and process config # exercised by opa.rs unit tests. 
version: 1 @@ -63,7 +63,3 @@ network_policies: - { host: gitlab.com, port: 443 } binaries: - { path: /usr/bin/glab } - -inference: - allowed_routes: - - local diff --git a/crates/navigator-server/Cargo.toml b/crates/navigator-server/Cargo.toml index d221f493..d7d185e9 100644 --- a/crates/navigator-server/Cargo.toml +++ b/crates/navigator-server/Cargo.toml @@ -16,6 +16,7 @@ path = "src/main.rs" [dependencies] navigator-core = { path = "../navigator-core" } +navigator-policy = { path = "../navigator-policy" } # Async runtime tokio = { workspace = true } diff --git a/crates/navigator-server/src/grpc.rs b/crates/navigator-server/src/grpc.rs index ca8311ad..86094b9c 100644 --- a/crates/navigator-server/src/grpc.rs +++ b/crates/navigator-server/src/grpc.rs @@ -42,6 +42,67 @@ use russh::client::AuthResult; use crate::ServerState; +/// Maximum number of records a single list RPC may return. +/// +/// Client-provided `limit` values are clamped to this ceiling to prevent +/// unbounded memory allocation from an excessively large page request. +pub(crate) const MAX_PAGE_SIZE: u32 = 1000; + +// --------------------------------------------------------------------------- +// Field-level size limits +// +// Named constants for easy tuning. Each limit is chosen to be generous +// enough for legitimate payloads while capping resource-exhaustion vectors. +// --------------------------------------------------------------------------- + +/// Maximum length for a sandbox or provider name (Kubernetes name limit). +const MAX_NAME_LEN: usize = 253; + +/// Maximum number of providers that can be attached to a sandbox. +const MAX_PROVIDERS: usize = 32; + +/// Maximum length for the `log_level` field. +const MAX_LOG_LEVEL_LEN: usize = 32; + +/// Maximum number of entries in `spec.environment`. +const MAX_ENVIRONMENT_ENTRIES: usize = 128; + +/// Maximum length for an environment map key (bytes). +const MAX_MAP_KEY_LEN: usize = 256; + +/// Maximum length for an environment map value (bytes). 
+const MAX_MAP_VALUE_LEN: usize = 8192; + +/// Maximum length for template string fields (`image`, `runtime_class_name`, `agent_socket`). +const MAX_TEMPLATE_STRING_LEN: usize = 1024; + +/// Maximum number of entries in template map fields (`labels`, `annotations`, `environment`). +const MAX_TEMPLATE_MAP_ENTRIES: usize = 128; + +/// Maximum serialized size (bytes) for template Struct fields (`resources`, `pod_template`, +/// `volume_claim_templates`). +const MAX_TEMPLATE_STRUCT_SIZE: usize = 65_536; + +/// Maximum serialized size (bytes) for the policy field. +const MAX_POLICY_SIZE: usize = 262_144; + +/// Maximum length for a provider type slug. +const MAX_PROVIDER_TYPE_LEN: usize = 64; + +/// Maximum number of entries in the provider `credentials` map. +const MAX_PROVIDER_CREDENTIALS_ENTRIES: usize = 32; + +/// Maximum number of entries in the provider `config` map. +const MAX_PROVIDER_CONFIG_ENTRIES: usize = 64; + +/// Clamp a client-provided page `limit`. +/// +/// Returns `default` when `raw` is 0 (the protobuf zero-value convention), +/// otherwise returns the smaller of `raw` and `max`. +pub(crate) fn clamp_limit(raw: u32, default: u32, max: u32) -> u32 { + if raw == 0 { default } else { raw.min(max) } +} + /// Navigator gRPC service implementation. #[derive(Debug, Clone)] pub struct NavigatorService { @@ -77,6 +138,10 @@ impl Navigator for NavigatorService { let spec = request .spec .ok_or_else(|| Status::invalid_argument("spec is required"))?; + + // Validate field sizes before any I/O (fail fast on oversized payloads). + validate_sandbox_spec(&request.name, &spec)?; + // Validate provider names exist (fail fast). Credentials are fetched at // runtime by the sandbox supervisor via GetSandboxProviderEnvironment. for name in &spec.providers { @@ -98,6 +163,11 @@ impl Navigator for NavigatorService { template.image = self.state.sandbox_client.default_image().to_string(); } + // Validate policy safety before persisting. 
+ if let Some(ref policy) = spec.policy { + validate_policy_safety(policy)?; + } + let id = uuid::Uuid::new_v4().to_string(); let name = if request.name.is_empty() { petname::petname(2, "-").unwrap_or_else(generate_name) @@ -200,7 +270,23 @@ impl Navigator for NavigatorService { // Spawn producer task. tokio::spawn(async move { - // Subscribe to all buses BEFORE reading the initial snapshot to avoid + // Validate that the sandbox exists BEFORE subscribing to any buses. + // This prevents creating bus entries for non-existent sandbox IDs. + match state.store.get_message::(&sandbox_id).await { + Ok(Some(_)) => {} // sandbox exists, proceed + Ok(None) => { + let _ = tx.send(Err(Status::not_found("sandbox not found"))).await; + return; + } + Err(e) => { + let _ = tx + .send(Err(Status::internal(format!("fetch sandbox failed: {e}")))) + .await; + return; + } + } + + // Subscribe to all buses BEFORE reading the snapshot to avoid // missing notifications that fire between the snapshot read and subscribe. let mut status_rx = if follow_status { Some(state.sandbox_watch_bus.subscribe(&sandbox_id)) @@ -223,7 +309,8 @@ impl Navigator for NavigatorService { None }; - // Always start with a snapshot if present. + // Re-read the snapshot now that we have subscriptions active + // (avoids missing notifications between validate and subscribe). match state.store.get_message::(&sandbox_id).await { Ok(Some(sandbox)) => { state.sandbox_index.update_from_sandbox(&sandbox); @@ -248,6 +335,7 @@ impl Navigator for NavigatorService { } } Ok(None) => { + // Sandbox was deleted between validate and subscribe — end stream. 
let _ = tx.send(Err(Status::not_found("sandbox not found"))).await; return; } @@ -402,11 +490,7 @@ impl Navigator for NavigatorService { request: Request, ) -> Result, Status> { let request = request.into_inner(); - let limit = if request.limit == 0 { - 100 - } else { - request.limit - }; + let limit = clamp_limit(request.limit, 100, MAX_PAGE_SIZE); let records = self .state .store @@ -476,6 +560,11 @@ impl Navigator for NavigatorService { warn!(sandbox_id = %id, error = %e, "Failed to clean up store after delete"); } + // Clean up bus entries to prevent unbounded memory growth. + self.state.tracing_log_bus.remove(&id); + self.state.tracing_log_bus.platform_event_bus.remove(&id); + self.state.sandbox_watch_bus.remove(&id); + info!( sandbox_id = %id, sandbox_name = %sandbox.name, @@ -516,15 +605,9 @@ impl Navigator for NavigatorService { request: Request, ) -> Result, Status> { let request = request.into_inner(); - let (limit, offset) = ( - if request.limit == 0 { - 100 - } else { - request.limit - }, - request.offset, - ); - let providers = list_provider_records(self.state.store.as_ref(), limit, offset).await?; + let limit = clamp_limit(request.limit, 100, MAX_PAGE_SIZE); + let providers = + list_provider_records(self.state.store.as_ref(), limit, request.offset).await?; Ok(Response::new(ListProvidersResponse { providers })) } @@ -858,6 +941,9 @@ impl Navigator for NavigatorService { // Validate network mode hasn't changed (Block ↔ Proxy). validate_network_mode_unchanged(baseline_policy, &new_policy)?; + + // Validate policy safety (no root, no path traversal, etc.). + validate_policy_safety(&new_policy)?; } else { // No baseline policy exists (sandbox created without one). The // sandbox is syncing a locally-discovered or restrictive-default @@ -992,7 +1078,7 @@ impl Navigator for NavigatorService { .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
.ok_or_else(|| Status::not_found("sandbox not found"))?; - let limit = if req.limit == 0 { 50 } else { req.limit }; + let limit = clamp_limit(req.limit, 50, MAX_PAGE_SIZE); let records = self .state .store @@ -1139,6 +1225,7 @@ impl Navigator for NavigatorService { request: Request>, ) -> Result, Status> { let mut stream = request.into_inner(); + let mut validated = false; while let Some(batch) = stream .message() @@ -1149,6 +1236,20 @@ impl Navigator for NavigatorService { continue; } + // Validate sandbox existence once at stream open (first batch). + // Subsequent batches trust the validated sandbox_id. If the sandbox + // is deleted mid-stream, bus remove() drops the sender and publish + // silently discards via `let _ = tx.send(...)`. + if !validated { + self.state + .store + .get_message::(&batch.sandbox_id) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + validated = true; + } + // Cap lines per batch to prevent abuse. for log in batch.logs.into_iter().take(100) { let mut log = log; @@ -1188,9 +1289,6 @@ fn deterministic_policy_hash(policy: &ProtoSandboxPolicy) -> String { hasher.update(key.as_bytes()); hasher.update(value.encode_to_vec()); } - if let Some(inf) = &policy.inference { - hasher.update(inf.encode_to_vec()); - } hex::encode(hasher.finalize()) } @@ -1226,17 +1324,231 @@ fn level_matches(log_level: &str, min_level: &str) -> bool { // Policy helper functions // --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +// Sandbox spec validation +// --------------------------------------------------------------------------- + +/// Validate field sizes on a `CreateSandboxRequest` before persisting. +/// +/// Returns `INVALID_ARGUMENT` on the first field that exceeds its limit. 
+fn validate_sandbox_spec( + name: &str, + spec: &navigator_core::proto::SandboxSpec, +) -> Result<(), Status> { + // --- request.name --- + if name.len() > MAX_NAME_LEN { + return Err(Status::invalid_argument(format!( + "name exceeds maximum length ({} > {MAX_NAME_LEN})", + name.len() + ))); + } + + // --- spec.providers --- + if spec.providers.len() > MAX_PROVIDERS { + return Err(Status::invalid_argument(format!( + "providers list exceeds maximum ({} > {MAX_PROVIDERS})", + spec.providers.len() + ))); + } + + // --- spec.log_level --- + if spec.log_level.len() > MAX_LOG_LEVEL_LEN { + return Err(Status::invalid_argument(format!( + "log_level exceeds maximum length ({} > {MAX_LOG_LEVEL_LEN})", + spec.log_level.len() + ))); + } + + // --- spec.environment --- + validate_string_map( + &spec.environment, + MAX_ENVIRONMENT_ENTRIES, + MAX_MAP_KEY_LEN, + MAX_MAP_VALUE_LEN, + "spec.environment", + )?; + + // --- spec.template --- + if let Some(ref tmpl) = spec.template { + validate_sandbox_template(tmpl)?; + } + + // --- spec.policy serialized size --- + if let Some(ref policy) = spec.policy { + let size = policy.encoded_len(); + if size > MAX_POLICY_SIZE { + return Err(Status::invalid_argument(format!( + "policy serialized size exceeds maximum ({size} > {MAX_POLICY_SIZE})" + ))); + } + } + + Ok(()) +} + +/// Validate template-level field sizes. +fn validate_sandbox_template(tmpl: &SandboxTemplate) -> Result<(), Status> { + // String fields. + for (field, value) in [ + ("template.image", &tmpl.image), + ("template.runtime_class_name", &tmpl.runtime_class_name), + ("template.agent_socket", &tmpl.agent_socket), + ] { + if value.len() > MAX_TEMPLATE_STRING_LEN { + return Err(Status::invalid_argument(format!( + "{field} exceeds maximum length ({} > {MAX_TEMPLATE_STRING_LEN})", + value.len() + ))); + } + } + + // Map fields. 
+ validate_string_map( + &tmpl.labels, + MAX_TEMPLATE_MAP_ENTRIES, + MAX_MAP_KEY_LEN, + MAX_MAP_VALUE_LEN, + "template.labels", + )?; + validate_string_map( + &tmpl.annotations, + MAX_TEMPLATE_MAP_ENTRIES, + MAX_MAP_KEY_LEN, + MAX_MAP_VALUE_LEN, + "template.annotations", + )?; + validate_string_map( + &tmpl.environment, + MAX_TEMPLATE_MAP_ENTRIES, + MAX_MAP_KEY_LEN, + MAX_MAP_VALUE_LEN, + "template.environment", + )?; + + // Struct fields (serialized size). + if let Some(ref s) = tmpl.resources { + let size = s.encoded_len(); + if size > MAX_TEMPLATE_STRUCT_SIZE { + return Err(Status::invalid_argument(format!( + "template.resources serialized size exceeds maximum ({size} > {MAX_TEMPLATE_STRUCT_SIZE})" + ))); + } + } + if let Some(ref s) = tmpl.pod_template { + let size = s.encoded_len(); + if size > MAX_TEMPLATE_STRUCT_SIZE { + return Err(Status::invalid_argument(format!( + "template.pod_template serialized size exceeds maximum ({size} > {MAX_TEMPLATE_STRUCT_SIZE})" + ))); + } + } + if let Some(ref s) = tmpl.volume_claim_templates { + let size = s.encoded_len(); + if size > MAX_TEMPLATE_STRUCT_SIZE { + return Err(Status::invalid_argument(format!( + "template.volume_claim_templates serialized size exceeds maximum ({size} > {MAX_TEMPLATE_STRUCT_SIZE})" + ))); + } + } + + Ok(()) +} + +/// Validate a `map` field: entry count, key length, value length. 
+fn validate_string_map( + map: &std::collections::HashMap, + max_entries: usize, + max_key_len: usize, + max_value_len: usize, + field_name: &str, +) -> Result<(), Status> { + if map.len() > max_entries { + return Err(Status::invalid_argument(format!( + "{field_name} exceeds maximum entries ({} > {max_entries})", + map.len() + ))); + } + for (key, value) in map { + if key.len() > max_key_len { + return Err(Status::invalid_argument(format!( + "{field_name} key exceeds maximum length ({} > {max_key_len})", + key.len() + ))); + } + if value.len() > max_value_len { + return Err(Status::invalid_argument(format!( + "{field_name} value exceeds maximum length ({} > {max_value_len})", + value.len() + ))); + } + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// Provider field validation +// --------------------------------------------------------------------------- + +/// Validate field sizes on a `Provider` before persisting. +fn validate_provider_fields(provider: &Provider) -> Result<(), Status> { + if provider.name.len() > MAX_NAME_LEN { + return Err(Status::invalid_argument(format!( + "provider.name exceeds maximum length ({} > {MAX_NAME_LEN})", + provider.name.len() + ))); + } + if provider.r#type.len() > MAX_PROVIDER_TYPE_LEN { + return Err(Status::invalid_argument(format!( + "provider.type exceeds maximum length ({} > {MAX_PROVIDER_TYPE_LEN})", + provider.r#type.len() + ))); + } + validate_string_map( + &provider.credentials, + MAX_PROVIDER_CREDENTIALS_ENTRIES, + MAX_MAP_KEY_LEN, + MAX_MAP_VALUE_LEN, + "provider.credentials", + )?; + validate_string_map( + &provider.config, + MAX_PROVIDER_CONFIG_ENTRIES, + MAX_MAP_KEY_LEN, + MAX_MAP_VALUE_LEN, + "provider.config", + )?; + Ok(()) +} + +/// Validate that a policy does not contain unsafe content. +/// +/// Delegates to [`navigator_policy::validate_sandbox_policy`] and converts +/// violations into a gRPC `INVALID_ARGUMENT` status. 
+fn validate_policy_safety(policy: &ProtoSandboxPolicy) -> Result<(), Status> { + if let Err(violations) = navigator_policy::validate_sandbox_policy(policy) { + let messages: Vec = violations.iter().map(ToString::to_string).collect(); + return Err(Status::invalid_argument(format!( + "policy contains unsafe content: {}", + messages.join("; ") + ))); + } + Ok(()) +} + /// Validate that static policy fields (filesystem, landlock, process) haven't changed /// from the baseline (version 1) policy. fn validate_static_fields_unchanged( baseline: &ProtoSandboxPolicy, new: &ProtoSandboxPolicy, ) -> Result<(), Status> { - if baseline.filesystem != new.filesystem { - return Err(Status::invalid_argument( - "filesystem policy cannot be changed on a live sandbox (applied at startup)", - )); - } + // Filesystem: allow additive changes (new paths can be added, but + // existing paths cannot be removed and include_workdir cannot change). + // This supports the supervisor's baseline path enrichment at startup. + // Note: Landlock is a one-way door — adding paths to the stored policy + // has no effect on a running child process; the enriched paths only + // take effect on the next restart. + validate_filesystem_additive(baseline.filesystem.as_ref(), new.filesystem.as_ref())?; + if baseline.landlock != new.landlock { return Err(Status::invalid_argument( "landlock policy cannot be changed on a live sandbox (applied at startup)", @@ -1250,6 +1562,49 @@ fn validate_static_fields_unchanged( Ok(()) } +/// Validate that a filesystem policy update is purely additive: all baseline +/// paths must still be present, `include_workdir` must not change, but new +/// paths may be added. 
+fn validate_filesystem_additive( + baseline: Option<&navigator_core::proto::FilesystemPolicy>, + new: Option<&navigator_core::proto::FilesystemPolicy>, +) -> Result<(), Status> { + match (baseline, new) { + (Some(base), Some(upd)) => { + if base.include_workdir != upd.include_workdir { + return Err(Status::invalid_argument( + "filesystem include_workdir cannot be changed on a live sandbox", + )); + } + for path in &base.read_only { + if !upd.read_only.contains(path) { + return Err(Status::invalid_argument(format!( + "filesystem read_only path '{path}' cannot be removed on a live sandbox" + ))); + } + } + for path in &base.read_write { + if !upd.read_write.contains(path) { + return Err(Status::invalid_argument(format!( + "filesystem read_write path '{path}' cannot be removed on a live sandbox" + ))); + } + } + } + (None, Some(_)) => { + // Baseline had no filesystem policy, new one adds it — allowed + // (enrichment from empty). + } + (Some(_), None) => { + return Err(Status::invalid_argument( + "filesystem policy cannot be removed on a live sandbox", + )); + } + (None, None) => {} + } + Ok(()) +} + /// Validate that network mode hasn't changed (Block ↔ Proxy). /// Adding network_policies when none existed (or removing all) changes the mode. fn validate_network_mode_unchanged( @@ -1791,6 +2146,9 @@ async fn create_provider_record( )); } + // Validate field sizes before any I/O. 
+ validate_provider_fields(&provider)?; + let existing = store .get_message_by_name::(&provider.name) .await @@ -1916,11 +2274,16 @@ impl ObjectName for Provider { #[cfg(test)] mod tests { use super::{ + MAX_ENVIRONMENT_ENTRIES, MAX_LOG_LEVEL_LEN, MAX_MAP_KEY_LEN, MAX_MAP_VALUE_LEN, + MAX_NAME_LEN, MAX_PAGE_SIZE, MAX_POLICY_SIZE, MAX_PROVIDER_CONFIG_ENTRIES, + MAX_PROVIDER_CREDENTIALS_ENTRIES, MAX_PROVIDER_TYPE_LEN, MAX_PROVIDERS, + MAX_TEMPLATE_MAP_ENTRIES, MAX_TEMPLATE_STRING_LEN, MAX_TEMPLATE_STRUCT_SIZE, clamp_limit, create_provider_record, delete_provider_record, get_provider_record, is_valid_env_key, list_provider_records, resolve_provider_environment, update_provider_record, + validate_provider_fields, validate_sandbox_spec, }; use crate::persistence::Store; - use navigator_core::proto::Provider; + use navigator_core::proto::{Provider, SandboxSpec, SandboxTemplate}; use std::collections::HashMap; use tonic::Code; @@ -1941,6 +2304,33 @@ mod tests { assert!(!is_valid_env_key("X;rm -rf /")); } + // ---- clamp_limit tests ---- + + #[test] + fn clamp_limit_zero_returns_default() { + assert_eq!(clamp_limit(0, 100, MAX_PAGE_SIZE), 100); + assert_eq!(clamp_limit(0, 50, MAX_PAGE_SIZE), 50); + } + + #[test] + fn clamp_limit_within_range_passes_through() { + assert_eq!(clamp_limit(1, 100, MAX_PAGE_SIZE), 1); + assert_eq!(clamp_limit(500, 100, MAX_PAGE_SIZE), 500); + assert_eq!( + clamp_limit(MAX_PAGE_SIZE, 100, MAX_PAGE_SIZE), + MAX_PAGE_SIZE + ); + } + + #[test] + fn clamp_limit_exceeding_max_is_capped() { + assert_eq!( + clamp_limit(MAX_PAGE_SIZE + 1, 100, MAX_PAGE_SIZE), + MAX_PAGE_SIZE + ); + assert_eq!(clamp_limit(u32::MAX, 100, MAX_PAGE_SIZE), MAX_PAGE_SIZE); + } + fn provider_with_values(name: &str, provider_type: &str) -> Provider { Provider { id: String::new(), @@ -2320,7 +2710,75 @@ mod tests { assert!(result.is_none()); } - // ---- Policy validation tests ---- + // ---- Policy safety validation tests ---- + + #[test] + fn 
validate_policy_safety_rejects_root_user() { + use navigator_core::proto::{ + FilesystemPolicy, ProcessPolicy, SandboxPolicy as ProtoSandboxPolicy, + }; + + let policy = ProtoSandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["/usr".into()], + read_write: vec!["/tmp".into()], + }), + process: Some(ProcessPolicy { + run_as_user: "root".into(), + run_as_group: "sandbox".into(), + }), + ..Default::default() + }; + let err = super::validate_policy_safety(&policy).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("root")); + } + + #[test] + fn validate_policy_safety_rejects_path_traversal() { + use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy}; + + let policy = ProtoSandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["/usr/../etc/shadow".into()], + read_write: vec!["/tmp".into()], + }), + ..Default::default() + }; + let err = super::validate_policy_safety(&policy).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("traversal")); + } + + #[test] + fn validate_policy_safety_rejects_overly_broad_path() { + use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy}; + + let policy = ProtoSandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["/usr".into()], + read_write: vec!["/".into()], + }), + ..Default::default() + }; + let err = super::validate_policy_safety(&policy).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("broad")); + } + + #[test] + fn validate_policy_safety_accepts_valid_policy() { + let policy = navigator_policy::restrictive_default_policy(); + assert!(super::validate_policy_safety(&policy).is_ok()); + } + + // ---- Static field validation tests ---- #[test] fn 
validate_static_fields_allows_unchanged() { @@ -2350,7 +2808,7 @@ mod tests { } #[test] - fn validate_static_fields_rejects_filesystem_change() { + fn validate_static_fields_allows_additive_filesystem() { use super::validate_static_fields_unchanged; use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy}; @@ -2361,16 +2819,107 @@ mod tests { }), ..Default::default() }; - let changed = ProtoSandboxPolicy { + // Adding /lib is purely additive — should be allowed. + let additive = ProtoSandboxPolicy { + filesystem: Some(FilesystemPolicy { + read_only: vec!["/usr".into(), "/lib".into()], + ..Default::default() + }), + ..Default::default() + }; + assert!(validate_static_fields_unchanged(&baseline, &additive).is_ok()); + } + + #[test] + fn validate_static_fields_rejects_filesystem_removal() { + use super::validate_static_fields_unchanged; + use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy}; + + let baseline = ProtoSandboxPolicy { filesystem: Some(FilesystemPolicy { read_only: vec!["/usr".into(), "/lib".into()], ..Default::default() }), ..Default::default() }; + // Removing /lib should be rejected. + let removed = ProtoSandboxPolicy { + filesystem: Some(FilesystemPolicy { + read_only: vec!["/usr".into()], + ..Default::default() + }), + ..Default::default() + }; + let result = validate_static_fields_unchanged(&baseline, &removed); + assert!(result.is_err()); + assert!(result.unwrap_err().message().contains("/lib")); + } + + #[test] + fn validate_static_fields_rejects_filesystem_deletion() { + use super::validate_static_fields_unchanged; + use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy}; + + let baseline = ProtoSandboxPolicy { + filesystem: Some(FilesystemPolicy { + read_only: vec!["/usr".into()], + ..Default::default() + }), + ..Default::default() + }; + // Removing filesystem entirely should be rejected. 
+ let deleted = ProtoSandboxPolicy { + filesystem: None, + ..Default::default() + }; + let result = validate_static_fields_unchanged(&baseline, &deleted); + assert!(result.is_err()); + assert!(result.unwrap_err().message().contains("removed")); + } + + #[test] + fn validate_static_fields_allows_filesystem_enrichment_from_none() { + use super::validate_static_fields_unchanged; + use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy}; + + let baseline = ProtoSandboxPolicy { + filesystem: None, + ..Default::default() + }; + // Adding filesystem when baseline had none — enrichment, allowed. + let enriched = ProtoSandboxPolicy { + filesystem: Some(FilesystemPolicy { + read_only: vec!["/usr".into(), "/lib".into(), "/etc".into()], + read_write: vec!["/sandbox".into(), "/tmp".into()], + include_workdir: true, + }), + ..Default::default() + }; + assert!(validate_static_fields_unchanged(&baseline, &enriched).is_ok()); + } + + #[test] + fn validate_static_fields_rejects_include_workdir_change() { + use super::validate_static_fields_unchanged; + use navigator_core::proto::{FilesystemPolicy, SandboxPolicy as ProtoSandboxPolicy}; + + let baseline = ProtoSandboxPolicy { + filesystem: Some(FilesystemPolicy { + include_workdir: true, + ..Default::default() + }), + ..Default::default() + }; + let changed = ProtoSandboxPolicy { + filesystem: Some(FilesystemPolicy { + include_workdir: false, + ..Default::default() + }), + ..Default::default() + }; let result = validate_static_fields_unchanged(&baseline, &changed); assert!(result.is_err()); - assert!(result.unwrap_err().message().contains("filesystem")); + assert!(result.unwrap_err().message().contains("include_workdir")); } #[test] @@ -2519,4 +3068,320 @@ mod tests { ); } } + + // ---- Field-level size limit tests ---- + + fn default_spec() -> SandboxSpec { + SandboxSpec::default() + } + + #[test] + fn validate_sandbox_spec_accepts_empty_defaults() { + assert!(validate_sandbox_spec("", 
&default_spec()).is_ok()); + } + + #[test] + fn validate_sandbox_spec_accepts_at_limit_name() { + let name = "a".repeat(MAX_NAME_LEN); + assert!(validate_sandbox_spec(&name, &default_spec()).is_ok()); + } + + #[test] + fn validate_sandbox_spec_rejects_over_limit_name() { + let name = "a".repeat(MAX_NAME_LEN + 1); + let err = validate_sandbox_spec(&name, &default_spec()).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("name")); + } + + #[test] + fn validate_sandbox_spec_accepts_at_limit_providers() { + let spec = SandboxSpec { + providers: (0..MAX_PROVIDERS).map(|i| format!("p-{i}")).collect(), + ..Default::default() + }; + assert!(validate_sandbox_spec("ok", &spec).is_ok()); + } + + #[test] + fn validate_sandbox_spec_rejects_over_limit_providers() { + let spec = SandboxSpec { + providers: (0..=MAX_PROVIDERS).map(|i| format!("p-{i}")).collect(), + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("providers")); + } + + #[test] + fn validate_sandbox_spec_rejects_over_limit_log_level() { + let spec = SandboxSpec { + log_level: "x".repeat(MAX_LOG_LEVEL_LEN + 1), + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("log_level")); + } + + #[test] + fn validate_sandbox_spec_rejects_too_many_env_entries() { + let env: HashMap = (0..=MAX_ENVIRONMENT_ENTRIES) + .map(|i| (format!("K{i}"), "v".to_string())) + .collect(); + let spec = SandboxSpec { + environment: env, + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("environment")); + } + + #[test] + fn validate_sandbox_spec_rejects_oversized_env_key() { + let mut env = HashMap::new(); + env.insert("k".repeat(MAX_MAP_KEY_LEN + 
1), "v".to_string()); + let spec = SandboxSpec { + environment: env, + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("key")); + } + + #[test] + fn validate_sandbox_spec_rejects_oversized_env_value() { + let mut env = HashMap::new(); + env.insert("KEY".to_string(), "v".repeat(MAX_MAP_VALUE_LEN + 1)); + let spec = SandboxSpec { + environment: env, + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("value")); + } + + #[test] + fn validate_sandbox_spec_rejects_oversized_template_image() { + let spec = SandboxSpec { + template: Some(SandboxTemplate { + image: "x".repeat(MAX_TEMPLATE_STRING_LEN + 1), + ..Default::default() + }), + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("template.image")); + } + + #[test] + fn validate_sandbox_spec_rejects_too_many_template_labels() { + let labels: HashMap = (0..=MAX_TEMPLATE_MAP_ENTRIES) + .map(|i| (format!("k{i}"), "v".to_string())) + .collect(); + let spec = SandboxSpec { + template: Some(SandboxTemplate { + labels, + ..Default::default() + }), + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("template.labels")); + } + + #[test] + fn validate_sandbox_spec_rejects_oversized_template_struct() { + use prost_types::{Struct, Value, value::Kind}; + + // Build a Struct with enough data to exceed MAX_TEMPLATE_STRUCT_SIZE. 
+ let mut fields = std::collections::BTreeMap::new(); + let big_str = "x".repeat(MAX_TEMPLATE_STRUCT_SIZE); + fields.insert( + "big".to_string(), + Value { + kind: Some(Kind::StringValue(big_str)), + }, + ); + let big_struct = Struct { fields }; + let spec = SandboxSpec { + template: Some(SandboxTemplate { + resources: Some(big_struct), + ..Default::default() + }), + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("template.resources")); + } + + #[test] + fn validate_sandbox_spec_rejects_oversized_policy() { + use navigator_core::proto::NetworkPolicyRule; + use navigator_core::proto::SandboxPolicy as ProtoSandboxPolicy; + + // Build a policy large enough to exceed MAX_POLICY_SIZE. + let mut policy = ProtoSandboxPolicy::default(); + let big_name = "x".repeat(MAX_POLICY_SIZE); + policy + .network_policies + .insert(big_name, NetworkPolicyRule::default()); + let spec = SandboxSpec { + policy: Some(policy), + ..Default::default() + }; + let err = validate_sandbox_spec("ok", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("policy")); + } + + #[test] + fn validate_sandbox_spec_accepts_valid_spec() { + let spec = SandboxSpec { + log_level: "debug".to_string(), + providers: vec!["p1".to_string()], + environment: std::iter::once(("KEY".to_string(), "val".to_string())).collect(), + template: Some(SandboxTemplate { + image: "nvcr.io/test:latest".to_string(), + runtime_class_name: "kata".to_string(), + labels: std::iter::once(("app".to_string(), "test".to_string())).collect(), + ..Default::default() + }), + ..Default::default() + }; + assert!(validate_sandbox_spec("my-sandbox", &spec).is_ok()); + } + + // ---- Provider field limit tests ---- + + /// Helper: a single-entry credentials map for test providers. 
+ fn one_credential() -> HashMap { + std::iter::once(("KEY".to_string(), "val".to_string())).collect() + } + + #[test] + fn validate_provider_fields_accepts_valid() { + let provider = Provider { + id: String::new(), + name: "my-provider".to_string(), + r#type: "claude".to_string(), + credentials: one_credential(), + config: std::iter::once(("endpoint".to_string(), "https://example.com".to_string())) + .collect(), + }; + assert!(validate_provider_fields(&provider).is_ok()); + } + + #[test] + fn validate_provider_fields_rejects_over_limit_name() { + let provider = Provider { + id: String::new(), + name: "a".repeat(MAX_NAME_LEN + 1), + r#type: "claude".to_string(), + credentials: one_credential(), + config: HashMap::new(), + }; + let err = validate_provider_fields(&provider).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("provider.name")); + } + + #[test] + fn validate_provider_fields_rejects_over_limit_type() { + let provider = Provider { + id: String::new(), + name: "ok".to_string(), + r#type: "x".repeat(MAX_PROVIDER_TYPE_LEN + 1), + credentials: one_credential(), + config: HashMap::new(), + }; + let err = validate_provider_fields(&provider).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("provider.type")); + } + + #[test] + fn validate_provider_fields_rejects_too_many_credentials() { + let creds: HashMap = (0..=MAX_PROVIDER_CREDENTIALS_ENTRIES) + .map(|i| (format!("K{i}"), "v".to_string())) + .collect(); + let provider = Provider { + id: String::new(), + name: "ok".to_string(), + r#type: "claude".to_string(), + credentials: creds, + config: HashMap::new(), + }; + let err = validate_provider_fields(&provider).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("provider.credentials")); + } + + #[test] + fn validate_provider_fields_rejects_too_many_config() { + let config: HashMap = (0..=MAX_PROVIDER_CONFIG_ENTRIES) + .map(|i| 
(format!("K{i}"), "v".to_string())) + .collect(); + let provider = Provider { + id: String::new(), + name: "ok".to_string(), + r#type: "claude".to_string(), + credentials: one_credential(), + config, + }; + let err = validate_provider_fields(&provider).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("provider.config")); + } + + #[test] + fn validate_provider_fields_at_limit_name_accepted() { + let provider = Provider { + id: String::new(), + name: "a".repeat(MAX_NAME_LEN), + r#type: "claude".to_string(), + credentials: one_credential(), + config: HashMap::new(), + }; + assert!(validate_provider_fields(&provider).is_ok()); + } + + #[test] + fn validate_provider_fields_rejects_oversized_credential_key() { + let mut creds = HashMap::new(); + creds.insert("k".repeat(MAX_MAP_KEY_LEN + 1), "v".to_string()); + let provider = Provider { + id: String::new(), + name: "ok".to_string(), + r#type: "claude".to_string(), + credentials: creds, + config: HashMap::new(), + }; + let err = validate_provider_fields(&provider).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("key")); + } + + #[test] + fn validate_provider_fields_rejects_oversized_config_value() { + let mut config = HashMap::new(); + config.insert("k".to_string(), "v".repeat(MAX_MAP_VALUE_LEN + 1)); + let provider = Provider { + id: String::new(), + name: "ok".to_string(), + r#type: "claude".to_string(), + credentials: one_credential(), + config, + }; + let err = validate_provider_fields(&provider).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("value")); + } } diff --git a/crates/navigator-server/src/inference.rs b/crates/navigator-server/src/inference.rs index 178938dc..201e619e 100644 --- a/crates/navigator-server/src/inference.rs +++ b/crates/navigator-server/src/inference.rs @@ -2,18 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 use navigator_core::proto::{ - 
DeleteInferenceRouteRequest, DeleteInferenceRouteResponse, GetSandboxInferenceBundleRequest, - GetSandboxInferenceBundleResponse, InferenceRoute, InferenceRouteResponse, - ListInferenceRoutesRequest, ListInferenceRoutesResponse, Sandbox, SandboxResolvedRoute, - UpdateInferenceRouteRequest, inference_server::Inference, + ClusterInferenceConfig, GetClusterInferenceRequest, GetClusterInferenceResponse, + GetInferenceBundleRequest, GetInferenceBundleResponse, InferenceRoute, Provider, ResolvedRoute, + SetClusterInferenceRequest, SetClusterInferenceResponse, inference_server::Inference, }; -use prost::Message; use std::sync::Arc; use tonic::{Request, Response, Status}; use crate::{ ServerState, - persistence::{ObjectId, ObjectName, ObjectType, Store, generate_name}, + persistence::{ObjectId, ObjectName, ObjectType, Store}, }; #[derive(Debug)] @@ -27,6 +25,8 @@ impl InferenceService { } } +const CLUSTER_INFERENCE_ROUTE_NAME: &str = "inference.local"; + impl ObjectType for InferenceRoute { fn object_type() -> &'static str { "inference_route" @@ -47,210 +47,221 @@ impl ObjectName for InferenceRoute { #[tonic::async_trait] impl Inference for InferenceService { - async fn get_sandbox_inference_bundle( + async fn get_inference_bundle( &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - resolve_sandbox_inference_bundle(self.state.store.as_ref(), &req.sandbox_id) + _request: Request, + ) -> Result, Status> { + resolve_inference_bundle(self.state.store.as_ref()) .await .map(Response::new) } - async fn create_inference_route( + async fn set_cluster_inference( &self, - request: Request, - ) -> Result, Status> { + request: Request, + ) -> Result, Status> { let req = request.into_inner(); - let mut spec = req - .route - .ok_or_else(|| Status::invalid_argument("route is required"))?; - normalize_route_protocols(&mut spec); - validate_route_spec(&spec)?; - - let name = if req.name.is_empty() { - generate_name() - } else { - req.name - }; + let route 
= upsert_cluster_inference_route( + self.state.store.as_ref(), + &req.provider_name, + &req.model_id, + ) + .await?; + + let config = route + .config + .as_ref() + .ok_or_else(|| Status::internal("managed route missing config"))?; + + Ok(Response::new(SetClusterInferenceResponse { + provider_name: config.provider_name.clone(), + model_id: config.model_id.clone(), + version: route.version, + })) + } - let existing = self + async fn get_cluster_inference( + &self, + _request: Request, + ) -> Result, Status> { + let route = self .state .store - .get_message_by_name::(&name) + .get_message_by_name::(CLUSTER_INFERENCE_ROUTE_NAME) .await - .map_err(|e| Status::internal(format!("fetch route failed: {e}")))?; - - if existing.is_some() { - return Err(Status::already_exists("route already exists")); + .map_err(|e| Status::internal(format!("fetch route failed: {e}")))? + .ok_or_else(|| { + Status::not_found( + "cluster inference is not configured; run 'nemoclaw cluster inference set --provider --model '", + ) + })?; + + let config = route + .config + .as_ref() + .ok_or_else(|| Status::internal("managed route missing config"))?; + + if config.provider_name.trim().is_empty() || config.model_id.trim().is_empty() { + return Err(Status::failed_precondition( + "managed route is missing provider/model metadata", + )); } - let route = InferenceRoute { - id: uuid::Uuid::new_v4().to_string(), - name, - spec: Some(spec), - }; - - self.state - .store - .put_message(&route) - .await - .map_err(|e| Status::internal(format!("persist route failed: {e}")))?; + Ok(Response::new(GetClusterInferenceResponse { + provider_name: config.provider_name.clone(), + model_id: config.model_id.clone(), + version: route.version, + })) + } +} - Ok(Response::new(InferenceRouteResponse { route: Some(route) })) +async fn upsert_cluster_inference_route( + store: &Store, + provider_name: &str, + model_id: &str, +) -> Result { + if provider_name.trim().is_empty() { + return 
Err(Status::invalid_argument("provider_name is required")); + } + if model_id.trim().is_empty() { + return Err(Status::invalid_argument("model_id is required")); } - async fn update_inference_route( - &self, - request: Request, - ) -> Result, Status> { - let request = request.into_inner(); - if request.name.is_empty() { - return Err(Status::invalid_argument("name is required")); - } - let mut spec = request - .route - .ok_or_else(|| Status::invalid_argument("route is required"))?; - normalize_route_protocols(&mut spec); - validate_route_spec(&spec)?; + let provider = store + .get_message_by_name::(provider_name) + .await + .map_err(|e| Status::internal(format!("fetch provider failed: {e}")))? + .ok_or_else(|| { + Status::failed_precondition(format!("provider '{provider_name}' not found")) + })?; - let existing = self - .state - .store - .get_message_by_name::(&request.name) - .await - .map_err(|e| Status::internal(format!("fetch route failed: {e}")))?; + // Validate provider shape at set time; endpoint/auth are resolved from the + // provider record when generating sandbox bundles. + let _ = resolve_provider_route(&provider)?; - let Some(existing) = existing else { - return Err(Status::not_found("route not found")); - }; + let config = build_cluster_inference_config(&provider, model_id); - // Preserve the stored id; update payload fields only. 
- let route = InferenceRoute { + let existing = store + .get_message_by_name::(CLUSTER_INFERENCE_ROUTE_NAME) + .await + .map_err(|e| Status::internal(format!("fetch route failed: {e}")))?; + + let route = if let Some(existing) = existing { + InferenceRoute { id: existing.id, name: existing.name, - spec: Some(spec), - }; + config: Some(config), + version: existing.version.saturating_add(1), + } + } else { + InferenceRoute { + id: uuid::Uuid::new_v4().to_string(), + name: CLUSTER_INFERENCE_ROUTE_NAME.to_string(), + config: Some(config), + version: 1, + } + }; - self.state - .store - .put_message(&route) - .await - .map_err(|e| Status::internal(format!("persist route failed: {e}")))?; + store + .put_message(&route) + .await + .map_err(|e| Status::internal(format!("persist route failed: {e}")))?; - Ok(Response::new(InferenceRouteResponse { route: Some(route) })) - } + Ok(route) +} - async fn delete_inference_route( - &self, - request: Request, - ) -> Result, Status> { - let name = request.into_inner().name; - if name.is_empty() { - return Err(Status::invalid_argument("name is required")); - } +fn build_cluster_inference_config(provider: &Provider, model_id: &str) -> ClusterInferenceConfig { + ClusterInferenceConfig { + provider_name: provider.name.clone(), + model_id: model_id.to_string(), + } +} - let deleted = self - .state - .store - .delete_by_name(InferenceRoute::object_type(), &name) - .await - .map_err(|e| Status::internal(format!("delete route failed: {e}")))?; +struct ResolvedProviderRoute { + provider_type: String, + base_url: String, + protocols: Vec, + api_key: String, +} - Ok(Response::new(DeleteInferenceRouteResponse { deleted })) +fn resolve_provider_route(provider: &Provider) -> Result { + let provider_type = provider.r#type.trim().to_ascii_lowercase(); + + let profile = navigator_core::inference::profile_for(&provider_type).ok_or_else(|| { + Status::invalid_argument(format!( + "provider '{name}' has unsupported type '{provider_type}' for cluster 
inference \ + (supported: openai, anthropic, nvidia)", + name = provider.name + )) + })?; + + let api_key = + find_provider_api_key(provider, profile.credential_key_names).ok_or_else(|| { + Status::invalid_argument(format!( + "provider '{name}' has no usable API key credential", + name = provider.name + )) + })?; + + let base_url = find_provider_config_value(provider, profile.base_url_config_keys) + .unwrap_or_else(|| profile.default_base_url.to_string()) + .trim() + .to_string(); + + if base_url.is_empty() { + return Err(Status::invalid_argument(format!( + "provider '{name}' resolved to empty base_url", + name = provider.name + ))); } - async fn list_inference_routes( - &self, - request: Request, - ) -> Result, Status> { - let request = request.into_inner(); - let limit = if request.limit == 0 { - 100 - } else { - request.limit - }; - - let records = self - .state - .store - .list(InferenceRoute::object_type(), limit, request.offset) - .await - .map_err(|e| Status::internal(format!("list routes failed: {e}")))?; + Ok(ResolvedProviderRoute { + provider_type, + base_url, + protocols: profile.protocols.iter().map(|p| (*p).to_string()).collect(), + api_key, + }) +} - let mut routes = Vec::with_capacity(records.len()); - for record in records { - let route = InferenceRoute::decode(record.payload.as_slice()) - .map_err(|e| Status::internal(format!("decode route failed: {e}")))?; - routes.push(route); +fn find_provider_api_key(provider: &Provider, preferred_key_names: &[&str]) -> Option { + for key in preferred_key_names { + if let Some(value) = provider.credentials.get(*key) + && !value.trim().is_empty() + { + return Some(value.clone()); } - - Ok(Response::new(ListInferenceRoutesResponse { routes })) } -} -#[allow(clippy::result_large_err)] -fn validate_route_spec(spec: &navigator_core::proto::InferenceRouteSpec) -> Result<(), Status> { - if spec.routing_hint.trim().is_empty() { - return Err(Status::invalid_argument("route.routing_hint is required")); - } - if 
spec.base_url.trim().is_empty() { - return Err(Status::invalid_argument("route.base_url is required")); - } - if navigator_core::inference::normalize_protocols(&spec.protocols).is_empty() { - return Err(Status::invalid_argument("route.protocols is required")); - } - if spec.model_id.trim().is_empty() { - return Err(Status::invalid_argument("route.model_id is required")); + let mut keys = provider.credentials.keys().collect::>(); + keys.sort(); + for key in keys { + if let Some(value) = provider.credentials.get(key) + && !value.trim().is_empty() + { + return Some(value.clone()); + } } - Ok(()) -} -fn normalize_route_protocols(spec: &mut navigator_core::proto::InferenceRouteSpec) { - spec.protocols = navigator_core::inference::normalize_protocols(&spec.protocols); + None } -/// Resolve a full inference bundle for a sandbox. -/// -/// Loads the sandbox from the store, extracts the inference policy, filters -/// routes by `allowed_routes`, and computes a revision hash. -async fn resolve_sandbox_inference_bundle( - store: &Store, - sandbox_id: &str, -) -> Result { - if sandbox_id.is_empty() { - return Err(Status::invalid_argument("sandbox_id is required")); - } - - let sandbox = store - .get_message::(sandbox_id) - .await - .map_err(|e| Status::internal(format!("failed to load sandbox: {e}")))? 
- .ok_or_else(|| Status::not_found(format!("sandbox {sandbox_id} not found")))?; - - let policy = sandbox - .spec - .as_ref() - .and_then(|s| s.policy.as_ref()) - .and_then(|p| p.inference.as_ref()); - - let allowed_routes = match policy { - Some(inference_policy) => { - if inference_policy.allowed_routes.is_empty() { - return Err(Status::permission_denied( - "sandbox inference policy has no allowed routes", - )); - } - inference_policy.allowed_routes.clone() +fn find_provider_config_value(provider: &Provider, preferred_keys: &[&str]) -> Option { + for key in preferred_keys { + if let Some(value) = provider.config.get(*key) + && !value.trim().is_empty() + { + return Some(value.clone()); } - None => { - return Err(Status::permission_denied( - "sandbox has no inference policy configured", - )); - } - }; + } + None +} - let routes = list_sandbox_routes(store, &allowed_routes).await?; +/// Resolve the inference bundle (managed cluster route + revision hash). +async fn resolve_inference_bundle(store: &Store) -> Result { + let routes = resolve_managed_cluster_route(store) + .await? + .into_iter() + .collect::>(); let now_ms = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) @@ -262,351 +273,309 @@ async fn resolve_sandbox_inference_bundle( use std::hash::{Hash, Hasher}; let mut hasher = std::collections::hash_map::DefaultHasher::new(); for r in &routes { - r.routing_hint.hash(&mut hasher); + r.name.hash(&mut hasher); r.base_url.hash(&mut hasher); r.model_id.hash(&mut hasher); r.api_key.hash(&mut hasher); r.protocols.hash(&mut hasher); + r.provider_type.hash(&mut hasher); } format!("{:016x}", hasher.finish()) }; - Ok(GetSandboxInferenceBundleResponse { + Ok(GetInferenceBundleResponse { routes, revision, generated_at_ms: now_ms, }) } -/// Resolve inference routes from the store as sandbox-ready bundle entries. -/// -/// Routes are matched by `routing_hint` against the `allowed_routes` list -/// from the sandbox's inference policy. 
Only enabled routes are returned. -async fn list_sandbox_routes( - store: &Store, - allowed_routes: &[String], -) -> Result, Status> { - let mut allowed_set = std::collections::HashSet::new(); - for name in allowed_routes { - allowed_set.insert(name.as_str()); - } - - let records = store - .list(InferenceRoute::object_type(), 500, 0) +async fn resolve_managed_cluster_route(store: &Store) -> Result, Status> { + let route = store + .get_message_by_name::(CLUSTER_INFERENCE_ROUTE_NAME) .await - .map_err(|e| Status::internal(format!("list routes failed: {e}")))?; - - let mut routes = Vec::new(); - for record in records { - let route = InferenceRoute::decode(record.payload.as_slice()) - .map_err(|e| Status::internal(format!("decode route failed: {e}")))?; - let Some(spec) = route.spec.as_ref() else { - continue; - }; - if !spec.enabled { - continue; - } - if !allowed_set.contains(spec.routing_hint.as_str()) { - continue; - } + .map_err(|e| Status::internal(format!("fetch route failed: {e}")))?; - let protocols = navigator_core::inference::normalize_protocols(&spec.protocols); - if protocols.is_empty() { - continue; - } + let Some(route) = route else { + return Ok(None); + }; + + let Some(config) = route.config.as_ref() else { + return Ok(None); + }; - routes.push(SandboxResolvedRoute { - routing_hint: spec.routing_hint.clone(), - base_url: spec.base_url.clone(), - model_id: spec.model_id.clone(), - api_key: spec.api_key.clone(), - protocols, - }); + if config.provider_name.trim().is_empty() { + return Err(Status::failed_precondition( + "managed route is missing provider_name", + )); } - Ok(routes) + if config.model_id.trim().is_empty() { + return Err(Status::failed_precondition( + "managed route is missing model_id", + )); + } + + let provider = store + .get_message_by_name::(&config.provider_name) + .await + .map_err(|e| Status::internal(format!("fetch provider failed: {e}")))? 
+ .ok_or_else(|| { + Status::failed_precondition(format!( + "configured provider '{}' was not found", + config.provider_name + )) + })?; + + let resolved = resolve_provider_route(&provider)?; + + Ok(Some(ResolvedRoute { + name: CLUSTER_INFERENCE_ROUTE_NAME.to_string(), + base_url: resolved.base_url, + model_id: config.model_id.clone(), + api_key: resolved.api_key, + protocols: resolved.protocols, + provider_type: resolved.provider_type, + })) } #[cfg(test)] mod tests { use super::*; - use navigator_core::proto::InferenceRouteSpec; - fn make_route(id: &str, name: &str, routing_hint: &str, enabled: bool) -> InferenceRoute { + fn make_route(name: &str, provider_name: &str, model_id: &str) -> InferenceRoute { InferenceRoute { - id: id.to_string(), + id: format!("id-{name}"), name: name.to_string(), - spec: Some(InferenceRouteSpec { - routing_hint: routing_hint.to_string(), - base_url: "https://example.com/v1".to_string(), - api_key: "test-key".to_string(), - model_id: "test/model".to_string(), - enabled, - protocols: vec!["openai_chat_completions".to_string()], + config: Some(ClusterInferenceConfig { + provider_name: provider_name.to_string(), + model_id: model_id.to_string(), }), + version: 1, } } - #[test] - fn validate_route_spec_requires_fields() { - let spec = InferenceRouteSpec { - routing_hint: String::new(), - base_url: String::new(), - api_key: String::new(), - model_id: String::new(), - enabled: true, - protocols: Vec::new(), - }; - let err = validate_route_spec(&spec).unwrap_err(); - assert_eq!(err.code(), tonic::Code::InvalidArgument); - } - - #[test] - fn normalize_route_protocols_dedupes_and_lowercases() { - let mut spec = InferenceRouteSpec { - routing_hint: "local".to_string(), - base_url: "https://example.com/v1".to_string(), - api_key: "test-key".to_string(), - model_id: "model".to_string(), - enabled: true, - protocols: vec![ - "OpenAI_Chat_Completions".to_string(), - "openai_chat_completions".to_string(), - "anthropic_messages".to_string(), - ], - 
}; - - normalize_route_protocols(&mut spec); - - assert_eq!( - spec.protocols, - vec![ - "openai_chat_completions".to_string(), - "anthropic_messages".to_string() - ] - ); + fn make_provider(name: &str, provider_type: &str, key_name: &str, key_value: &str) -> Provider { + Provider { + id: format!("provider-{name}"), + name: name.to_string(), + r#type: provider_type.to_string(), + credentials: std::iter::once((key_name.to_string(), key_value.to_string())).collect(), + config: std::collections::HashMap::new(), + } } #[tokio::test] - async fn list_sandbox_routes_returns_enabled_allowed_routes() { + async fn upsert_cluster_route_creates_and_increments_version() { let store = Store::connect("sqlite::memory:?cache=shared") .await .expect("store should connect"); - let route_disabled = make_route("r-1", "route-a", "local", false); + let provider = make_provider("openai-dev", "openai", "OPENAI_API_KEY", "sk-test"); store - .put_message(&route_disabled) + .put_message(&provider) .await - .expect("disabled route should persist"); + .expect("provider should persist"); - let route_enabled = make_route("r-2", "route-b", "local", true); - store - .put_message(&route_enabled) + let first = upsert_cluster_inference_route(&store, "openai-dev", "gpt-4o") .await - .expect("enabled route should persist"); + .expect("first set should succeed"); + assert_eq!(first.name, CLUSTER_INFERENCE_ROUTE_NAME); + assert_eq!(first.version, 1); - let routes = list_sandbox_routes(&store, &["local".to_string()]) + let second = upsert_cluster_inference_route(&store, "openai-dev", "gpt-4.1") .await - .expect("routes should resolve"); + .expect("second set should succeed"); + assert_eq!(second.version, 2); + assert_eq!(second.id, first.id); - assert_eq!(routes.len(), 1); - assert_eq!(routes[0].routing_hint, "local"); - assert_eq!(routes[0].protocols, vec!["openai_chat_completions"]); + let config = second.config.as_ref().expect("config"); + assert_eq!(config.provider_name, "openai-dev"); + 
assert_eq!(config.model_id, "gpt-4.1"); } #[tokio::test] - async fn list_sandbox_routes_filters_by_allowed_routes() { + async fn resolve_managed_route_returns_none_when_missing() { let store = Store::connect("sqlite::memory:?cache=shared") .await .expect("store should connect"); - let route = make_route("r-1", "route-c", "frontier", true); - store - .put_message(&route) - .await - .expect("route should persist"); - - let routes = list_sandbox_routes(&store, &["local".to_string()]) + let route = resolve_managed_cluster_route(&store) .await - .expect("routes should resolve"); - assert!(routes.is_empty()); - } - - // -- resolve_sandbox_inference_bundle tests -- - - fn make_sandbox(id: &str, allowed_routes: Option>) -> Sandbox { - use navigator_core::proto::SandboxSpec; - - let policy = - allowed_routes.map(|routes| navigator_core::proto::sandbox::v1::SandboxPolicy { - inference: Some(navigator_core::proto::sandbox::v1::InferencePolicy { - allowed_routes: routes, - ..Default::default() - }), - ..Default::default() - }); - - Sandbox { - id: id.to_string(), - name: format!("sandbox-{id}"), - spec: Some(SandboxSpec { - policy, - ..Default::default() - }), - ..Default::default() - } + .expect("resolution should not fail"); + assert!(route.is_none()); } #[tokio::test] - async fn bundle_happy_path_returns_matching_routes() { + async fn bundle_happy_path_returns_managed_route() { let store = Store::connect("sqlite::memory:?cache=shared") .await .expect("store"); - let sandbox = make_sandbox("sb-1", Some(vec!["local".into()])); - store.put_message(&sandbox).await.expect("persist sandbox"); + let provider = make_provider("openai-dev", "openai", "OPENAI_API_KEY", "sk-test"); + store + .put_message(&provider) + .await + .expect("persist provider"); - let route = make_route("r-1", "route-a", "local", true); + let route = make_route(CLUSTER_INFERENCE_ROUTE_NAME, "openai-dev", "mock/model-a"); store.put_message(&route).await.expect("persist route"); - let resp = 
resolve_sandbox_inference_bundle(&store, "sb-1") + let resp = resolve_inference_bundle(&store) .await .expect("bundle should resolve"); assert_eq!(resp.routes.len(), 1); - assert_eq!(resp.routes[0].routing_hint, "local"); + assert_eq!(resp.routes[0].name, CLUSTER_INFERENCE_ROUTE_NAME); + assert_eq!(resp.routes[0].model_id, "mock/model-a"); + assert_eq!(resp.routes[0].provider_type, "openai"); + assert_eq!(resp.routes[0].api_key, "sk-test"); + assert_eq!(resp.routes[0].base_url, "https://api.openai.com/v1"); assert!(!resp.revision.is_empty()); assert!(resp.generated_at_ms > 0); } #[tokio::test] - async fn bundle_missing_sandbox_id_returns_invalid_argument() { + async fn bundle_without_cluster_route_returns_empty_routes() { let store = Store::connect("sqlite::memory:?cache=shared") .await .expect("store"); - let err = resolve_sandbox_inference_bundle(&store, "") + let resp = resolve_inference_bundle(&store) .await - .unwrap_err(); - assert_eq!(err.code(), tonic::Code::InvalidArgument); - } - - #[tokio::test] - async fn bundle_sandbox_not_found_returns_not_found() { - let store = Store::connect("sqlite::memory:?cache=shared") - .await - .expect("store"); - - let err = resolve_sandbox_inference_bundle(&store, "nonexistent") - .await - .unwrap_err(); - assert_eq!(err.code(), tonic::Code::NotFound); + .expect("bundle should resolve"); + assert!(resp.routes.is_empty()); } #[tokio::test] - async fn bundle_no_inference_policy_returns_permission_denied() { + async fn bundle_revision_is_stable_for_same_route() { let store = Store::connect("sqlite::memory:?cache=shared") .await .expect("store"); - // Sandbox with no inference policy (None) - let sandbox = make_sandbox("sb-2", None); - store.put_message(&sandbox).await.expect("persist sandbox"); - - let err = resolve_sandbox_inference_bundle(&store, "sb-2") - .await - .unwrap_err(); - assert_eq!(err.code(), tonic::Code::PermissionDenied); - assert!( - err.message().contains("no inference policy"), - "message: {}", - 
err.message() - ); - } - - #[tokio::test] - async fn bundle_empty_allowed_routes_returns_permission_denied() { - let store = Store::connect("sqlite::memory:?cache=shared") + let provider = make_provider("openai-dev", "openai", "OPENAI_API_KEY", "sk-test"); + store + .put_message(&provider) .await - .expect("store"); + .expect("persist provider"); - // Sandbox with empty allowed_routes - let sandbox = make_sandbox("sb-3", Some(vec![])); - store.put_message(&sandbox).await.expect("persist sandbox"); - - let err = resolve_sandbox_inference_bundle(&store, "sb-3") - .await - .unwrap_err(); - assert_eq!(err.code(), tonic::Code::PermissionDenied); - assert!( - err.message().contains("no allowed routes"), - "message: {}", - err.message() + let route = make_route( + CLUSTER_INFERENCE_ROUTE_NAME, + "openai-dev", + "mock/model-stable", ); - } - - #[tokio::test] - async fn bundle_revision_is_stable_for_same_routes() { - let store = Store::connect("sqlite::memory:?cache=shared") - .await - .expect("store"); - - let sandbox = make_sandbox("sb-4", Some(vec!["local".into()])); - store.put_message(&sandbox).await.expect("persist sandbox"); - - let route = make_route("r-1", "route-a", "local", true); store.put_message(&route).await.expect("persist route"); - let resp1 = resolve_sandbox_inference_bundle(&store, "sb-4") + let resp1 = resolve_inference_bundle(&store) .await .expect("first resolve"); - let resp2 = resolve_sandbox_inference_bundle(&store, "sb-4") + let resp2 = resolve_inference_bundle(&store) .await .expect("second resolve"); assert_eq!( resp1.revision, resp2.revision, - "same routes should produce same revision" + "same route should produce same revision" ); } #[tokio::test] - async fn list_sandbox_routes_keeps_multi_protocols_in_single_route() { + async fn resolve_managed_route_derives_from_provider() { let store = Store::connect("sqlite::memory:?cache=shared") .await .expect("store should connect"); + let provider = Provider { + id: "provider-1".to_string(), + name: 
"openai-dev".to_string(), + r#type: "openai".to_string(), + credentials: std::iter::once(("OPENAI_API_KEY".to_string(), "sk-test".to_string())) + .collect(), + config: std::iter::once(( + "OPENAI_BASE_URL".to_string(), + "https://station.example.com/v1".to_string(), + )) + .collect(), + }; + store + .put_message(&provider) + .await + .expect("provider should persist"); + let route = InferenceRoute { id: "r-1".to_string(), - name: "route-multi".to_string(), - spec: Some(InferenceRouteSpec { - routing_hint: "local".to_string(), - base_url: "https://example.com/v1".to_string(), - api_key: "test-key".to_string(), + name: CLUSTER_INFERENCE_ROUTE_NAME.to_string(), + config: Some(ClusterInferenceConfig { + provider_name: "openai-dev".to_string(), model_id: "test/model".to_string(), - enabled: true, - protocols: vec![ - "openai_chat_completions".to_string(), - "anthropic_messages".to_string(), - ], }), + version: 7, }; store .put_message(&route) .await .expect("route should persist"); - let routes = list_sandbox_routes(&store, &["local".to_string()]) + let managed = resolve_managed_cluster_route(&store) .await - .expect("routes should resolve"); + .expect("route should resolve") + .expect("managed route should exist"); - assert_eq!(routes.len(), 1); + assert_eq!(managed.base_url, "https://station.example.com/v1"); + assert_eq!(managed.api_key, "sk-test"); + assert_eq!(managed.provider_type, "openai"); assert_eq!( - routes[0].protocols, + managed.protocols, vec![ "openai_chat_completions".to_string(), - "anthropic_messages".to_string() + "openai_completions".to_string(), + "openai_responses".to_string(), + "model_discovery".to_string(), ] ); } + + #[tokio::test] + async fn resolve_managed_route_reflects_provider_key_rotation() { + let store = Store::connect("sqlite::memory:?cache=shared") + .await + .expect("store should connect"); + + let provider = make_provider("openai-dev", "openai", "OPENAI_API_KEY", "sk-initial"); + store + .put_message(&provider) + .await + 
.expect("provider should persist"); + + let route = make_route(CLUSTER_INFERENCE_ROUTE_NAME, "openai-dev", "test/model"); + store + .put_message(&route) + .await + .expect("route should persist"); + + let first = resolve_managed_cluster_route(&store) + .await + .expect("route should resolve") + .expect("managed route should exist"); + assert_eq!(first.api_key, "sk-initial"); + + let rotated_provider = Provider { + id: provider.id, + name: provider.name, + r#type: provider.r#type, + credentials: std::iter::once(("OPENAI_API_KEY".to_string(), "sk-rotated".to_string())) + .collect(), + config: provider.config, + }; + store + .put_message(&rotated_provider) + .await + .expect("provider rotation should persist"); + + let second = resolve_managed_cluster_route(&store) + .await + .expect("route should resolve") + .expect("managed route should exist"); + assert_eq!(second.api_key, "sk-rotated"); + } } diff --git a/crates/navigator-server/src/lib.rs b/crates/navigator-server/src/lib.rs index 262aabdb..398acd7d 100644 --- a/crates/navigator-server/src/lib.rs +++ b/crates/navigator-server/src/lib.rs @@ -128,6 +128,7 @@ pub async fn run_server(config: Config, tracing_log_bus: TracingLogBus) -> Resul state.sandbox_client.clone(), state.sandbox_index.clone(), state.sandbox_watch_bus.clone(), + state.tracing_log_bus.clone(), ); spawn_kube_event_tailer(state.clone()); diff --git a/crates/navigator-server/src/multiplex.rs b/crates/navigator-server/src/multiplex.rs index e058a69a..0113f3c6 100644 --- a/crates/navigator-server/src/multiplex.rs +++ b/crates/navigator-server/src/multiplex.rs @@ -25,6 +25,13 @@ use tower::ServiceExt; use crate::{NavigatorService, ServerState, http_router, inference::InferenceService}; +/// Maximum inbound gRPC message size (1 MB). +/// +/// Replaces tonic's implicit 4 MB default with a conservative limit to +/// bound memory allocation from a single request. Sandbox creation is +/// the largest payload and well within this cap under normal use. 
+const MAX_GRPC_DECODE_SIZE: usize = 1_048_576; + /// Multiplexed gRPC/HTTP service. #[derive(Clone)] pub struct MultiplexService { @@ -44,8 +51,10 @@ impl MultiplexService { where S: AsyncRead + AsyncWrite + Unpin + Send + 'static, { - let navigator = NavigatorServer::new(NavigatorService::new(self.state.clone())); - let inference = InferenceServer::new(InferenceService::new(self.state.clone())); + let navigator = NavigatorServer::new(NavigatorService::new(self.state.clone())) + .max_decoding_message_size(MAX_GRPC_DECODE_SIZE); + let inference = InferenceServer::new(InferenceService::new(self.state.clone())) + .max_decoding_message_size(MAX_GRPC_DECODE_SIZE); let grpc_service = GrpcRouter::new(navigator, inference); let http_service = http_router(self.state.clone()); diff --git a/crates/navigator-server/src/sandbox/mod.rs b/crates/navigator-server/src/sandbox/mod.rs index bee41f68..2700f625 100644 --- a/crates/navigator-server/src/sandbox/mod.rs +++ b/crates/navigator-server/src/sandbox/mod.rs @@ -217,6 +217,7 @@ pub fn spawn_sandbox_watcher( client: SandboxClient, index: crate::sandbox_index::SandboxIndex, watch_bus: crate::sandbox_watch::SandboxWatchBus, + tracing_log_bus: crate::tracing_bus::TracingLogBus, ) { let namespace = client.namespace().to_string(); info!(namespace = %namespace, "Starting sandbox watcher"); @@ -240,7 +241,9 @@ pub fn spawn_sandbox_watcher( Event::Deleted(obj) => { let obj_name = obj.metadata.name.clone().unwrap_or_default(); debug!(sandbox_name = %obj_name, "Received Deleted event from Kubernetes"); - if let Err(err) = handle_deleted(&store, &index, &watch_bus, obj).await { + if let Err(err) = + handle_deleted(&store, &index, &watch_bus, &tracing_log_bus, obj).await + { warn!(sandbox_name = %obj_name, error = %err, "Failed to delete sandbox record"); } } @@ -363,6 +366,7 @@ async fn handle_deleted( store: &Store, index: &crate::sandbox_index::SandboxIndex, watch_bus: &crate::sandbox_watch::SandboxWatchBus, + tracing_log_bus: 
&crate::tracing_bus::TracingLogBus, obj: DynamicObject, ) -> Result<(), String> { let id = sandbox_id_from_object(&obj)?; @@ -373,6 +377,12 @@ async fn handle_deleted( debug!(sandbox_id = %id, deleted, "Deleted sandbox record"); index.remove_sandbox(&id); watch_bus.notify(&id); + + // Clean up bus entries to prevent unbounded memory growth. + tracing_log_bus.remove(&id); + tracing_log_bus.platform_event_bus.remove(&id); + watch_bus.remove(&id); + Ok(()) } diff --git a/crates/navigator-server/src/sandbox_watch.rs b/crates/navigator-server/src/sandbox_watch.rs index 9c789aee..78a8cb26 100644 --- a/crates/navigator-server/src/sandbox_watch.rs +++ b/crates/navigator-server/src/sandbox_watch.rs @@ -57,6 +57,15 @@ impl SandboxWatchBus { pub fn subscribe(&self, sandbox_id: &str) -> broadcast::Receiver<()> { self.sender_for(sandbox_id).subscribe() } + + /// Remove the bus entry for the given sandbox id. + /// + /// This drops the broadcast sender, closing any active receivers with + /// `RecvError::Closed`. + pub fn remove(&self, sandbox_id: &str) { + let mut inner = self.inner.lock().expect("sandbox watch bus lock poisoned"); + inner.remove(sandbox_id); + } } /// Spawn a background Kubernetes Event tailer. 
@@ -160,6 +169,53 @@ fn map_kube_event_to_platform( )) } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sandbox_watch_bus_remove_cleans_up() { + let bus = SandboxWatchBus::new(); + let sandbox_id = "sb-1"; + + let mut rx = bus.subscribe(sandbox_id); + + // Notify and receive + bus.notify(sandbox_id); + assert!(rx.try_recv().is_ok()); + + // Remove + bus.remove(sandbox_id); + + // Receiver should be closed + match rx.try_recv() { + Err(broadcast::error::TryRecvError::Closed) => {} // expected + other => panic!("expected Closed, got {other:?}"), + } + } + + #[test] + fn sandbox_watch_bus_subscribe_after_remove_creates_fresh_channel() { + let bus = SandboxWatchBus::new(); + let sandbox_id = "sb-2"; + + let _old_rx = bus.subscribe(sandbox_id); + bus.remove(sandbox_id); + + // New subscription should work + let mut new_rx = bus.subscribe(sandbox_id); + bus.notify(sandbox_id); + assert!(new_rx.try_recv().is_ok()); + } + + #[test] + fn sandbox_watch_bus_remove_nonexistent_is_noop() { + let bus = SandboxWatchBus::new(); + // Should not panic + bus.remove("nonexistent"); + } +} + /// Helper to translate broadcast lag into a gRPC status. pub fn broadcast_to_status(err: broadcast::error::RecvError) -> Status { match err { diff --git a/crates/navigator-server/src/tracing_bus.rs b/crates/navigator-server/src/tracing_bus.rs index 3ad5200e..f159e546 100644 --- a/crates/navigator-server/src/tracing_bus.rs +++ b/crates/navigator-server/src/tracing_bus.rs @@ -75,6 +75,16 @@ impl TracingLogBus { self.sender_for(sandbox_id).subscribe() } + /// Remove all bus entries for the given sandbox id. + /// + /// This drops the broadcast sender (closing any active receivers with + /// `RecvError::Closed`) and frees the tail buffer. 
+ pub fn remove(&self, sandbox_id: &str) { + let mut inner = self.inner.lock().expect("tracing bus lock poisoned"); + inner.per_id.remove(sandbox_id); + inner.tails.remove(sandbox_id); + } + pub fn tail(&self, sandbox_id: &str, max: usize) -> Vec { let inner = self.inner.lock().expect("tracing bus lock poisoned"); inner @@ -186,6 +196,129 @@ fn current_time_ms() -> Option { i64::try_from(now.as_millis()).ok() } +#[cfg(test)] +mod tests { + use super::*; + + fn make_log_event(sandbox_id: &str, message: &str) -> SandboxLogLine { + SandboxLogLine { + sandbox_id: sandbox_id.to_string(), + timestamp_ms: 1000, + level: "INFO".to_string(), + target: "test".to_string(), + message: message.to_string(), + source: "gateway".to_string(), + fields: HashMap::new(), + } + } + + #[test] + fn tracing_log_bus_remove_cleans_up_all_maps() { + let bus = TracingLogBus::new(); + let sandbox_id = "sb-1"; + + // Create entries via subscribe and publish + let _rx = bus.subscribe(sandbox_id); + bus.publish_external(make_log_event(sandbox_id, "hello")); + + // Verify entries exist + assert_eq!(bus.tail(sandbox_id, 10).len(), 1); + + // Remove + bus.remove(sandbox_id); + + // Verify entries are gone + assert!(bus.tail(sandbox_id, 10).is_empty()); + } + + #[test] + fn tracing_log_bus_subscribe_after_remove_creates_fresh_channel() { + let bus = TracingLogBus::new(); + let sandbox_id = "sb-2"; + + // Create and remove + bus.publish_external(make_log_event(sandbox_id, "old message")); + bus.remove(sandbox_id); + + // Subscribe again — should get a fresh channel with no history + let mut rx = bus.subscribe(sandbox_id); + assert!(bus.tail(sandbox_id, 10).is_empty()); + + // New publish should reach the new subscriber + bus.publish_external(make_log_event(sandbox_id, "new message")); + let evt = rx.try_recv().expect("should receive new event"); + assert!(evt.payload.is_some()); + } + + #[test] + fn tracing_log_bus_remove_closes_active_receivers() { + let bus = TracingLogBus::new(); + let sandbox_id = 
"sb-3"; + + let mut rx = bus.subscribe(sandbox_id); + + // Remove drops the sender + bus.remove(sandbox_id); + + // Existing receiver should get Closed error + match rx.try_recv() { + Err(broadcast::error::TryRecvError::Closed) => {} // expected + other => panic!("expected Closed, got {other:?}"), + } + } + + #[test] + fn tracing_log_bus_remove_nonexistent_is_noop() { + let bus = TracingLogBus::new(); + // Should not panic + bus.remove("nonexistent"); + } + + #[test] + fn platform_event_bus_remove_cleans_up() { + let bus = PlatformEventBus::new(); + let sandbox_id = "sb-4"; + + let mut rx = bus.subscribe(sandbox_id); + + // Publish an event + let evt = SandboxStreamEvent { payload: None }; + bus.publish(sandbox_id, evt); + assert!(rx.try_recv().is_ok()); + + // Remove + bus.remove(sandbox_id); + + // Receiver should be closed + match rx.try_recv() { + Err(broadcast::error::TryRecvError::Closed) => {} // expected + other => panic!("expected Closed, got {other:?}"), + } + } + + #[test] + fn platform_event_bus_subscribe_after_remove_creates_fresh_channel() { + let bus = PlatformEventBus::new(); + let sandbox_id = "sb-5"; + + let _old_rx = bus.subscribe(sandbox_id); + bus.remove(sandbox_id); + + // New subscription should work + let mut new_rx = bus.subscribe(sandbox_id); + let evt = SandboxStreamEvent { payload: None }; + bus.publish(sandbox_id, evt); + assert!(new_rx.try_recv().is_ok()); + } + + #[test] + fn platform_event_bus_remove_nonexistent_is_noop() { + let bus = PlatformEventBus::new(); + // Should not panic + bus.remove("nonexistent"); + } +} + /// Separate bus for platform event stream events. /// /// This keeps platform events isolated from tracing capture. @@ -220,4 +353,12 @@ impl PlatformEventBus { let tx = self.sender_for(sandbox_id); let _ = tx.send(event); } + + /// Remove the bus entry for the given sandbox id. + /// + /// This drops the broadcast sender, closing any active receivers. 
+ pub(crate) fn remove(&self, sandbox_id: &str) { + let mut inner = self.inner.lock().expect("platform event bus lock poisoned"); + inner.remove(sandbox_id); + } } diff --git a/crates/navigator-tui/Cargo.toml b/crates/navigator-tui/Cargo.toml index 2f8def04..a5075009 100644 --- a/crates/navigator-tui/Cargo.toml +++ b/crates/navigator-tui/Cargo.toml @@ -3,7 +3,7 @@ [package] name = "navigator-tui" -description = "Gator – NemoClaw interactive TUI" +description = "NemoClaw interactive TUI" version.workspace = true edition.workspace = true rust-version.workspace = true diff --git a/crates/navigator-tui/src/lib.rs b/crates/navigator-tui/src/lib.rs index 1638a3d4..d498989d 100644 --- a/crates/navigator-tui/src/lib.rs +++ b/crates/navigator-tui/src/lib.rs @@ -25,7 +25,7 @@ use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity use app::{App, ClusterEntry, Focus, LogLine, Screen}; use event::{Event, EventHandler}; -/// Launch the Gator TUI. +/// Launch the NemoClaw TUI. /// /// `channel` must be a connected gRPC channel to the NemoClaw gateway. pub async fn run(channel: Channel, cluster_name: &str, endpoint: &str) -> Result<()> { @@ -963,19 +963,6 @@ fn render_policy_lines( lines.push(Line::from("")); } - // --- Inference --- - if let Some(inference) = &policy.inference { - if !inference.allowed_routes.is_empty() { - lines.push(Line::from(Span::styled("Inference", styles::HEADING))); - let routes = inference.allowed_routes.join(", "); - lines.push(Line::from(vec![ - Span::styled(" Allowed routes: ", styles::MUTED), - Span::styled(routes, styles::TEXT), - ])); - lines.push(Line::from("")); - } - } - // --- Network Rules --- if !policy.network_policies.is_empty() { // Sort keys for deterministic display. 
diff --git a/crates/navigator-tui/src/ui/mod.rs b/crates/navigator-tui/src/ui/mod.rs index b6e1cc47..a0c45a3b 100644 --- a/crates/navigator-tui/src/ui/mod.rs +++ b/crates/navigator-tui/src/ui/mod.rs @@ -98,7 +98,7 @@ fn draw_title_bar(frame: &mut Frame<'_>, app: &App, area: Rect) { }; let mut parts: Vec> = vec![ - Span::styled(" Gator", styles::ACCENT_BOLD), + Span::styled(" NemoClaw", styles::ACCENT_BOLD), Span::styled(" | ", styles::MUTED), Span::styled("Current Cluster: ", styles::TEXT), Span::styled(&app.cluster_name, styles::HEADING), diff --git a/deploy/docker/Dockerfile.ci b/deploy/docker/Dockerfile.ci index 4bd2a595..935573cd 100644 --- a/deploy/docker/Dockerfile.ci +++ b/deploy/docker/Dockerfile.ci @@ -8,8 +8,8 @@ FROM ubuntu:24.04 -ARG DOCKER_VERSION=27.5.1 -ARG BUILDX_VERSION=v0.21.1 +ARG DOCKER_VERSION=29.3.0 +ARG BUILDX_VERSION=v0.32.1 ARG TARGETARCH ENV DEBIAN_FRONTEND=noninteractive @@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ pkg-config \ libssl-dev \ + openssh-client \ python3 \ python3-venv \ cmake \ diff --git a/deploy/docker/Dockerfile.cluster b/deploy/docker/Dockerfile.cluster index 823b6259..1c5e7421 100644 --- a/deploy/docker/Dockerfile.cluster +++ b/deploy/docker/Dockerfile.cluster @@ -15,7 +15,7 @@ # The helm charts are built by the docker:build:cluster mise task # and placed in deploy/docker/.build/ before this Dockerfile is built. 
-ARG K3S_VERSION=v1.29.8-k3s1 +ARG K3S_VERSION=v1.35.2-k3s1 FROM rancher/k3s:${K3S_VERSION} # Create directories for manifests, charts, and configuration diff --git a/deploy/docker/Dockerfile.server b/deploy/docker/Dockerfile.server index e387fe75..e31af530 100644 --- a/deploy/docker/Dockerfile.server +++ b/deploy/docker/Dockerfile.server @@ -10,6 +10,7 @@ FROM --platform=$BUILDPLATFORM rust:1.88-slim AS builder ARG TARGETARCH ARG BUILDARCH +ARG NEMOCLAW_CARGO_VERSION ARG CARGO_TARGET_CACHE_SCOPE=default # Install build dependencies @@ -71,6 +72,9 @@ RUN --mount=type=cache,id=cargo-registry-server-${TARGETARCH},sharing=locked,tar --mount=type=cache,id=cargo-target-server-${TARGETARCH}-${CARGO_TARGET_CACHE_SCOPE},sharing=locked,target=/build/target \ --mount=type=cache,id=sccache-server-${TARGETARCH},sharing=locked,target=/tmp/sccache \ . cross-build.sh && \ + if [ -n "${NEMOCLAW_CARGO_VERSION:-}" ]; then \ + sed -i -E '/^\[workspace\.package\]/,/^\[/{s/^version[[:space:]]*=[[:space:]]*".*"/version = "'"${NEMOCLAW_CARGO_VERSION}"'"/}' Cargo.toml; \ + fi && \ cargo_cross_build --release -p navigator-server && \ cp "$(cross_output_dir release)/navigator-server" /build/navigator-server diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 8083c89c..9cca0ff0 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -252,7 +252,7 @@ fi # Generate a random SSH handshake secret for the NSSH1 HMAC handshake between # the gateway and sandbox SSH servers. This is required — the server will refuse # to start without it. -SSH_HANDSHAKE_SECRET="${SSH_HANDSHAKE_SECRET:-$(openssl rand -hex 32)}" +SSH_HANDSHAKE_SECRET="${SSH_HANDSHAKE_SECRET:-$(head -c 32 /dev/urandom | od -A n -t x1 | tr -d ' \n')}" # Inject SSH gateway host/port into the HelmChart manifest so the navigator # server returns the correct address to CLI clients for SSH proxy CONNECT. 
diff --git a/deploy/docker/sandbox/Dockerfile.base b/deploy/docker/sandbox/Dockerfile.base index f9879b5c..082502f6 100644 --- a/deploy/docker/sandbox/Dockerfile.base +++ b/deploy/docker/sandbox/Dockerfile.base @@ -107,7 +107,7 @@ RUN groupadd -r supervisor && useradd -r -g supervisor -s /usr/sbin/nologin supe # Stage 3: Python dependencies builder FROM base AS builder -COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv +COPY --from=ghcr.io/astral-sh/uv:0.10.8 /uv /usr/local/bin/uv # Copy project files for dependency resolution COPY pyproject.toml uv.lock ./ @@ -117,8 +117,9 @@ RUN uv venv /app/.venv && \ uv sync --frozen --no-dev --no-install-project 2>/dev/null || \ uv sync --no-dev --no-install-project 2>/dev/null || true -# Install navigator SDK dependencies -RUN uv pip install --python /app/.venv/bin/python --no-cache-dir cloudpickle grpcio protobuf openai +# Install navigator SDK dependencies and pip (uv venvs don't include pip by +# default, and sandbox users need it for `pip install` inside the sandbox). +RUN uv pip install --python /app/.venv/bin/python --no-cache-dir cloudpickle grpcio protobuf openai pip # Stage 4: Coding agents layer FROM base AS coding-agents @@ -128,7 +129,7 @@ FROM base AS coding-agents # Include a minimal native toolchain so npm can compile optional native # dependencies on platforms where prebuilt artifacts are unavailable. 
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ - apt-get install -y --no-install-recommends build-essential git nodejs python3 vim-tiny nano && \ + apt-get install -y --no-install-recommends build-essential git nodejs python3 nano && \ rm -rf /var/lib/apt/lists/* # Install GitHub CLI (gh) from the official apt repository @@ -144,8 +145,11 @@ RUN curl -fsSL https://claude.ai/install.sh | bash \ && cp /root/.local/bin/claude /usr/local/bin/claude \ && chmod 755 /usr/local/bin/claude -# Install OpenCode CLI and Codex CLI (OpenAI) -RUN npm install -g opencode-ai @openai/codex openclaw +# Install OpenCode CLI, Codex CLI (OpenAI), and OpenClaw with pinned versions +# for reproducible builds. Force-upgrade tar afterward to resolve transitive +# dependency vulnerabilities (GHSA-r6q2-hw4h-h46w, GHSA-qffp-2rhf-9h96, etc.) +RUN npm install -g opencode-ai@1.2.18 @openai/codex@0.111.0 openclaw@2026.3.2 && \ + npm install -g tar@7.5.10 # Install ai-pim-utils (NVIDIA PIM CLI tools: outlook, calendar, transcripts, etc.) # The install script auto-detects Debian and installs via .deb package to /usr/bin. @@ -163,8 +167,14 @@ COPY --from=rust-builder /build/out/navigator-sandbox /usr/local/bin/ # Copy navigator Python SDK into the virtual environment COPY python/navigator/ /app/.venv/lib/python3.12/site-packages/navigator/ -# Add venv to PATH -ENV PATH="/app/.venv/bin:$PATH" +# Add venvs to PATH -- /sandbox/.venv (writable, user-installed packages) +# takes priority over /app/.venv (read-only, build-time packages). +# ssh.rs inherits PATH at runtime so it stays in sync with this layout. +# VIRTUAL_ENV and UV_PYTHON_INSTALL_DIR are also exported in .bashrc +# so that login shell sessions (interactive and exec) see them. 
+ENV PATH="/sandbox/.venv/bin:/app/.venv/bin:/usr/local/bin:/usr/bin:/bin" \ + VIRTUAL_ENV="/sandbox/.venv" \ + UV_PYTHON_INSTALL_DIR="/sandbox/.uv/python" # Copy custom navigator skills into the image # To add a skill, create a subdirectory under deploy/docker/sandbox/skills/ @@ -181,11 +191,16 @@ RUN mkdir -p /var/navigator /sandbox /var/log && \ chown supervisor:supervisor /var/log/navigator.log && \ chmod 0664 /var/log/navigator.log && \ chown sandbox:sandbox /sandbox && \ + # Create a writable venv that inherits all packages from the read-only + # /app/.venv. Sandbox users can `pip install` or `uv pip install` into + # this venv without touching the base image layer. + uv venv --python /app/.venv/bin/python --seed --system-site-packages /sandbox/.venv && \ + chown -R sandbox:sandbox /sandbox/.venv && \ # Minimal shell init files so interactive and non-interactive shells # get a sane PATH and prompt. Without these, bash sources nothing # under /sandbox and tools like VS Code Remote-SSH may mis-detect # the platform. - printf 'export PATH="/app/.venv/bin:$PATH"\nexport PS1="\\u@\\h:\\w\\$ "\n' \ + printf 'export PATH="/sandbox/.venv/bin:/app/.venv/bin:/usr/local/bin:/usr/bin:/bin"\nexport VIRTUAL_ENV="/sandbox/.venv"\nexport UV_PYTHON_INSTALL_DIR="/sandbox/.uv/python"\nexport PS1="\\u@\\h:\\w\\$ "\n' \ > /sandbox/.bashrc && \ printf '[ -f ~/.bashrc ] && . 
~/.bashrc\n' > /sandbox/.profile && \ chown sandbox:sandbox /sandbox/.bashrc /sandbox/.profile && \ diff --git a/deploy/docker/sandbox/dev-sandbox-policy.yaml b/deploy/docker/sandbox/dev-sandbox-policy.yaml index 29cdd8f0..4b539834 100644 --- a/deploy/docker/sandbox/dev-sandbox-policy.yaml +++ b/deploy/docker/sandbox/dev-sandbox-policy.yaml @@ -95,6 +95,28 @@ network_policies: - { path: /usr/local/bin/claude } - { path: /usr/bin/gh } + pypi: + name: pypi + endpoints: + - { host: pypi.org, port: 443 } + - { host: files.pythonhosted.org, port: 443 } + # uv python install downloads from python-build-standalone on GitHub + - { host: github.com, port: 443 } + - { host: objects.githubusercontent.com, port: 443 } + # uv resolves python-build-standalone release metadata via the GitHub API + - { host: api.github.com, port: 443 } + - { host: downloads.python.org, port: 443 } + binaries: + - { path: /sandbox/.venv/bin/python } + - { path: /sandbox/.venv/bin/python3 } + - { path: /sandbox/.venv/bin/pip } + - { path: /app/.venv/bin/python } + - { path: /app/.venv/bin/python3 } + - { path: /app/.venv/bin/pip } + - { path: /usr/local/bin/uv } + # Managed Python installations from uv python install + - { path: "/sandbox/.uv/python/**" } + vscode: name: vscode endpoints: @@ -108,7 +130,3 @@ network_policies: - { path: /usr/bin/wget } - { path: "/sandbox/.vscode-server/**" } - { path: "/sandbox/.vscode-remote-containers/**" } - -inference: - allowed_routes: - - local diff --git a/docs/about/architecture.md b/docs/about/architecture.md new file mode 100644 index 00000000..8c0647b5 --- /dev/null +++ b/docs/about/architecture.md @@ -0,0 +1,81 @@ +--- +title: + page: "Architecture Overview" + nav: "Architecture" +description: "High-level overview of the OpenShell architecture: gateway, sandboxes, policy engine, and privacy router." 
+keywords: ["openshell architecture", "sandbox architecture", "agent isolation", "k3s", "policy engine"] +topics: ["generative_ai", "cybersecurity"] +tags: ["ai_agents", "sandboxing", "security", "architecture"] +content: + type: concept + difficulty: technical_beginner + audience: [engineer, data_scientist] +--- + + + +# How OpenShell Works + +OpenShell runs as a [k3s](https://k3s.io/) Kubernetes cluster inside a Docker container. +Each sandbox is an isolated Kubernetes pod managed by the OpenShell control plane. +Four components work together to keep agents secure. + +```{mermaid} +flowchart LR + CLI["CLI"] -->|gRPC| GW["Gateway"] + GW --> SBX["Sandbox"] + + subgraph SBX["Sandbox"] + direction TB + AGENT["Agent Process"] -->|All traffic| PROXY["Network Proxy"] + PROXY -->|Evaluate| OPA["Policy Engine"] + end + + PROXY -->|Allowed traffic| EXT["External Services"] + + style CLI fill:#ffffff,stroke:#000000,color:#000000 + style GW fill:#76b900,stroke:#000000,color:#000000 + style SBX fill:#f5f5f5,stroke:#000000,color:#000000 + style AGENT fill:#ffffff,stroke:#000000,color:#000000 + style PROXY fill:#76b900,stroke:#000000,color:#000000 + style OPA fill:#76b900,stroke:#000000,color:#000000 + style EXT fill:#ffffff,stroke:#000000,color:#000000 + + linkStyle default stroke:#76b900,stroke-width:2px +``` + +## Components + +OpenShell consists of the following components. + +Gateway +: The control-plane API that manages sandbox lifecycle, stores encrypted credentials, distributes policies, and terminates SSH tunnels. The CLI communicates exclusively with the gateway—it never talks to sandbox pods directly. + +Sandbox +: An isolated pod that runs your agent. Each sandbox contains a **supervisor** (sets up isolation and starts the agent), an **L7 proxy** (intercepts and evaluates every outbound connection), and the agent process itself. + +Policy Engine +: Evaluates declarative YAML policies that define filesystem, network, and process constraints. 
The proxy queries the engine on every outbound connection. Policies can be hot-reloaded without restarting the agent. + +Privacy Router +: Intercepts LLM API calls and routes them to local or self-hosted backends based on your routing policy. Sensitive prompts and completions stay on infrastructure you control. + +## How a Request Flows + +OpenShell works in the following way: + +1. The agent makes an outbound connection (for example, an API call). +2. The L7 proxy intercepts the connection and identifies the calling process. +3. The proxy queries the policy engine with the destination and process identity. +4. Based on the policy decision, the proxy either allows the connection, routes it through the privacy router for inference, or denies it. + +## Remote Deployment + +OpenShell can also run on a remote host. Deploy with `openshell gateway start --remote user@host`, then set up a tunnel with `openshell gateway tunnel`. The architecture is identical—only the Docker container location changes. + +--- + +For detailed component internals, refer to the [Architecture Reference](../reference/architecture.md). diff --git a/docs/about/index.md b/docs/about/index.md deleted file mode 100644 index 73a57815..00000000 --- a/docs/about/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# About NemoClaw diff --git a/docs/about/overview.md b/docs/about/overview.md new file mode 100644 index 00000000..fd30b039 --- /dev/null +++ b/docs/about/overview.md @@ -0,0 +1,80 @@ +--- +title: + page: "Overview of NVIDIA OpenShell" + nav: "Overview" +description: "OpenShell is the safe, private runtime for autonomous AI agents. Run agents in sandboxed environments that protect your data, credentials, and infrastructure." 
+keywords: ["openshell", "ai agent sandbox", "agent security", "agent isolation", "inference routing"] +topics: ["generative_ai", "cybersecurity"] +tags: ["ai_agents", "sandboxing", "security", "privacy", "inference_routing"] +content: + type: concept + difficulty: technical_beginner + audience: [engineer, data_scientist] +--- + + + +# Overview of NVIDIA OpenShell + +NVIDIA OpenShell is an open-source runtime that executes autonomous AI agents inside sandboxed environments with kernel-level isolation. It prevents agents from accessing unauthorized files, exfiltrating data, leaking credentials, or making uncontrolled network requests. A single declarative YAML policy governs filesystem, network, process, and inference protections across all sandboxes and is hot-reloadable without restarting running agents. + +## Common Challenges with AI Agents + +AI agents are most useful when they have broad access to reading files, installing packages, calling APIs, and using credentials. However, this same access makes them dangerous. An unrestricted agent can leak your API keys, send proprietary code to unauthorized endpoints, or reach infrastructure it was never meant to touch. + +Conventional containers do not solve this. They isolate processes from the host, but they do not control what an agent does *inside* the container — which files it reads, which hosts it contacts, or where it sends your prompts. + +## Benefits of Using OpenShell + +OpenShell addresses these risks through defense-in-depth enforcement across four policy domains: filesystem, network, process, and inference. + +:::{dropdown} 🛡️ Kernel-Level Isolation +OpenShell enforces isolation at the Linux kernel level using [Landlock](https://docs.kernel.org/security/landlock.html) for filesystem restrictions, seccomp for system call filtering, and network namespaces for traffic control. These mechanisms operate below the application layer, so agents cannot bypass them regardless of the tools or languages they use. 
+::: + +:::{dropdown} 📜 Declarative Policy Enforcement +A single YAML policy file defines all security boundaries for a sandbox: allowed filesystem paths, permitted network destinations, restricted processes, and inference routing rules. Policies are hot-reloadable, so you can tighten or relax rules on a running sandbox without restarting the agent. +::: + +:::{dropdown} 🔐 Credential Containment +Credentials are injected into sandboxes as environment variables at startup and are scoped to the sandbox's isolated namespace. They cannot be read by processes outside the sandbox, and network policies prevent agents from transmitting them to unauthorized endpoints. +::: + +:::{dropdown} 🔀 Private Inference Routing +The built-in inference router intercepts LLM API calls and redirects them to local or self-hosted backends based on your routing policy. Sensitive prompts and completions stay on infrastructure you control. Routes are configurable per sandbox and can be updated without restarting agents. +::: + +:::{dropdown} 🔍 Full L7 Traffic Inspection +Every outbound TCP connection from a sandbox passes through an L7 proxy that resolves the calling process, evaluates the destination against the active policy, and either allows, denies, or reroutes the request. For REST endpoints, the proxy decrypts TLS, inspects HTTP method and path, and applies fine-grained access rules. +::: + +## Use Cases + +The following are common use cases for OpenShell. + +:::{dropdown} 💻 Secure Coding Agents +Run AI coding assistants such as Claude Code, OpenCode, or OpenClaw inside a sandbox where they can read and modify project files but cannot access SSH keys, cloud credentials, or files outside the project directory. Network policies restrict which package registries and APIs the agent can reach. +::: + +:::{dropdown} 🏢 Private Enterprise Development +Route all LLM inference through self-hosted NVIDIA NIM endpoints or private API backends. 
Proprietary source code and internal documentation stay on your infrastructure and are never sent to third-party LLM providers. +::: + +:::{dropdown} ✅ Compliance and Audit +Declarative policies serve as auditable security controls. Each sandbox runs under a well-defined policy that specifies exactly what the agent can access. Policy files can be version-controlled and reviewed as part of your security and compliance processes. +::: + +:::{dropdown} 📦 Community and Custom Sandbox Images +Use pre-built sandbox images from the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) catalog or bring your own container. Community sandboxes bundle domain-specific tools, policies, and skills, while custom containers let you package any environment your agents need. +::: + +--- + +## Next Steps + +- [Architecture Overview](architecture.md): Understand the components that make up the OpenShell runtime. +- [Get Started](../index.md): Install the CLI and create your first sandbox. +- [Security Model](../safety-and-privacy/security-model.md): Learn how OpenShell enforces isolation across all protection layers. diff --git a/docs/about/release-notes.md b/docs/about/release-notes.md new file mode 100644 index 00000000..1290a939 --- /dev/null +++ b/docs/about/release-notes.md @@ -0,0 +1,13 @@ + + +# Release Notes + +This page covers the highlights of each OpenShell release. +For more details, refer to the [OpenShell GitHub Releases](https://github.com/NVIDIA/OpenShell/releases). + +## 0.1.0 + +This is the first release of NVIDIA OpenShell. It introduces sandboxed AI agent execution with kernel-level isolation, policy enforcement, and credential management. 
diff --git a/docs/conf.py b/docs/conf.py index f1c2ff2f..d6dac204 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,7 +7,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) -project = "NVIDIA NemoClaw Developer Guide" +project = "NVIDIA OpenShell Developer Guide" this_year = date.today().year copyright = f"2025-{this_year}, NVIDIA Corporation" author = "NVIDIA Corporation" @@ -38,6 +38,7 @@ exclude_patterns = [ "README.md", + "SETUP.md", "_build/**", ] @@ -63,6 +64,23 @@ html_show_sourcelink = False html_show_sphinx = False +mermaid_init_js = ( + "mermaid.initialize({" + " startOnLoad: true," + " theme: 'base'," + " themeVariables: {" + " background: '#ffffff'," + " primaryColor: '#76b900'," + " primaryTextColor: '#000000'," + " primaryBorderColor: '#000000'," + " lineColor: '#000000'," + " textColor: '#000000'," + " mainBkg: '#ffffff'," + " nodeBorder: '#000000'" + " }" + "});" +) + html_domain_indices = False html_use_index = False highlight_language = "console" @@ -71,9 +89,15 @@ "icon_links": [ { "name": "GitHub", - "url": "https://github.com/NVIDIA/NemoClaw", + "url": "https://github.com/NVIDIA/OpenShell", "icon": "fa-brands fa-github", "type": "fontawesome", }, + { + "name": "PyPI", + "url": "https://pypi.org/project/openshell/", + "icon": "fa-brands fa-python", + "type": "fontawesome", + }, ], } diff --git a/docs/feature1/index.md b/docs/feature1/index.md deleted file mode 100644 index debf3ae8..00000000 --- a/docs/feature1/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# Feature 1 diff --git a/docs/feature2/index.md b/docs/feature2/index.md deleted file mode 100644 index 9b14022e..00000000 --- a/docs/feature2/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# Feature 2 diff --git a/docs/get-started/index.md b/docs/get-started/index.md deleted file mode 100644 index 6452a1e9..00000000 --- a/docs/get-started/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# Get Started diff --git a/docs/get-started/quickstart.md b/docs/get-started/quickstart.md new file mode 
100644 index 00000000..6414c4b3 --- /dev/null +++ b/docs/get-started/quickstart.md @@ -0,0 +1,96 @@ +--- +title: + page: "Quickstart" + nav: "Quickstart" +description: "Install the OpenShell CLI and create your first sandboxed AI agent in two commands." +keywords: ["openshell install", "quickstart", "sandbox create", "getting started"] +topics: ["generative_ai", "cybersecurity"] +tags: ["ai_agents", "sandboxing", "installation", "quickstart"] +content: + type: get_started + difficulty: technical_beginner + audience: [engineer, data_scientist] +--- + + + +# Quickstart + +This page gets you from zero to a running, policy-enforced sandbox in two commands. + +## Prerequisites + +Before you begin, make sure you have: + +```{include} ../../README.md +:start-after: +:end-before: +``` + +## Install the OpenShell CLI + +```{include} ../../README.md +:start-after: +:end-before: +``` + +## Create Your First OpenShell Sandbox + +Choose the tab that matches your agent: + +::::{tab-set} + +:::{tab-item} Claude Code +```console +$ openshell sandbox create -- claude +``` + +```text +✓ Runtime ready +✓ Discovered Claude credentials (ANTHROPIC_API_KEY) +✓ Created sandbox: keen-fox +✓ Policy loaded (4 protection layers active) + +Connecting to keen-fox... +``` + +The CLI detects your `ANTHROPIC_API_KEY`, creates a provider, builds the sandbox, applies a default policy, and drops you into an interactive session. No additional configuration is required. +::: + +:::{tab-item} Community Sandbox +```console +$ openshell sandbox create --from openclaw +``` + +The `--from` flag pulls a pre-built sandbox definition from the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) catalog. Each definition bundles a container image, a tailored policy, and optional skills into a single package. +::: + +:::: + +## What Happens Behind the Scenes + +When you create a sandbox, OpenShell activates the following protection layers. 
+ +| Protection Layer | Description | +|------------------------|-----------------------------------------------------------------------------------------------| +| Filesystem isolation | The agent can only read and write paths that the policy explicitly permits. | +| Network enforcement | Outbound connections are denied by default. The policy allowlists specific hosts, ports, and binaries. | +| Process restrictions | The agent runs as a non-root user inside the container. | +| Inference privacy | LLM API traffic is routed through a privacy-aware proxy. Credentials never leak outside the sandbox. | + +A single YAML policy file controls all four layers. You can hot-reload network and inference rules on a running sandbox without restarting it. + +:::{note} +For OpenCode or Codex, the default policy does not cover the required endpoints. Follow the [Run OpenCode with NVIDIA Inference](run-opencode.md) tutorial for agent-specific setup. +::: + +## Next Steps + +You now have a working sandbox. From here, you can: + +- Follow the [Tutorials](tutorials.md) for step-by-step walkthroughs with Claude Code, OpenClaw, and OpenCode. +- Learn how sandboxes work in [Sandboxes](../sandboxes/create-and-manage.md). +- Write your own policies in [Safety and Privacy](../safety-and-privacy/index.md). diff --git a/docs/get-started/run-claude.md b/docs/get-started/run-claude.md new file mode 100644 index 00000000..ec873ecd --- /dev/null +++ b/docs/get-started/run-claude.md @@ -0,0 +1,89 @@ + + +# Run Claude Code Safely + +Create a sandbox with Claude Code: isolated environment, credentials injected, default policy applied. The default policy allows the Anthropic API, GitHub read-only (clone/fetch), and common development endpoints; other traffic is denied. + +## Prerequisites + +- **Docker** running (required for the OpenShell runtime). See {doc}`quickstart` for details. +- **OpenShell CLI** installed (`pip install openshell` or from source). 
+- **`ANTHROPIC_API_KEY`** set in your environment on the host. + +## Create the Sandbox + +```console +$ openshell sandbox create -- claude +``` + +This command: + +1. Bootstraps the runtime (on first use: provisions a local k3s cluster in Docker; subsequent runs reuse it). +2. Auto-discovers credentials from `ANTHROPIC_API_KEY` and creates a provider. +3. Creates the sandbox with the default policy and drops you into an interactive SSH session. + +:::{note} +First bootstrap can take a few minutes. Later sandbox creations are much faster. +::: + +## Work Inside the Sandbox + +Start Claude Code: + +```console +$ claude +``` + +Credentials are available as environment variables (e.g. `echo $ANTHROPIC_API_KEY`). Use `/sandbox` as the working directory. Git and common runtimes are available within policy limits. + +## Check Sandbox Status + +From a second terminal on the host: + +```console +$ openshell sandbox list +``` + +For a live dashboard (status, connections, policy decisions): + +```console +$ openshell term +``` + +## Connect from VS Code (Optional) + +Export SSH config, then connect with Remote-SSH to the host named after your sandbox: + +```console +$ openshell sandbox ssh-config >> ~/.ssh/config +``` + +Use `` from `openshell sandbox list`, or the name you passed to `--name` at creation. + +## Clean Up + +Exit the sandbox shell (`exit` or Ctrl-D), then: + +```console +$ openshell sandbox delete +``` + +Use the sandbox name from `openshell sandbox list` or the one you set with `--name`. + +:::{tip} +To keep the sandbox running after you disconnect, create with `--keep`: + +```console +$ openshell sandbox create --keep -- claude +``` +::: + +## Next Steps + +- {doc}`../sandboxes/create-and-manage`: Sandbox lifecycle and isolation model. +- {doc}`../sandboxes/providers`: How credentials are injected. +- {doc}`../safety-and-privacy/policies`: Customize or replace the default policy. 
+- [Write Sandbox Policies (network access rules)](../safety-and-privacy/policies.md#network-access-rules): Network proxy and per-endpoint rules. diff --git a/docs/get-started/run-openclaw.md b/docs/get-started/run-openclaw.md new file mode 100644 index 00000000..35c892f5 --- /dev/null +++ b/docs/get-started/run-openclaw.md @@ -0,0 +1,73 @@ + + +# Run OpenClaw Safely + +Launch a sandbox with OpenClaw from the [OpenShell Community catalog](https://github.com/NVIDIA/OpenShell-Community) using `--from openclaw`. The definition includes a container image, policy, and optional skills. + +## Prerequisites + +- **Docker** running. See {doc}`quickstart` for details. +- **OpenShell CLI** installed. +- **NVIDIA GPU** with [supported drivers](https://docs.nvidia.com/datacenter/tesla/drivers/) (required for OpenClaw). + +## Create the Sandbox + +```console +$ openshell sandbox create --from openclaw --keep +``` + +- `--from openclaw`: Fetches the OpenClaw definition from the community catalog, builds the image locally, and applies the bundled policy. +- `--keep`: Keeps the sandbox running after creation so you can connect and disconnect without recreating. + +:::{note} +First build can take longer while Docker pulls base layers and installs dependencies. Later creates reuse the cached image. +::: + +## Connect to the Sandbox + +```console +$ openshell sandbox connect +``` + +Use `` from the creation output, or from `openshell sandbox list` if you did not pass `--name`. + +## Explore the Environment + +The image is pre-configured for OpenClaw: tools, runtimes, and policy are set. You can start working without policy changes. + +## Inspect the Bundled Policy + +To see what the sandbox is allowed to do: + +```console +$ openshell policy get --full +``` + +Review network policies (hosts, ports, binaries), filesystem policy, process restrictions, and inference rules. 
Saving to a file is useful for reference or customization: + +```console +$ openshell policy get --full > openclaw-policy.yaml +``` + +## Clean Up + +Exit the sandbox (`exit`), then: + +```console +$ openshell sandbox delete +``` + +Use the sandbox name from `openshell sandbox list` or from `--name`. + +:::{note} +To contribute a sandbox definition, see [OpenShell-Community](https://github.com/NVIDIA/OpenShell-Community). +::: + +## Next Steps + +- {doc}`../sandboxes/community-sandboxes`: Community definitions, images, and how to contribute. +- {doc}`../safety-and-privacy/policies`: Policy format and customization. +- {doc}`../sandboxes/create-and-manage`: Isolation model and lifecycle. diff --git a/docs/get-started/run-opencode.md b/docs/get-started/run-opencode.md new file mode 100644 index 00000000..a2613e33 --- /dev/null +++ b/docs/get-started/run-opencode.md @@ -0,0 +1,231 @@ + + +# Run OpenCode with NVIDIA Inference + +Run [OpenCode](https://opencode.ai) in an OpenShell sandbox with inference routed to NVIDIA API endpoints. You will hit a policy denial, diagnose it from logs, apply a custom policy, and configure inference routing — the same iteration loop used for any new tool. + +## Prerequisites + +- **Docker** running. See {doc}`quickstart` for details. +- **OpenShell CLI** installed. +- **`NVIDIA_API_KEY`** set on the host with a valid NVIDIA API key. + +## Create the Provider + +Create a provider explicitly (unlike the Claude tutorial where the CLI auto-discovers): + +```console +$ openshell provider create --name nvidia --type nvidia --from-existing +``` + +`--from-existing` reads `NVIDIA_API_KEY` from the environment. Verify: + +```console +$ openshell provider list +``` + +## Create the Sandbox + +```console +$ openshell sandbox create --name opencode-sandbox --provider nvidia --keep -- opencode +``` + +`--keep` keeps the sandbox running for the following steps. 
The default policy is built for Claude, not OpenCode, so OpenCode’s endpoints will be denied until you add a custom policy. + +## Hit a Policy Denial + +Use OpenCode in the sandbox; calls to NVIDIA inference will fail. In a second terminal, tail logs: + +```console +$ openshell logs opencode-sandbox --tail +``` + +Or use `openshell term` for a live view. Look for lines such as: + +``` +action=deny host=integrate.api.nvidia.com binary=/usr/local/bin/opencode reason="no matching network policy" +action=deny host=opencode.ai binary=/usr/bin/node reason="no matching network policy" +``` + +Each line gives host, binary, and reason. Use this to decide what to allow in the policy. + +## Understand the Denial + +The default policy has a `nvidia_inference` entry for a narrow set of binaries (e.g. `/usr/local/bin/claude`, `/usr/bin/node`). OpenCode uses different binaries, and the default has no entry for `opencode.ai`. OpenShell denies by default; you must add a policy that allows the endpoints and binaries OpenCode needs. + +## Write a Custom Policy + +Create `opencode-policy.yaml` with the content below. It adds `opencode_api`, broadens `nvidia_inference` binaries, sets `inference.allowed_routes` to `nvidia`, and includes GitHub access for OpenCode. 
+ +```yaml +version: 1 +inference: + allowed_routes: + - nvidia +filesystem_policy: + include_workdir: true + read_only: + - /usr + - /lib + - /proc + - /dev/urandom + - /app + - /etc + - /var/log + read_write: + - /sandbox + - /tmp + - /dev/null +landlock: + compatibility: best_effort +process: + run_as_user: sandbox + run_as_group: sandbox +network_policies: + opencode_api: + name: opencode-api + endpoints: + - host: opencode.ai + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: full + binaries: + - path: /usr/local/bin/opencode + - path: /usr/bin/node + nvidia_inference: + name: nvidia-inference + endpoints: + - host: integrate.api.nvidia.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: full + binaries: + - path: /usr/local/bin/opencode + - path: /usr/bin/node + - path: /usr/bin/curl + - path: /bin/bash + npm_registry: + name: npm-registry + endpoints: + - host: registry.npmjs.org + port: 443 + binaries: + - path: /usr/bin/npm + - path: /usr/bin/node + - path: /usr/local/bin/npm + - path: /usr/local/bin/node + github_rest_api: + name: github-rest-api + endpoints: + - host: api.github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: read-only + binaries: + - path: /usr/local/bin/opencode + - path: /usr/bin/node + - path: /usr/bin/gh + github_ssh_over_https: + name: github-ssh-over-https + endpoints: + - host: github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + rules: + - allow: + method: GET + path: "/**/info/refs*" + - allow: + method: POST + path: "/**/git-upload-pack" + binaries: + - path: /usr/bin/git +``` + +This policy differs from the default in four key ways: + +- `opencode_api`: Allows OpenCode and Node.js to reach `opencode.ai:443`. +- Broader `nvidia_inference` binaries: Adds `/usr/local/bin/opencode`, `/usr/bin/curl`, and `/bin/bash` so OpenCode's subprocesses can reach the NVIDIA endpoint. 
+- `inference.allowed_routes`: Includes `nvidia` so inference routing works for userland code. +- GitHub access: Scoped to support OpenCode's git operations. + +:::{warning} +The `filesystem_policy`, `landlock`, and `process` sections are static. They are set at sandbox creation time and cannot be changed on a running sandbox. To modify these, delete and recreate the sandbox. The `network_policies` and `inference` sections are dynamic and can be hot-reloaded. +::: + +## Apply the Policy + +Push your custom policy to the running sandbox: + +```console +$ openshell policy set opencode-sandbox --policy opencode-policy.yaml --wait +``` + +The `--wait` flag blocks until the sandbox confirms the policy is loaded. + +Verify the policy revision was accepted: + +```console +$ openshell policy list opencode-sandbox +``` + +The latest revision should show status `loaded`. + +## Set Up Inference Routing + +So far, you have allowed the OpenCode *agent* to reach `integrate.api.nvidia.com` directly through network policy. But code that OpenCode writes and runs inside the sandbox — scripts, notebooks, applications — uses a separate mechanism called the privacy router. + +Create an inference route so userland code can access NVIDIA models: + +```console +$ openshell inference create \ + --routing-hint nvidia \ + --base-url https://integrate.api.nvidia.com \ + --model-id z-ai/glm5 \ + --api-key $NVIDIA_API_KEY +``` + +The policy you wrote earlier already includes `nvidia` in `inference.allowed_routes`, so no policy update is needed. If you had omitted it, you would add the route to the policy and push again. + +:::{note} +*Network policies* and *inference routes* are two separate enforcement points. Network policies control which hosts the agent binary can reach directly. Inference routes control where LLM API calls from userland code get routed through the privacy proxy. 
+::: + +## Verify the Policy + +Tail the logs again: + +```console +$ openshell logs opencode-sandbox --tail +``` + +You should no longer see `action=deny` lines for the endpoints you added. Connections to `opencode.ai`, `integrate.api.nvidia.com`, and GitHub should show `action=allow`. + +If you still see denials, read the log line carefully. It tells you the exact host, port, and binary that was blocked. Add the missing entry to your policy and push again with `openshell policy set`. This observe-modify-push cycle is the normal workflow for onboarding any new tool in OpenShell. + +## Clean Up + +When you are finished, delete the sandbox: + +```console +$ openshell sandbox delete opencode-sandbox +``` + +## Next Steps + +- {doc}`../safety-and-privacy/policies`: Full reference on policy YAML structure, static and dynamic fields, and enforcement modes. +- [Write Sandbox Policies (network access rules)](../safety-and-privacy/policies.md#network-access-rules): How the proxy evaluates network rules, L4 and L7 inspection, and TLS termination. +- {doc}`../inference/index`: Inference route configuration, protocol detection, and transparent rerouting. +- {doc}`../sandboxes/providers`: Provider types, credential discovery, and manual and automatic creation. +- {doc}`../safety-and-privacy/security-model`: The four protection layers and how they interact. diff --git a/docs/get-started/tutorials.md b/docs/get-started/tutorials.md new file mode 100644 index 00000000..d7c2c96f --- /dev/null +++ b/docs/get-started/tutorials.md @@ -0,0 +1,66 @@ +--- +title: + page: "OpenShell Tutorials" + nav: "Tutorials" +description: "Step-by-step tutorials for running AI agents inside OpenShell sandboxes." 
+keywords: ["openshell tutorials", "claude code sandbox", "opencode sandbox", "openclaw sandbox"] +topics: ["generative_ai", "cybersecurity"] +tags: ["ai_agents", "sandboxing", "tutorial"] +content: + type: tutorial + difficulty: technical_beginner + audience: [engineer, data_scientist] +--- + + + +# Tutorials + +Step-by-step tutorials for running AI agents inside OpenShell sandboxes. + +::::{grid} 1 1 2 2 +:gutter: 3 + +:::{grid-item-card} Run Claude Code Safely +:link: run-claude +:link-type: doc + +Create a sandbox with Claude Code. + ++++ +{bdg-secondary}`Tutorial` +::: + +:::{grid-item-card} Run OpenClaw Safely +:link: run-openclaw +:link-type: doc + +Launch a sandbox with OpenClaw from the OpenShell Community catalog using the `--from` flag. + ++++ +{bdg-secondary}`Tutorial` +::: + +:::{grid-item-card} Run OpenCode with NVIDIA Inference +:link: run-opencode +:link-type: doc + +Launch a sandbox with OpenCode with NVIDIA inference routed to NVIDIA API endpoints. + ++++ +{bdg-secondary}`Tutorial` +::: + +:::: + +```{toctree} +:hidden: +:maxdepth: 2 + +Run Claude Code Safely +Run OpenClaw Safely +Run OpenCode with NVIDIA Inference +``` diff --git a/docs/index.md b/docs/index.md index 96144b97..97dfc9ec 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,64 +1,244 @@ +--- +title: + page: "NVIDIA OpenShell Developer Guide" + nav: "Get Started" + card: "NVIDIA OpenShell" +description: "OpenShell is the safe, private runtime for autonomous AI agents. Run agents in sandboxed environments that protect your data, credentials, and infrastructure." 
+topics: +- Generative AI +- Cybersecurity +tags: +- AI Agents +- Sandboxing +- Security +- Privacy +- Inference Routing +content: + type: index +--- + -# NVIDIA NemoClaw Developer Guide +# NVIDIA OpenShell + +[![GitHub](https://img.shields.io/badge/github-repo-green?logo=github)](https://github.com/NVIDIA/OpenShell) +[![License](https://img.shields.io/badge/License-Apache_2.0-blue)](https://github.com/NVIDIA/OpenShell/blob/main/LICENSE) +[![PyPI](https://img.shields.io/badge/PyPI-openshell-orange?logo=pypi)](https://pypi.org/project/openshell/) + +OpenShell is the safe, private runtime for autonomous AI agents. It provides sandboxed execution environments +that protect your data, credentials, and infrastructure. Agents run with exactly the permissions they need and +nothing more, governed by declarative policies that prevent unauthorized file access, data exfiltration, and +uncontrolled network activity. + +## Get Started + +Install the CLI and create your first sandbox in two commands. + +```{raw} html + +
+
+ + + +
+
+
$ pip install openshell
+
$ openshell sandbox create -- claude--from openclaw
+
+
+``` + +Refer to the [Quickstart](get-started/quickstart.md) for more details. + +--- + +## Explore + +::::{grid} 2 2 3 3 +:gutter: 3 + +:::{grid-item-card} About OpenShell +:link: about/overview +:link-type: doc + +Learn about OpenShell and its capabilities. + ++++ +{bdg-secondary}`Concept` +::: + +:::{grid-item-card} Get Started +:link: get-started/quickstart +:link-type: doc + +Quickstart guide and tutorials for creating an OpenShell sandbox with Claude Code, OpenClaw, and OpenCode. + ++++ +{bdg-secondary}`Tutorial` +::: + +:::{grid-item-card} Sandboxes +:link: sandboxes/index +:link-type: doc + +Create, manage, and customize sandboxes. Use community images or bring your own container. + ++++ +{bdg-secondary}`Concept` +::: + +:::{grid-item-card} Safety and Privacy +:link: safety-and-privacy/index +:link-type: doc + +Write policies that control what agents can access. Iterate on network rules in real time. + ++++ +{bdg-secondary}`Concept` +::: + +:::{grid-item-card} Inference Routing +:link: inference/index +:link-type: doc + +Keep inference traffic private by routing API calls to local or self-hosted backends. + ++++ +{bdg-secondary}`Concept` +::: + +:::{grid-item-card} Reference +:link: reference/cli +:link-type: doc + +CLI commands, policy schema, environment variables, and system architecture. + ++++ +{bdg-secondary}`Reference` +::: -NemoClaw is the runtime environment for autonomous agents. It provides secure sandboxed execution, cluster management, and infrastructure for running AI agent workloads. 
+:::: ```{toctree} :caption: About :hidden: -about/index +Overview +How It Works +Release Notes ``` ```{toctree} :caption: Get Started :hidden: -get-started/index +get-started/quickstart +get-started/tutorials ``` ```{toctree} -:caption: Feature 1 +:caption: Sandboxes :hidden: -feature1/index +sandboxes/index +sandboxes/create-and-manage +sandboxes/providers +sandboxes/custom-containers +sandboxes/community-sandboxes +sandboxes/terminal ``` ```{toctree} -:caption: Feature 2 +:caption: Safety and Privacy :hidden: -feature2/index +safety-and-privacy/index +safety-and-privacy/security-model +safety-and-privacy/policies ``` ```{toctree} -:caption: Observability +:caption: Inference Routing :hidden: -observability/index +inference/index +inference/configure-routes ``` ```{toctree} :caption: Reference :hidden: -reference/index +reference/cli +reference/policy-schema +reference/architecture ``` ```{toctree} :caption: Troubleshooting :hidden: -troubleshooting/index +troubleshooting ``` ```{toctree} :caption: Resources :hidden: -resources/index +resources/eula ``` diff --git a/docs/inference/configure-routes.md b/docs/inference/configure-routes.md new file mode 100644 index 00000000..4f03a070 --- /dev/null +++ b/docs/inference/configure-routes.md @@ -0,0 +1,92 @@ + + +# Configure Inference Routes + +This guide covers how to create and manage inference routes so that sandboxes can route AI API calls from userland code to policy-controlled backends. You will learn to create routes, connect them to sandboxes through policy, and manage routes across a cluster. + +:::{note} +Inference routes are for *userland code*, which are scripts and programs that the agent writes and executes inside the sandbox. The agent's own API traffic flows directly through network policies, not through inference routing. Refer to [Write Sandbox Policies](../safety-and-privacy/policies.md#how-network-access-is-evaluated) for the distinction between agent traffic and userland traffic. 
+::: + +## Create a Route + +Use `openshell inference create` to register a new inference backend: + +```console +$ openshell inference create \ + --routing-hint local \ + --base-url https://my-llm.example.com \ + --model-id my-model-v1 \ + --api-key sk-abc123 +``` + +This creates a route named after the routing hint. Any sandbox whose policy includes `local` in its `inference.allowed_routes` list can use this route. If you omit `--protocol`, the CLI probes the endpoint and auto-detects the supported protocol (refer to [Supported API Patterns](index.md#supported-api-patterns)). Refer to the [CLI Reference](../reference/cli.md#inference-create-flags) for all flags. + +## Manage Routes + +### List all routes + +```console +$ openshell inference list +``` + +### Update a route + +Change any field on an existing route: + +```console +$ openshell inference update --base-url https://new-backend.example.com +``` + +```console +$ openshell inference update --model-id updated-model-v2 --api-key sk-new-key +``` + +### Delete a route + +```console +$ openshell inference delete +``` + +Deleting a route that is referenced by running sandboxes does not interrupt those sandboxes immediately. Future inference requests that would have matched the deleted route will be denied. + +## Connect a Sandbox to Routes + +Inference routes take effect only when a sandbox policy references the route's `routing_hint` in its `inference.allowed_routes` list. + +### Step 1: Add the routing hint to your policy + +```yaml +inference: + allowed_routes: + - local +``` + +### Step 2: Create or update the sandbox with that policy + +```console +$ openshell sandbox create --policy ./my-policy.yaml --keep -- claude +``` + +Or, if the sandbox is already running, push an updated policy: + +```console +$ openshell policy set --policy ./my-policy.yaml --wait +``` + +The `inference` section is a dynamic field, so you can add or remove routing hints on a running sandbox without recreating it. 
+ +## Good to Know + +- Cluster-level: routes are shared across all sandboxes in the cluster, not scoped to one sandbox. +- Per-model: each route maps to one model. Create multiple routes with the same `--routing-hint` but different `--model-id` values to expose multiple models. +- Hot-reloadable: routes can be created, updated, or deleted at any time without restarting sandboxes. + +## Next Steps + +- {doc}`index`: understand the inference routing architecture, interception sequence, and routing hints. +- [Write Sandbox Policies (network access rules)](../safety-and-privacy/policies.md#network-access-rules): configure the network policies that control agent traffic (as opposed to userland inference traffic). +- {doc}`../safety-and-privacy/policies`: the full policy iteration workflow. diff --git a/docs/inference/index.md b/docs/inference/index.md new file mode 100644 index 00000000..927106f8 --- /dev/null +++ b/docs/inference/index.md @@ -0,0 +1,82 @@ + + +# About Inference Routing + +The inference routing system keeps your AI inference traffic private by +transparently intercepting API calls from sandboxed agents and rerouting them +to backends you control. + +:::{note} +Inference routing applies to userland traffic: code that the agent writes +or runs, not the agent itself. The agent's own API calls (for example, Claude calling +`api.anthropic.com`) go directly through network policy. +Refer to [Write Sandbox Policies](../safety-and-privacy/policies.md#how-network-access-is-evaluated) for the distinction. +::: + +## How It Works + +When userland code inside a sandbox makes an API call (for example, using the OpenAI +or Anthropic SDK), the request flows through the sandbox proxy. If the +destination does not match any explicit network policy but the sandbox has +inference routes configured, the proxy: + +1. TLS-terminates the connection using the sandbox's ephemeral CA. +2. Detects the inference API pattern (for example, `POST /v1/chat/completions`). +3. 
Strips authorization headers and forwards to a matching backend. +4. Rewrites the authorization with the route's API key and model ID. +5. Returns the response to the agent's code. The agent sees a normal HTTP + response as if it came from the original API. + +The agent's code needs zero changes. Standard OpenAI/Anthropic SDK calls work +transparently. + +```{mermaid} +sequenceDiagram + participant Code as Userland Code + participant Proxy as Sandbox Proxy + participant OPA as Policy Engine + participant Router as Privacy Router + participant Backend as Your Backend + + Code->>Proxy: CONNECT api.openai.com:443 + Proxy->>OPA: evaluate policy + OPA-->>Proxy: InspectForInference + Proxy-->>Code: 200 Connection Established + Proxy->>Proxy: TLS terminate + Code->>Proxy: POST /v1/chat/completions + Proxy->>Router: route to matching backend + Router->>Backend: forwarded request + Backend-->>Router: response + Router-->>Proxy: response + Proxy-->>Code: HTTP 200 OK +``` + +## Supported API Patterns + +The proxy detects these inference patterns: + +| Pattern | Method | Path | +|---|---|---| +| OpenAI Chat Completions | POST | `/v1/chat/completions` | +| OpenAI Completions | POST | `/v1/completions` | +| Anthropic Messages | POST | `/v1/messages` | + +If an intercepted request does not match any known pattern, it is denied. + +## Key Properties + +- Zero code changes: standard SDK calls work transparently. +- Inference privacy: prompts and responses stay on your infrastructure. +- Credential isolation: the agent's code never sees your backend API key. +- Policy-controlled: `inference.allowed_routes` determines which routes a + sandbox can use. +- Hot-reloadable: update `allowed_routes` on a running sandbox without + restarting. + +## Next Steps + +- {doc}`configure-routes`: create and manage inference routes. +- [Write Sandbox Policies](../safety-and-privacy/policies.md#how-network-access-is-evaluated): understand agent traffic compared to userland traffic. 
diff --git a/docs/observability/index.md b/docs/observability/index.md deleted file mode 100644 index 31d92605..00000000 --- a/docs/observability/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# Observability diff --git a/docs/reference/architecture.md b/docs/reference/architecture.md new file mode 100644 index 00000000..c6bcaac7 --- /dev/null +++ b/docs/reference/architecture.md @@ -0,0 +1,208 @@ +--- +title: + page: "Architecture Reference" + nav: "Architecture" +description: "Detailed reference for OpenShell architecture: gateway, sandbox internals, policy engine, privacy router, and remote deployment." +keywords: ["openshell architecture", "sandbox architecture", "agent isolation", "k3s", "policy engine"] +topics: ["generative_ai", "cybersecurity"] +tags: ["ai_agents", "sandboxing", "security", "architecture"] +content: + type: reference + difficulty: technical_advanced + audience: [engineer, data_scientist] +--- + + + +# Architecture Reference + +This page provides detailed technical information about each OpenShell component. +For a high-level summary, refer to the [Architecture Overview](../about/architecture.md). + +## Component Diagram + +```{mermaid} +graph TB + subgraph docker["Docker Container"] + subgraph k3s["k3s Cluster"] + gw["Gateway"] + pr["Privacy Router"] + + subgraph pod1["Sandbox"] + sup1["Supervisor"] + proxy1["L7 Proxy"] + pe1["Policy Engine"] + agent1["Agent"] + + sup1 --> proxy1 + sup1 --> agent1 + proxy1 --> pe1 + end + + subgraph pod2["Sandbox"] + sup2["Supervisor"] + proxy2["L7 Proxy"] + pe2["Policy Engine"] + agent2["Agent"] + + sup2 --> proxy2 + sup2 --> agent2 + proxy2 --> pe2 + end + + gw -- "credentials,
policies" --> sup1 + gw -- "credentials,
policies" --> sup2 + end + end + + cli["openshell CLI"] -- "gRPC" --> gw + agent1 -- "all outbound
traffic" --> proxy1 + agent2 -- "all outbound
traffic" --> proxy2 + proxy1 -- "policy-approved
traffic" --> internet["External Services"] + proxy2 -- "policy-approved
traffic" --> internet + proxy1 -- "inference traffic" --> pr + proxy2 -- "inference traffic" --> pr + pr -- "routed requests" --> backend["LLM Backend"] + + style cli fill:#ffffff,stroke:#000000,color:#000000 + style gw fill:#76b900,stroke:#000000,color:#000000 + style pr fill:#76b900,stroke:#000000,color:#000000 + style sup1 fill:#76b900,stroke:#000000,color:#000000 + style proxy1 fill:#76b900,stroke:#000000,color:#000000 + style pe1 fill:#76b900,stroke:#000000,color:#000000 + style agent1 fill:#ffffff,stroke:#000000,color:#000000 + style sup2 fill:#76b900,stroke:#000000,color:#000000 + style proxy2 fill:#76b900,stroke:#000000,color:#000000 + style pe2 fill:#76b900,stroke:#000000,color:#000000 + style agent2 fill:#ffffff,stroke:#000000,color:#000000 + style internet fill:#ffffff,stroke:#000000,color:#000000 + style backend fill:#ffffff,stroke:#000000,color:#000000 + style docker fill:#f5f5f5,stroke:#000000,color:#000000 + style k3s fill:#e8e8e8,stroke:#000000,color:#000000 + style pod1 fill:#f5f5f5,stroke:#000000,color:#000000 + style pod2 fill:#f5f5f5,stroke:#000000,color:#000000 + + linkStyle default stroke:#76b900,stroke-width:2px +``` + +## Gateway + +The gateway is the central control-plane API. It coordinates sandbox lifecycle +and state, acts as the auth boundary, and brokers all requests across the +platform. It exposes a gRPC API consumed by the CLI and handles: + +- Sandbox lifecycle: creates, monitors, and deletes sandbox pods. +- Provider storage: stores encrypted provider credentials. +- Policy distribution: delivers policy YAML to sandboxes at startup and on + hot-reload. +- SSH termination: terminates SSH tunnels from the CLI and routes them to + the correct sandbox. + +The CLI never talks to sandbox pods directly. All commands go through the +gateway. + +## Sandbox + +Each sandbox is an isolated runtime that includes container supervision and +general L7 egress routing. 
It runs as a Kubernetes pod containing a supervisor +process, an L7 proxy, and the agent. + +### Supervisor + +The supervisor is the sandbox's init process. It establishes all isolation +boundaries before starting the agent: + +1. Fetch credentials from the gateway for all attached providers. +2. Set up the network namespace. The sandbox gets its own network stack + with no default route. All outbound traffic is redirected through the proxy. +3. Apply [Landlock](https://docs.kernel.org/security/landlock.html) filesystem restrictions based on the policy. +4. Apply seccomp filters to restrict available system calls. +5. Start the L7 proxy in the sandbox's network namespace. +6. Start the SSH server for interactive access. +7. Start the agent as a child process with credentials injected as + environment variables. + +### L7 Proxy + +Every outbound TCP connection from any process in the sandbox is routed through +the proxy. For each connection, the proxy: + +1. Resolves the calling binary through `/proc//exe`, ancestor process + walking, and `/proc//cmdline`. +2. Queries the policy engine with the destination host, port, and resolved + binary path. +3. Acts on the decision: allow the connection directly, hand it to the + privacy router for inference routing, or deny it. Refer to + [How network access is evaluated](../safety-and-privacy/policies.md#how-network-access-is-evaluated) + for the full decision model. + +For endpoints configured with `protocol: rest` and `tls: terminate`, the proxy +performs full L7 inspection: it decrypts TLS, reads the HTTP method and path, +evaluates access rules, then re-encrypts and forwards the request. + +## Policy Engine + +The policy engine is the definition and enforcement layer for filesystem, +network, and process constraints. Defense in depth enforces policies from the +application layer down to infrastructure and kernel layers. + +The engine evaluates policies compiled from the sandbox's policy YAML. 
It is +queried synchronously by the proxy on every outbound connection. Policy updates +delivered through hot-reload are compiled and loaded without restarting the proxy. + +## Privacy Router + +The privacy router is a privacy-aware LLM routing layer that keeps sensitive +context on sandbox compute and routes based on cost/privacy policy. + +When the policy engine determines that a connection should be inspected for +inference, the privacy router: + +1. Reads the intercepted HTTP request. +2. Checks whether the method and path match a recognized inference API pattern + (`/v1/chat/completions`, `/v1/completions`, `/v1/messages`). +3. Selects a route whose `routing_hint` appears in the sandbox policy's + `allowed_routes`. +4. Strips the original authorization header. +5. Injects the route's API key and model ID. +6. Forwards the request to the route's backend URL. + +The router refreshes its route list periodically from the gateway, so routes +created with `openshell inference create` become available without restarting +sandboxes. + +## Remote Deployment + +OpenShell can deploy the cluster to a remote host via SSH. This is useful for +shared team environments or running sandboxes on machines with more resources. + +### Deploy + +```console +$ openshell gateway start --remote user@host --ssh-key ~/.ssh/id_rsa +``` + +The CLI connects to the remote machine over SSH, installs k3s, deploys the +OpenShell control plane, and registers the cluster locally. The remote machine +needs Docker installed. + +### Tunnel + +After deploying to a remote host, set up a tunnel for CLI access: + +```console +$ openshell gateway tunnel +``` + +This establishes an SSH tunnel from your local machine to the remote cluster's +API server. All subsequent CLI commands route through this tunnel transparently. + +### Remote Architecture + +The architecture is identical to a local deployment. The only difference is +that the Docker container runs on the remote host instead of your workstation. 
+The CLI communicates with the gateway over the SSH tunnel. Sandbox SSH +connections are also tunneled through the gateway. diff --git a/docs/reference/cli.md b/docs/reference/cli.md new file mode 100644 index 00000000..e5b94d0d --- /dev/null +++ b/docs/reference/cli.md @@ -0,0 +1,204 @@ + + +# CLI Reference + +Complete command reference for the `openshell` CLI. Every subcommand, flag, and option is documented here. + +## Command Tree + +```text +openshell +├── status +├── logs [name] +├── forward +│ ├── start +│ ├── stop +│ └── list +├── policy +│ ├── set +│ ├── get +│ └── list +├── gateway +│ ├── start +│ ├── stop +│ ├── destroy +│ ├── info +│ ├── tunnel +│ └── select [name] +├── sandbox +│ ├── create +│ ├── get [name] +│ ├── list +│ ├── delete +│ ├── connect [name] +│ ├── upload [name] +│ ├── download [name] +│ └── ssh-config +├── provider +│ ├── create +│ ├── get +│ ├── list +│ ├── update +│ └── delete +├── inference +│ ├── create +│ ├── update +│ ├── delete +│ └── list +├── term +└── completions +``` + +## Top-Level Commands + +Commands available directly under `openshell` for common operations. + +| Command | Description | +|---|---| +| `openshell status` | Show the health and status of the active gateway. | +| `openshell logs [name]` | View sandbox logs. Use `--tail` for streaming, `--source` and `--level` to filter. When name is omitted, uses the last-used sandbox. | +| `openshell forward start ` | Forward a sandbox port to the host. Add `-d` for background mode. | +| `openshell forward stop ` | Stop an active port forward. | +| `openshell forward list` | List all active port forwards. | +| `openshell policy set ` | Apply or update a policy on a running sandbox. Pass `--policy `. | +| `openshell policy get ` | Show the active policy for a sandbox. Add `--full` for the complete policy with metadata. | +| `openshell policy list ` | List all policy versions applied to a sandbox, with status. | + +## Gateway Commands + +Manage the OpenShell runtime cluster. 
+ +| Command | Description | +|---|---| +| `openshell gateway start` | Deploy a new cluster. Add `--remote user@host` for remote deployment. | +| `openshell gateway stop` | Stop the active cluster, preserving state. | +| `openshell gateway destroy` | Permanently remove the cluster and all its data. | +| `openshell gateway info` | Show detailed information about the cluster. | +| `openshell gateway tunnel` | Set up a kubectl tunnel to a remote cluster. | +| `openshell gateway select ` | Set the active cluster. All subsequent commands target this cluster. | +| `openshell gateway select` | List all registered clusters (when called without a name). | + +## Sandbox Commands + +Create and manage isolated agent execution environments. + +| Command | Description | +|---|---| +| `openshell sandbox create` | Create a new sandbox. See flag reference below. | +| `openshell sandbox get [name]` | Show detailed information about a sandbox. When name is omitted, uses the last-used sandbox. | +| `openshell sandbox list` | List all sandboxes in the active cluster. | +| `openshell sandbox delete ` | Delete one or more sandboxes by name. | +| `openshell sandbox connect [name]` | Open an interactive SSH session into a running sandbox. When name is omitted, reconnects to the last-used sandbox. | +| `openshell sandbox upload [name]` | Upload files from the host into a sandbox. When name is omitted, uses the last-used sandbox. | +| `openshell sandbox download [name]` | Download files from a sandbox to the host. When name is omitted, uses the last-used sandbox. | +| `openshell sandbox ssh-config ` | Print SSH config for a sandbox. Append to `~/.ssh/config` for VS Code Remote-SSH. | + +### Sandbox Create Flags + +| Flag | Description | +|---|---| +| `--name` | Assign a human-readable name to the sandbox. Auto-generated if omitted. | +| `--provider` | Attach a credential provider. Repeatable for multiple providers. | +| `--policy` | Path to a policy YAML file to apply at creation time. 
| +| `--upload` | Upload local files into the sandbox before running. | +| `--keep` | Keep the sandbox alive after the trailing command exits. | +| `--forward` | Forward a local port into the sandbox at startup. | +| `--from` | Build from a community sandbox name, local Dockerfile directory, or container image reference. | +| `-- ` | The command to run inside the sandbox. Everything after `--` is passed as the agent command. | + +## Provider Commands + +Manage credential providers that inject secrets into sandboxes. + +| Command | Description | +|---|---| +| `openshell provider create` | Create a new credential provider. See flag reference below. | +| `openshell provider get ` | Show details of a provider. | +| `openshell provider list` | List all providers in the active cluster. | +| `openshell provider update ` | Update a provider's credentials or configuration. | +| `openshell provider delete ` | Delete a provider. | + +### Provider Create Flags + +| Flag | Description | +|---|---| +| `--name` | Name for the provider. | +| `--type` | Provider type: `claude`, `codex`, `opencode`, `github`, `gitlab`, `nvidia`, `generic`, `outlook`. | +| `--from-existing` | Discover credentials from your current shell environment variables. | +| `--credential` | Set a credential explicitly. Format: `KEY=VALUE` or bare `KEY` to read from env. Repeatable. | +| `--config` | Set a configuration value. Format: `KEY=VALUE`. Repeatable. | + +## Inference Commands + +Manage inference routes that intercept and reroute LLM API calls from userland code. + +| Command | Description | +|---|---| +| `openshell inference create` | Create a new inference route. See flag reference below. | +| `openshell inference update ` | Update an existing route's configuration. | +| `openshell inference delete ` | Delete an inference route. | +| `openshell inference list` | List all inference routes in the active cluster. 
| + +### Inference Create Flags + +| Flag | Description | +|---|---| +| `--routing-hint` | Short label that identifies this route (for example, `local`, `nvidia`, `staging`). Referenced by `allowed_routes` in sandbox policies. | +| `--base-url` | Base URL of the inference backend (for example, `https://vllm.internal:8000`). | +| `--model-id` | Model identifier to send to the backend (for example, `meta/llama-3.1-8b`). | +| `--api-key` | API key for authenticating with the backend. | +| `--protocol` | API protocol: `openai` or `anthropic`. Defaults to `openai`. | +| `--disabled` | Create the route in a disabled state. | + +## OpenShell Terminal + +`openshell term` launches the OpenShell Terminal, a dashboard that shows sandbox +status, live logs, and policy decisions in a single view. Navigate with `j`/`k`, +press `f` to follow live output, `s` to filter by source, and `q` to quit. + +Refer to {doc}`/sandboxes/terminal` for the full guide, including how to read log +entries, diagnose blocked connections, and interpret inference interception. + +## Sandbox Name Fallback + +Commands that accept an optional `[name]` argument, such as `get`, `connect`, `upload`, `download`, and `logs`, fall back to the last-used sandbox when the name is omitted. The CLI records the sandbox name each time you create or connect to a sandbox. When falling back, the CLI prints a hint showing which sandbox was selected. + +If no sandbox has been used yet and no name is provided, the command exits with an error prompting you to specify a name. + +## Environment Variables + +| Variable | Description | +|---|---| +| `OPENSHELL_CLUSTER` | Name of the cluster to operate on. Overrides the active cluster set by `openshell gateway select`. | +| `OPENSHELL_SANDBOX_POLICY` | Default path to a policy YAML file. When set, `openshell sandbox create` uses this policy if no `--policy` flag is provided. 
| + +## Shell Completions + +Generate shell completion scripts for tab completion: + +```console +$ openshell completions bash +$ openshell completions zsh +$ openshell completions fish +``` + +Pipe the output to your shell's config file: + +```console +$ openshell completions zsh >> ~/.zshrc +$ source ~/.zshrc +``` + +## Self-Teaching + +Every command and subcommand includes built-in help. Use `--help` at any level to see available subcommands, flags, and usage examples: + +```console +$ openshell --help +$ openshell sandbox --help +$ openshell sandbox create --help +$ openshell gateway --help +``` diff --git a/docs/reference/index.md b/docs/reference/index.md deleted file mode 100644 index d15068d3..00000000 --- a/docs/reference/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# Reference \ No newline at end of file diff --git a/docs/reference/policy-schema.md b/docs/reference/policy-schema.md new file mode 100644 index 00000000..b1aef14b --- /dev/null +++ b/docs/reference/policy-schema.md @@ -0,0 +1,226 @@ + + +# Policy Schema Reference + +Complete field reference for the sandbox policy YAML. Each field is documented with its type, whether it is required, and whether it is static (locked at sandbox creation) or dynamic (hot-reloadable on a running sandbox). + +## Top-Level Structure + +```yaml +version: 1 +filesystem_policy: { ... } +landlock: { ... } +process: { ... } +network_policies: { ... } +inference: { ... } +``` + +| Field | Type | Required | Category | Description | +|---|---|---|---|---| +| `version` | integer | Yes | -- | Policy schema version. Must be `1`. | +| `filesystem_policy` | object | No | Static | Controls which directories the agent can read and write. | +| `landlock` | object | No | Static | Configures Landlock LSM enforcement behavior. | +| `process` | object | No | Static | Sets the user and group the agent process runs as. | +| `network_policies` | map | No | Dynamic | Declares which binaries can reach which network endpoints. 
| +| `inference` | object | No | Dynamic | Controls which inference routing backends are available. | + +Static fields are set at sandbox creation time. Changing them requires destroying and recreating the sandbox. Dynamic fields can be updated on a running sandbox with `openshell policy set` and take effect without restarting. + +## Version + +| Field | Type | Required | Description | +|---|---|---|---| +| `version` | integer | Yes | Schema version number. Currently must be `1`. | + +## Filesystem Policy + +**Category:** Static + +Controls filesystem access inside the sandbox. Paths not listed in either `read_only` or `read_write` are inaccessible. + +| Field | Type | Required | Description | +|---|---|---|---| +| `include_workdir` | bool | No | When `true`, automatically adds the agent's working directory to `read_write`. | +| `read_only` | list of strings | No | Paths the agent can read but not modify. Typically system directories like `/usr`, `/lib`, `/etc`. | +| `read_write` | list of strings | No | Paths the agent can read and write. Typically `/sandbox` (working directory) and `/tmp`. | + +**Validation constraints:** + +- Every path must be absolute (start with `/`). +- Paths must not contain `..` traversal components. The server normalizes paths before storage, but rejects policies where traversal would escape the intended scope. +- Read-write paths must not be overly broad (for example, `/` alone is rejected). +- Each individual path must not exceed 4096 characters. +- The combined total of `read_only` and `read_write` paths must not exceed 256. + +Policies that violate these constraints are rejected with `INVALID_ARGUMENT` at creation or update time. Disk-loaded YAML policies that fail validation fall back to a restrictive default. 
+ +Example: + +```yaml +filesystem_policy: + include_workdir: true + read_only: + - /usr + - /lib + - /proc + - /dev/urandom + - /etc + read_write: + - /sandbox + - /tmp + - /dev/null +``` + +## Landlock + +**Category:** Static + +Configures [Landlock LSM](https://docs.kernel.org/security/landlock.html) enforcement at the kernel level. Landlock provides mandatory filesystem access control below what UNIX permissions allow. + +| Field | Type | Required | Values | Description | +|---|---|---|---|---| +| `compatibility` | string | No | `best_effort`, `hard_requirement` | How OpenShell handles kernel ABI differences. `best_effort` uses the highest Landlock ABI the host kernel supports. `hard_requirement` fails if the required ABI is unavailable. | + +Example: + +```yaml +landlock: + compatibility: best_effort +``` + +## Process + +**Category:** Static + +Sets the OS-level identity for the agent process inside the sandbox. + +| Field | Type | Required | Description | +|---|---|---|---| +| `run_as_user` | string | No | The user name or UID the agent process runs as. Default: `sandbox`. | +| `run_as_group` | string | No | The group name or GID the agent process runs as. Default: `sandbox`. | + +**Validation constraint:** Neither `run_as_user` nor `run_as_group` may be set to `root` or `0`. Policies that request root process identity are rejected at creation or update time. + +Example: + +```yaml +process: + run_as_user: sandbox + run_as_group: sandbox +``` + +## Network Policies + +**Category:** Dynamic + +The policy is a map of named entries. Each entry defines which binaries can reach which endpoints; use multiple entries for different services or different permission levels (e.g. one entry for GitHub, another for your own API). A connection is allowed only when both the destination (host and port) and the calling binary match the same entry. The map key is a logical identifier; the `name` field inside the entry is the display name used in logs. 
+ +### Network Policy Entry + +| Field | Type | Required | Description | +|---|---|---|---| +| `name` | string | No | Display name for the policy entry. Used in log output. Defaults to the map key. | +| `endpoints` | list of endpoint objects | Yes | Hosts and ports this entry permits. | +| `binaries` | list of binary objects | Yes | Executables allowed to connect to these endpoints. | + +### Endpoint Object + +Each endpoint defines a reachable destination and optional inspection rules. + +| Field | Type | Required | Description | +|---|---|---|---| +| `host` | string | Yes | Hostname or IP address. Supports wildcards: `*.example.com` matches any subdomain. | +| `port` | integer | Yes | TCP port number. | +| `protocol` | string | No | Set to `rest` to enable L7 (HTTP) inspection. Omit for L4-only (TCP passthrough). | +| `tls` | string | No | TLS handling mode. `terminate` decrypts TLS at the proxy for inspection. `passthrough` forwards encrypted traffic without inspection. Only relevant when `protocol` is `rest`. | +| `enforcement` | string | No | `enforce` actively blocks disallowed requests. `audit` logs violations but allows traffic through. | +| `access` | string | No | HTTP access level. One of `read-only`, `read-write`, `full`, `git-fetch`, or `git-full`. Refer to table below. Mutually exclusive with `rules`. | +| `rules` | list of rule objects | No | Fine-grained per-method, per-path allow rules. Mutually exclusive with `access`. | + +#### Access Levels + +| Value | Description | +|-------|--------------| +| `full` | All HTTP methods and paths. | +| `read-only` | `GET`, `HEAD`, `OPTIONS` on any path. | +| `read-write` | `GET`, `HEAD`, `OPTIONS`, `POST`, `PUT`, `PATCH` on any path. | +| `git-fetch` | Git Smart HTTP fetch only (clone, pull): `GET /**/info/refs*`, `POST /**/git-upload-pack`. Use only for Git Smart HTTP endpoints (e.g. github.com, gitlab.com). | +| `git-full` | Git Smart HTTP fetch and push: same as `git-fetch` plus `POST /**/git-receive-pack`. 
Use only for Git Smart HTTP endpoints (e.g. github.com, gitlab.com). | + +#### Rule Object + +Used when `access` is not set. Each rule explicitly allows a method and path combination. + +| Field | Type | Required | Description | +|---|---|---|---| +| `allow.method` | string | Yes | HTTP method to allow (e.g. `GET`, `POST`). Use `"*"` to allow any HTTP method on the given path. | +| `allow.path` | string | Yes | URL path pattern. Supports `*` and `**` glob syntax; path matching uses `/` as the segment delimiter. | + +Example with rules. Git Smart HTTP paths are `/{owner}/{repo}.git/{operation}` (e.g. `/{owner}/{repo}.git/git-upload-pack`). Use `/**/` to match any owner/repo. + +```yaml +rules: + - allow: + method: GET + path: /**/info/refs* + - allow: + method: POST + path: /**/git-upload-pack +``` + +### Binary Object + +Identifies an executable that is permitted to use the associated endpoints. + +| Field | Type | Required | Description | +|---|---|---|---| +| `path` | string | Yes | Filesystem path to the executable. Supports glob patterns with `*` and `**`. For example, `/sandbox/.vscode-server/**` matches any executable under that directory tree. | + +### Full Example + +```yaml +network_policies: + github_rest_api: + name: github-rest-api + endpoints: + - host: api.github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: read-only + binaries: + - path: /usr/local/bin/claude + - path: /usr/bin/node + - path: /usr/bin/gh + npm_registry: + name: npm-registry + endpoints: + - host: registry.npmjs.org + port: 443 + binaries: + - path: /usr/bin/npm + - path: /usr/bin/node +``` + +## Inference + +**Category:** Dynamic + +Controls which inference routing backends userland code can access. The `allowed_routes` list names route types that the privacy router will accept. Traffic matching an inference API pattern that targets a route type not in this list is denied. 

+| Field | Type | Required | Description |
+|---|---|---|---|
+| `allowed_routes` | list of strings | No | Routing hint labels (e.g., `local`, `nvidia`, `staging`) that this sandbox can use. Must match the `routing_hint` of inference routes created with `openshell inference create`. |
+
+Example:
+
+```yaml
+inference:
+  allowed_routes:
+    - local
+    - nvidia
+```
diff --git a/docs/reference/troubleshooting.md b/docs/reference/troubleshooting.md
new file mode 100644
index 00000000..4bf21ccf
--- /dev/null
+++ b/docs/reference/troubleshooting.md
@@ -0,0 +1,221 @@
+
+
+# Troubleshooting
+
+Common issues organized by area, with symptoms, causes, and fixes.
+
+## Cluster Issues
+
+### Docker not running
+
+**Symptom:** CLI commands fail with a Docker connection error.
+
+**Fix:** Start Docker Desktop or Docker Engine, then retry.
+
+```console
+$ docker info
+```
+
+If this command fails, Docker is not running.
+
+### Port conflicts
+
+**Symptom:** Cluster deployment fails with "port already in use" or "address already in use."
+
+**Fix:** Another process is using a port the cluster needs. Stop the conflicting process or change the port. Check which process holds the port:
+
+```console
+$ lsof -i :<port>
+```
+
+### Cluster won't start
+
+**Symptom:** `openshell status` shows the cluster is unhealthy or not running. Commands fail with connection errors.
+
+**Fix:** Destroy the cluster and redeploy. This removes all state (sandboxes, providers, policies), so export anything you need first.
+
+```console
+$ openshell gateway destroy
+$ openshell gateway start
+```
+
+## Sandbox Issues
+
+### Sandbox stuck in Provisioning
+
+**Symptom:** `openshell sandbox get <name>` shows phase `Provisioning` and does not transition to `Ready`.
+
+**Causes:**
+- The cluster is unhealthy or overloaded.
+- The container image is being pulled for the first time (large images take time).
+- A resource constraint on the host.
+
+**Fix:** Check cluster health first:
+
+```console
+$ openshell status
+```
+
+Then inspect the sandbox logs for errors:
+
+```console
+$ openshell logs <name>
+```
+
+If the cluster itself is unhealthy, see the cluster issues section above.
+
+### Connection refused
+
+**Symptom:** `openshell sandbox connect <name>` fails with "connection refused."
+
+**Cause:** The sandbox is not in the `Ready` phase yet. The SSH server starts only after the supervisor finishes setting up isolation.
+
+**Fix:** Check the sandbox phase:
+
+```console
+$ openshell sandbox get <name>
+```
+
+Wait until the phase transitions to `Ready`, then retry the connection. If the sandbox is stuck in `Provisioning`, see the section above.
+
+## Provider Issues
+
+### "no existing local credentials/config found"
+
+**Symptom:** `openshell provider create --from-existing` fails with this error.
+
+**Cause:** The expected environment variable is not set in your current shell session. The CLI only reads from environment variables, not config files or keychains.
+
+**Fix:** Check whether the variable is set:
+
+```console
+$ echo $ANTHROPIC_API_KEY
+```
+
+If empty, export it and retry:
+
+```console
+$ export ANTHROPIC_API_KEY=sk-ant-...
+$ openshell provider create --name my-claude --type claude --from-existing
+```
+
+Refer to {doc}`../sandboxes/providers` for the full list of variables each provider type expects.
+
+### Provider not found
+
+**Symptom:** `openshell sandbox create --provider <name>` fails because the provider does not exist.
+
+**Fix:** Create the provider before referencing it in sandbox creation:
+
+```console
+$ openshell provider create --name my-claude --type claude --from-existing
+$ openshell sandbox create --provider my-claude -- claude
+```
+
+List existing providers with:
+
+```console
+$ openshell provider list
+```
+
+## Policy Issues
+
+### "failed to parse sandbox policy YAML"
+
+**Symptom:** `openshell policy set` fails with a YAML parse error.
+
+**Cause:** The policy file contains metadata headers. This commonly happens when you export a policy with `--full` and try to reapply it directly. The `--full` output includes status metadata that is not valid in a policy input file.
+
+**Fix:** Strip the metadata from the exported YAML. Use only the policy content (starting from `version: 1`) without any status or metadata fields added by `--full`.
+
+### Policy shows status "failed"
+
+**Symptom:** `openshell policy list <name>` shows a policy version with status `failed`.
+
+**Cause:** The policy YAML is syntactically valid but contains a semantic error (invalid field value, conflicting rules, etc.).
+
+**Fix:** Check the error message in the `policy list` output. The previous policy remains active when a new policy fails to apply. Fix the error in your YAML and reapply:
+
+```console
+$ openshell policy list <name>
+$ openshell policy set <name> --policy fixed-policy.yaml
+```
+
+## Network Issues
+
+### Agent API calls being denied
+
+**Symptom:** The agent cannot reach its API endpoint. Logs show `action=deny` for requests that should be allowed.
+
+**Fix:** Check the sandbox logs for denied connections:
+
+```console
+$ openshell logs <name>
+```
+
+Look for entries with `action=deny`. Verify that:
+
+1. The endpoint (host and port) is listed in your policy's `network_policies`.
+2. The calling binary's path is listed in the `binaries` for that policy entry.
+
+Both the endpoint and binary must match for the connection to be allowed.
+
+### Agent calls intercepted instead of going direct
+
+**Symptom:** The agent's own API calls are being intercepted by the privacy router instead of flowing directly to the API. The agent may receive responses from a different model or fail with authentication errors.
+
+**Cause:** The binary path in your policy does not match the actual process executable making the connection. The proxy resolves the calling binary via `/proc/<pid>/exe`. If the resolved path does not match any binary in your `network_policies` entry, the connection falls through to inference interception.
+
+**Fix:** Check the sandbox logs for the binary path the proxy resolved:
+
+```console
+$ openshell logs <name>
+```
+
+Look for the `binary_path` field in intercepted connection entries. Update your policy's `binaries` list to include the actual executable path. Common mismatches:
+
+| Expected | Actual | Why |
+|---|---|---|
+| `/usr/local/bin/claude` | `/usr/bin/node` | Claude Code runs as a Node.js process. Include both paths. |
+| `/usr/bin/python3` | `/usr/bin/python3.12` | Versioned Python binary. Use the exact path from the logs. |
+| `/usr/local/bin/opencode` | `/usr/bin/node` | opencode runs via Node.js. Include the Node binary path. |
+
+### Inference routing not working
+
+**Symptom:** Userland code makes an inference API call but gets a 403 or the call is denied. The privacy router does not intercept the request.
+
+**Causes:**
+- The `allowed_routes` in the sandbox policy does not include the route's `routing_hint`.
+- No inference route exists with a matching `routing_hint`.
+- The inference route is disabled.
+
+**Fix:** Check that routes exist and match:
+
+```console
+$ openshell inference list
+```
+
+Verify that the route's `routing_hint` appears in your sandbox policy's `inference.allowed_routes`. Check the logs for `route_count` to confirm the router loaded the expected number of routes:
+
+```console
+$ openshell logs <name>
+```
+
+## OpenShell Terminal Issues
+
+### Terminal shows no logs
+
+**Symptom:** `openshell term` launches but displays no log entries.
+
+**Cause:** No sandbox is running, or the sandbox has not started producing log output yet.
+
+**Fix:** Check whether a sandbox is running and in the `Ready` phase:
+
+```console
+$ openshell sandbox get <name>
+```
+
+If the sandbox is in `Provisioning`, wait for it to reach `Ready`. If no sandboxes exist, create one first. 
The NemoClaw Terminal displays logs from all active sandboxes in the cluster. diff --git a/docs/resources/index.md b/docs/resources/index.md deleted file mode 100644 index eb696ad1..00000000 --- a/docs/resources/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# Resources diff --git a/docs/safety-and-privacy/index.md b/docs/safety-and-privacy/index.md new file mode 100644 index 00000000..32448a55 --- /dev/null +++ b/docs/safety-and-privacy/index.md @@ -0,0 +1,58 @@ + + +# About Safety and Privacy + +OpenShell wraps every sandbox in four independent protection layers. No single +point of failure can compromise your environment. Each layer covers gaps the +others cannot. + +```{mermaid} +graph TB + subgraph runtime["OpenShell Runtime"] + direction TB + + subgraph layers["Protection Layers"] + direction TB + + fs["Filesystem — Landlock LSM"] + net["Network — Proxy + Policy Engine"] + proc["Process — seccomp + Unprivileged User"] + inf["Inference — Privacy Router"] + + subgraph sandbox["Sandbox"] + agent(["AI Agent"]) + end + end + end + + agent -- "read /sandbox ✔" --> fs + agent -- "read /etc/shadow ✘" --> fs + agent -- "curl approved.com ✔" --> net + agent -- "curl evil.com ✘" --> net + agent -- "sudo install pkg ✘" --> proc + agent -- "call api.openai.com" --> inf + inf -- "reroute → your backend ✔" --> net + + style runtime fill:#f5f5f5,stroke:#000000,color:#000000 + style layers fill:#e8e8e8,stroke:#000000,color:#000000 + style sandbox fill:#f5f5f5,stroke:#000000,color:#000000 + style agent fill:#ffffff,stroke:#000000,color:#000000 + style fs fill:#76b900,stroke:#000000,color:#000000 + style net fill:#76b900,stroke:#000000,color:#000000 + style proc fill:#76b900,stroke:#000000,color:#000000 + style inf fill:#76b900,stroke:#000000,color:#000000 + + linkStyle default stroke:#76b900,stroke-width:2px +``` + +You control all four layers through a single YAML policy. Network and inference +rules are hot-reloadable on a running sandbox. 
Filesystem and process +restrictions are locked at creation time. + +- {doc}`security-model`: Threat scenarios (data exfiltration, credential + theft, unauthorized API calls, privilege escalation) and how OpenShell + addresses each one. +- {doc}`policies`: What a policy is, how it's evaluated, its structure, how to edit it, network access rules (including the GitHub push example), and the iteration workflow. diff --git a/docs/safety-and-privacy/policies.md b/docs/safety-and-privacy/policies.md new file mode 100644 index 00000000..54dcd856 --- /dev/null +++ b/docs/safety-and-privacy/policies.md @@ -0,0 +1,211 @@ + + +# Write Sandbox Policies + +This guide covers how to author, iterate, and manage sandbox policies that control what an agent can do inside an OpenShell sandbox. You will learn to create sandboxes with custom policies, monitor denied traffic to discover missing rules, and push policy updates without restarting the sandbox. + +## What is a policy + +A policy is a single YAML document that controls what a sandbox can do: filesystem access, process identity, network access, and inference routing. You attach it when creating a sandbox; the network and inference parts can be updated on a running sandbox without restarting. OpenShell's four protection layers (filesystem, network, process, inference) are all configured through this one policy. + +## How is it evaluated + +For **network** traffic, the proxy matches destination (host and port) and calling binary to a policy block and optionally applies per-endpoint rules; see [Network access rules](#network-access-rules) below. For filesystem and process, the policy is applied at sandbox start (and for static fields, at creation only). For the full endpoint schema and binary matching, see the [Policy Schema Reference](../reference/policy-schema.md). 
+ +## How is it structured + +A policy is a YAML document with six top-level sections: `version`, `filesystem_policy`, `landlock`, `process`, `network_policies`, and `inference`. Static fields (`filesystem_policy`, `landlock`, `process`) are locked at sandbox creation and require recreation to change. Dynamic fields (`network_policies`, `inference`) are hot-reloadable on a running sandbox. The `landlock` section configures [Landlock LSM](https://docs.kernel.org/security/landlock.html) enforcement at the kernel level. + +```yaml +version: 1 + +# Static: locked at sandbox creation. Paths the agent can read vs read/write. +filesystem_policy: + read_only: [/usr, /lib, /etc] + read_write: [/sandbox, /tmp] + +# Static: Landlock LSM kernel enforcement. best_effort uses highest ABI the host supports. +landlock: + compatibility: best_effort + +# Static: Unprivileged user/group the agent process runs as. +process: + run_as_user: sandbox + run_as_group: sandbox + +# Dynamic: hot-reloadable. Named blocks of endpoints + binaries allowed to reach them. +network_policies: + my_api: + name: my-api + endpoints: + - host: api.example.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: full + binaries: + - path: /usr/bin/curl + +# Dynamic: hot-reloadable. Routing hints this sandbox can use for inference (e.g. local, nvidia). +inference: + allowed_routes: [local] +``` + +For the complete structure and every field, see the [Policy Schema Reference](../reference/policy-schema.md). + +## Network access rules + +Network access is controlled by policy blocks under `network_policies`. Each block has a **name**, a list of **endpoints** (host, port, protocol, and optional rules), and a list of **binaries** that are allowed to use those endpoints. The example below shows a full policy block. + +### How network access is evaluated + +Every outbound connection from the sandbox goes through the proxy. 
The proxy matches the **destination** (host and port) and the **calling binary** to an endpoint in one of your policy blocks. If an endpoint matches the destination and the binary is listed in that block's `binaries`, the connection is **allowed**. For endpoints with `protocol: rest` and `tls: terminate`, each HTTP request is also checked against that endpoint's `rules` (method and path). If no endpoint matches and inference routes are configured, the request may be **rerouted for inference**. Otherwise the connection is **denied**. Endpoints without `protocol` or `tls` (L4-only) allow the TCP stream through without inspecting payloads. For the full endpoint schema, access presets, and binary matching, see the [Policy Schema Reference](../reference/policy-schema.md). + +### Enable GitHub push + +The following policy block allows the listed binaries (Claude and the GitHub CLI) to reach `api.github.com` with the given rules: read-only (GET, HEAD, OPTIONS) and GraphQL (POST) for all paths; full write access for `alpha-repo`; and create/edit issues only for `bravo-repo`. Replace `` with your GitHub org or username. + +```yaml + github_repos: + name: github_repos + endpoints: + - host: api.github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + rules: + # Read-only access to all GitHub API paths + - allow: + method: GET + path: "/**" + - allow: + method: HEAD + path: "/**" + - allow: + method: OPTIONS + path: "/**" + # GraphQL API (used by gh CLI for most operations) + - allow: + method: POST + path: "/graphql" + # alpha-repo: full write access + - allow: + method: "*" + path: "/repos//alpha-repo/**" + # bravo-repo: create + edit issues + - allow: + method: POST + path: "/repos//bravo-repo/issues" + - allow: + method: PATCH + path: "/repos//bravo-repo/issues/*" + binaries: + - { path: /usr/local/bin/claude } + - { path: /usr/bin/gh } +``` + +Then run `openshell policy set --policy --wait` to apply. 
+ +### If something is blocked + +Check `openshell logs --tail --source sandbox` for the denied host, path, and binary. Add or adjust the matching endpoint or rules in the relevant policy block (e.g. add a new `allow` rule for the method and path, or add the binary to that block's `binaries` list). See [How do I edit it](#how-do-i-edit-it) for the full iteration workflow. + +## Default Policy + +OpenShell ships a built-in default policy designed for Claude Code. It covers Claude's API endpoints, telemetry hosts, GitHub access, and VS Code marketplace traffic out of the box. + +| Agent | Default policy coverage | What you need to do | +|---|---|---| +| Claude Code | Full | Nothing: works out of the box | +| OpenCode | Partial | Add `opencode.ai` endpoint and OpenCode binary paths. | +| Codex | None | Provide a complete custom policy with OpenAI endpoints and Codex binary paths. | + +:::{important} +If you run a non-Claude agent without a custom policy, the agent's API calls will be denied by the proxy. You must provide a policy that declares the agent's endpoints and binaries. +::: + +## Create a Sandbox with a Custom Policy + +Pass a policy YAML file when creating the sandbox: + +```console +$ openshell sandbox create --policy ./my-policy.yaml --keep -- claude +``` + +The `--keep` flag keeps the sandbox running after the initial command exits, which is useful when you plan to iterate on the policy. + +To avoid passing `--policy` every time, set a default policy with an environment variable: + +```console +$ export OPENSHELL_SANDBOX_POLICY=./my-policy.yaml +$ openshell sandbox create --keep -- claude +``` + +The CLI uses the policy from `OPENSHELL_SANDBOX_POLICY` whenever `--policy` is not explicitly provided. + +## How do I edit it + +To change what the sandbox can access, you pull the current policy, edit the YAML, and push the update. The workflow is iterative: create the sandbox, monitor logs for denied actions, pull the policy, modify it, push, and verify. 
+ +```{mermaid} +flowchart TD + A["1. Create sandbox with initial policy"] --> B["2. Monitor logs for denied actions"] + B --> C["3. Pull current policy"] + C --> D["4. Modify the policy YAML"] + D --> E["5. Push updated policy"] + E --> F["6. Verify the new revision loaded"] + F --> B + + style A fill:#76b900,stroke:#000000,color:#000000 + style B fill:#76b900,stroke:#000000,color:#000000 + style C fill:#76b900,stroke:#000000,color:#000000 + style D fill:#ffffff,stroke:#000000,color:#000000 + style E fill:#76b900,stroke:#000000,color:#000000 + style F fill:#76b900,stroke:#000000,color:#000000 + + linkStyle default stroke:#76b900,stroke-width:2px +``` + +**Steps** + +1. **Create** the sandbox with your initial policy (or set `OPENSHELL_SANDBOX_POLICY`). + + ```console + $ openshell sandbox create --policy ./my-policy.yaml --keep -- claude + ``` + +2. **Monitor** denials — each log entry shows host, port, binary, and reason. Alternatively use `openshell term` for a live dashboard. + + ```console + $ openshell logs --tail --source sandbox + ``` + +3. **Pull** the current policy. Strip the metadata header (Version, Hash, Status) before reusing the file. + + ```console + $ openshell policy get --full > current-policy.yaml + ``` + +4. **Edit** the YAML: add or adjust `network_policies` entries, binaries, `access` or `rules`, or `inference.allowed_routes`. + +5. **Push** the updated policy. Exit codes: 0 = loaded, 1 = validation failed, 124 = timeout. + + ```console + $ openshell policy set --policy current-policy.yaml --wait + ``` + +6. **Verify** the new revision. If status is `loaded`, repeat from step 2 as needed; if `failed`, fix the policy and repeat from step 4. + + ```console + $ openshell policy list + ``` + +## Next Steps + +- [Policy Schema Reference](../reference/policy-schema.md): Complete field reference for the policy YAML. +- [Security Model](security-model.md): Threat scenarios and protection layers. 
diff --git a/docs/safety-and-privacy/security-model.md b/docs/safety-and-privacy/security-model.md new file mode 100644 index 00000000..8108b564 --- /dev/null +++ b/docs/safety-and-privacy/security-model.md @@ -0,0 +1,86 @@ + + +# The Security Model + +When an AI agent runs with unrestricted access to your system, it can read any +file, reach any network host, call any API with your credentials, and install +arbitrary software. OpenShell's security model exists to prevent all of that. + +:::{note} +OpenShell uses defense in depth. Four independent protection layers: filesystem, +network, process, and inference. They work together so that no single point of +failure can compromise your environment. +::: + +## What Happens Without Protection + +Autonomous agents are powerful, but power without boundaries is risk. Here are +four concrete threat scenarios and how OpenShell addresses each one. + +### Data Exfiltration + +**Without protection:** +The agent writes a script that reads your source code and uploads it to an +external server using `curl`. + +**With OpenShell:** +The network policy blocks all outbound connections except to hosts you have +explicitly approved. The `curl` command to an unapproved destination is denied +at the proxy before the request ever leaves the sandbox. + +--- + +### Credential Theft + +**Without protection:** +The agent reads `~/.ssh/id_rsa`, `~/.aws/credentials`, or other sensitive files +from your home directory and exfiltrates them. + +**With OpenShell:** +[Landlock](https://docs.kernel.org/security/landlock.html) filesystem restrictions limit the agent to declared paths. The agent +can access `/sandbox`, `/tmp`, and read-only system directories, but not your +home directory, SSH keys, cloud credentials, or anything else outside the +policy. 
+ +--- + +### Unauthorized API Calls + +**Without protection:** +The agent code calls `api.openai.com` with your API key, sending proprietary +data to a third-party inference provider you did not approve. + +**With OpenShell:** +The privacy router intercepts outbound API calls and reroutes them to a +backend you control: a local model, an NVIDIA endpoint, or your own +deployment. The agent's code does not need to change; the rerouting is +transparent. Your data never reaches an unauthorized provider. + +--- + +### Privilege Escalation + +**Without protection:** +The agent runs `sudo apt install` to install packages, modifies `/etc/passwd`, +or uses raw sockets to scan your internal network. + +**With OpenShell:** +The agent runs as an unprivileged user with seccomp filters that block +dangerous system calls. [Landlock](https://docs.kernel.org/security/landlock.html) prevents writes outside allowed paths. There +is no `sudo`, no `setuid`, and no path to elevated privileges. + +:::{important} +All four layers work together. No single layer is sufficient on its own. +Filesystem restrictions do not prevent network exfiltration. Network policies do +not prevent local privilege escalation. Process restrictions do not control +where inference traffic goes. Defense in depth means every layer covers gaps +that the others cannot. +::: + +## Next Steps + +- {doc}`policies`: Write and iterate on the policy YAML that configures all four layers (including network rules, binary matching, and TLS inspection). +- {doc}`../inference/index`: Set up private inference backends diff --git a/docs/sandboxes/community-sandboxes.md b/docs/sandboxes/community-sandboxes.md new file mode 100644 index 00000000..981cd607 --- /dev/null +++ b/docs/sandboxes/community-sandboxes.md @@ -0,0 +1,95 @@ + + +# Community Sandboxes + +Use pre-built sandboxes from the OpenShell Community catalog, or contribute your +own. 
+ +## What Are Community Sandboxes + +Community sandboxes are ready-to-use environments published in the +[OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) repository. +Each sandbox bundles a Dockerfile, policy, optional skills, and startup scripts +into a single package that you can launch with one command. + +## Current Catalog + +The following community sandboxes are available in the catalog. + +| Sandbox | Description | +|---|---| +| `base` | Foundational image with system tools and dev environment | +| `openclaw` | Open agent manipulation and control | +| `sdg` | Synthetic data generation workflows | +| `simulation` | General-purpose simulation sandboxes | + +## Use a Community Sandbox + +Launch a community sandbox by name with the `--from` flag: + +```console +$ openshell sandbox create --from openclaw +``` + +When you pass `--from` with a community sandbox name, the CLI: + +1. Resolves the name against the + [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) repository. +2. Pulls the Dockerfile, policy, skills, and any startup scripts. +3. Builds the container image locally. +4. Creates the sandbox with the bundled configuration applied. + +You end up with a running sandbox whose image, policy, and tooling are all +preconfigured by the community package. + +### Other Sources + +The `--from` flag also accepts: + +- Local directory paths: Point to a directory on disk that contains a + Dockerfile and optional policy/skills: + + ```console + $ openshell sandbox create --from ./my-sandbox-dir + ``` + +- Container image references: Use an existing container image directly: + + ```console + $ openshell sandbox create --from my-registry.example.com/my-image:latest + ``` + +## Contribute a Community Sandbox + +Each community sandbox is a directory under `sandboxes/` in the +[OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) repository. 
+At minimum, a sandbox directory must contain: + +- `Dockerfile`: Defines the container image +- `README.md`: Describes the sandbox and how to use it + +Optional files: + +- `policy.yaml`: Default policy applied when the sandbox launches +- `skills/`: Agent skill definitions bundled with the sandbox +- Startup scripts: Any scripts the Dockerfile or entrypoint invokes + +To contribute, fork the repository, add your sandbox directory, and open a pull +request. Refer to the repository's +[CONTRIBUTING.md](https://github.com/NVIDIA/OpenShell-Community/blob/main/CONTRIBUTING.md) +for submission guidelines. + +:::{note} +The community catalog is designed to grow. If you have built a sandbox that +supports a particular workflow (data processing, simulation, code review, +or anything else), consider contributing it back so others can use it. +::: + +## Next Steps + +- {doc}`create-and-manage`: Full sandbox lifecycle management +- {doc}`custom-containers`: Build a fully custom container with BYOC +- {doc}`../safety-and-privacy/policies`: Customize the policy applied to any sandbox diff --git a/docs/sandboxes/create-and-manage.md b/docs/sandboxes/create-and-manage.md new file mode 100644 index 00000000..fd68eac7 --- /dev/null +++ b/docs/sandboxes/create-and-manage.md @@ -0,0 +1,195 @@ + + +# Create and Manage Sandboxes + +This page walks you through the full sandbox lifecycle: creating, inspecting, connecting to, monitoring, and deleting sandboxes. For background on what sandboxes are and how the runtime works, refer to [About Sandboxes](index.md). + +## Prerequisites + +Ensure the following are installed before creating sandboxes. 
+ +- OpenShell CLI installed (`pip install openshell`) +- Docker running on your machine + +## Create a Sandbox + +The simplest way to create a sandbox is to specify a trailing command: + +```console +$ openshell sandbox create -- claude +``` + +The CLI bootstraps the runtime (if this is your first run), discovers your +credentials, applies the default policy, and drops you into the sandbox. + +You can customize creation with flags like `--name`, `--provider`, `--policy`, +`--upload`, `--keep`, `--forward`, and `--from`. Refer to the +[CLI Reference](../reference/cli.md) for the full flag list. + +A fully specified creation command might look like: + +```console +$ openshell sandbox create \ + --name dev \ + --provider my-claude \ + --policy policy.yaml \ + --upload \ + --keep \ + -- claude +``` + +:::{tip} +Use `--keep` to keep the sandbox running after the trailing command exits. +This is especially useful when you are iterating on a policy or want to +reconnect later from another terminal or VS Code. +::: + +## List and Inspect Sandboxes + +Check the status of your sandboxes and retrieve detailed information about individual ones. + +List all sandboxes: + +```console +$ openshell sandbox list +``` + +Get detailed information about a specific sandbox: + +```console +$ openshell sandbox get my-sandbox +``` + +## Connect to a Sandbox + +Access a running sandbox through an interactive SSH session or VS Code Remote-SSH. + +### Interactive SSH + +Open an SSH session into a running sandbox: + +```console +$ openshell sandbox connect my-sandbox +``` + +### VS Code Remote-SSH + +Export the sandbox SSH configuration and append it to your SSH config: + +```console +$ openshell sandbox ssh-config my-sandbox >> ~/.ssh/config +``` + +Then open VS Code, install the Remote - SSH extension if you have not +already, and connect to the host named `my-sandbox`. + +## View Logs + +Stream and filter sandbox logs to monitor agent activity and diagnose policy decisions. 
+ +Stream sandbox logs: + +```console +$ openshell logs my-sandbox +``` + +Use flags to filter and follow output: + +| Flag | Purpose | Example | +|---|---|---| +| `--tail` | Stream logs in real time | `openshell logs my-sandbox --tail` | +| `--source` | Filter by log source | `--source sandbox` | +| `--level` | Filter by severity | `--level warn` | +| `--since` | Show logs from a time window | `--since 5m` | + +Combine flags to narrow in on what you need: + +```console +$ openshell logs my-sandbox --tail --source sandbox --level warn --since 5m +``` + +:::{tip} +For a real-time dashboard that combines sandbox status and logs in one view, +run `openshell term`. Refer to {doc}`terminal` for details on reading log entries and +diagnosing blocked connections. +::: + +## Transfer Files + +Transfer files between your host machine and a running sandbox. + +Upload files from your host into the sandbox: + +```console +$ openshell sandbox upload my-sandbox ./src /sandbox/src +``` + +Download files from the sandbox to your host: + +```console +$ openshell sandbox download my-sandbox /sandbox/output ./local +``` + +:::{note} +You can also upload files at creation time with the `--upload` flag on +`openshell sandbox create`. +::: + +## Port Forwarding + +Forward a port from the sandbox to your host machine. This runs in the +foreground by default: + +```console +$ openshell forward start 8080 my-sandbox +``` + +Add `-d` to run the forward in the background: + +```console +$ openshell forward start 8080 my-sandbox -d +``` + +List active port forwards: + +```console +$ openshell forward list +``` + +Stop a port forward: + +```console +$ openshell forward stop 8080 my-sandbox +``` + +:::{note} +You can set up port forwarding at creation time with the `--forward` flag on +`openshell sandbox create`, which is convenient when you know upfront that +your workload exposes a service. +::: + +## Delete Sandboxes + +Remove sandboxes when they are no longer needed. 
Deleting a sandbox stops all processes, releases cluster resources, and purges injected credentials. + +Delete a sandbox by name: + +```console +$ openshell sandbox delete my-sandbox +``` + +You can delete multiple sandboxes in a single command: + +```console +$ openshell sandbox delete sandbox-a sandbox-b sandbox-c +``` + +## Next Steps + +- {doc}`community-sandboxes`: Use pre-built sandboxes from the community catalog +- {doc}`providers`: Create and attach credential providers +- {doc}`custom-containers`: Build and run your own container image +- {doc}`../safety-and-privacy/policies`: Control what the agent can access \ No newline at end of file diff --git a/docs/sandboxes/custom-containers.md b/docs/sandboxes/custom-containers.md new file mode 100644 index 00000000..80b7878d --- /dev/null +++ b/docs/sandboxes/custom-containers.md @@ -0,0 +1,80 @@ + + +# Custom Containers + +Build a custom container image and run it as an OpenShell sandbox. + +## Prerequisites + +Ensure the following are installed before building custom container sandboxes. + +- OpenShell CLI installed (`pip install openshell`) +- Docker running on your machine +- A Dockerfile for your workload + +## Step 1: Create a Sandbox from Your Dockerfile + +Point `--from` at the directory containing your Dockerfile: + +```console +$ openshell sandbox create --from ./my-app --keep --name my-app +``` + +The CLI builds the image locally using Docker, pushes it into the cluster, and +creates the sandbox, all in one step. No external container registry is +needed. + +You can also pass a full container image reference if the image is already +built: + +```console +$ openshell sandbox create --from my-registry.example.com/my-image:latest --keep --name my-app +``` + +## Step 2: Forward Ports + +If your container runs a service, forward the port to your host: + +```console +$ openshell forward start 8080 my-app -d +``` + +The `-d` flag runs the forward in the background so you can continue using +your terminal. 
+ +## Step 3: Iterate + +When you change your Dockerfile, delete the sandbox and recreate: + +```console +$ openshell sandbox delete my-app && \ + openshell sandbox create --from ./my-app --keep --name my-app +``` + +## Shortcut: Create with Forwarding and a Startup Command + +You can combine port forwarding and a startup command in a single step: + +```console +$ openshell sandbox create --from ./my-app --forward 8080 --keep -- ./start-server.sh +``` + +This creates the sandbox, sets up port forwarding on port 8080, and runs +`./start-server.sh` as the sandbox command. + +:::{warning} +OpenShell does not support distroless or `FROM scratch` images. The +supervisor requires glibc, `/proc`, and a shell to operate. Images missing +`iproute2` or required Linux capabilities will fail to start in proxy mode. +Ensure your base image includes these dependencies. +::: + +## Next Steps + +- {doc}`create-and-manage`: Full sandbox lifecycle commands +- {doc}`providers`: Attach credentials to your custom container +- {doc}`/safety-and-privacy/policies`: Write a policy tailored to your workload +- {doc}`/safety-and-privacy/security-model`: Understand the isolation layers applied to custom images \ No newline at end of file diff --git a/docs/sandboxes/index.md b/docs/sandboxes/index.md new file mode 100644 index 00000000..19340b8e --- /dev/null +++ b/docs/sandboxes/index.md @@ -0,0 +1,51 @@ + + +# About Sandboxes + +An OpenShell sandbox is a safe, private execution environment for an AI agent. Each sandbox runs inside a Kubernetes pod with multiple layers of protection that prevent unauthorized data access, credential exposure, and network exfiltration. Protection layers include filesystem restrictions ([Landlock](https://docs.kernel.org/security/landlock.html)), system call filtering (seccomp), network namespace isolation, and a privacy-enforcing HTTP CONNECT proxy. 
+ +## Sandbox Lifecycle + +Every sandbox moves through a defined set of phases: + +| Phase | Description | +|---|---| +| Provisioning | The runtime is setting up the sandbox environment, injecting credentials, and applying your policy. | +| Ready | The sandbox is running. The agent process is active and all isolation layers are enforced. You can connect, sync files, and view logs. | +| Error | Something went wrong during provisioning or execution. Check logs with `openshell logs` for details. | +| Deleting | The sandbox is being torn down. The system releases resources and purges credentials. | + +## The OpenShell Runtime + +Sandboxes run inside a lightweight runtime cluster that OpenShell manages for +you. The cluster runs as a [k3s](https://k3s.io/) Kubernetes distribution +inside a Docker container on your machine. + +You do not need to set this up manually. The first time you run a command +that needs a cluster (such as `openshell sandbox create`), the CLI provisions +one automatically. This is the "Runtime ready" line you see in the output. +Subsequent commands reuse the existing cluster. + +For teams or when you need more resources, you can deploy the cluster to a +remote host instead of your local machine: + +```console +$ openshell gateway start --remote user@host +``` + +Refer to [Remote Deployment](../about/architecture.md) for +details. If you have multiple clusters (local and remote), switch between them +with `openshell gateway select `. Refer to the +[CLI Reference](../reference/cli.md#gateway-commands) for the full command set. + +## Next Steps + +- [Create and Manage Sandboxes](create-and-manage.md): The full sandbox lifecycle — create, inspect, connect, monitor, and delete. +- [Providers](providers.md): Create and attach credential providers. +- [Custom Containers](custom-containers.md): Build and run your own container image. +- [Community Sandboxes](community-sandboxes.md): Use pre-built sandboxes from the community catalog. 
+- [Terminal](terminal.md): Monitor sandbox status and live activity in a dashboard. +- To allow or restrict network access (e.g. git push to GitHub, or custom APIs), see [Write Sandbox Policies](../safety-and-privacy/policies.md#network-access-rules). diff --git a/docs/sandboxes/providers.md b/docs/sandboxes/providers.md new file mode 100644 index 00000000..b9f5c09f --- /dev/null +++ b/docs/sandboxes/providers.md @@ -0,0 +1,165 @@ + + +# Providers + +AI agents typically need credentials to access external services: an API key for the AI model provider, a token for GitHub or GitLab, and so on. OpenShell manages these credentials as first-class entities called *providers*. + +Create and manage providers that supply credentials to sandboxes. + +## Create a Provider + +Providers can be created from local environment variables or with explicit credential values. + +### From Local Credentials + +The fastest way to create a provider is to let the CLI discover credentials from +your shell environment: + +```console +$ openshell provider create --name my-claude --type claude --from-existing +``` + +This reads `ANTHROPIC_API_KEY` or `CLAUDE_API_KEY` from your current environment +and stores them in the provider. + +### With Explicit Credentials + +Supply a credential value directly: + +```console +$ openshell provider create --name my-api --type generic --credential API_KEY=sk-abc123 +``` + +### Bare Key Form + +Pass a key name without a value to read the value from the environment variable +of that name: + +```console +$ openshell provider create --name my-api --type generic --credential API_KEY +``` + +This looks up the current value of `$API_KEY` in your shell and stores it. + +## Manage Providers + +List, inspect, update, and delete providers from the active cluster. 
+ +List all providers: + +```console +$ openshell provider list +``` + +Inspect a provider: + +```console +$ openshell provider get my-claude +``` + +Update a provider's credentials: + +```console +$ openshell provider update my-claude --type claude --from-existing +``` + +Delete a provider: + +```console +$ openshell provider delete my-claude +``` + +## Attach Providers to Sandboxes + +Pass one or more `--provider` flags when creating a sandbox: + +```console +$ openshell sandbox create --provider my-claude --provider my-github -- claude +``` + +Each `--provider` flag attaches one provider. The sandbox receives all +credentials from every attached provider at runtime. + +:::{warning} +Providers cannot be added to a running sandbox. If you need to attach an +additional provider, delete the sandbox and recreate it with all required +providers specified. +::: + +### Auto-Discovery Shortcut + +When the trailing command in `openshell sandbox create` is a recognized tool name (`claude`, `codex`, or `opencode`), the CLI auto-creates the required +provider from your local credentials if one does not already exist. You do not +need to create the provider separately: + +```console +$ openshell sandbox create -- claude +``` + +This detects `claude` as a known tool, finds your `ANTHROPIC_API_KEY`, creates +a provider, attaches it to the sandbox, and launches Claude Code. + +## How Credentials Flow + +Credentials follow a secure path from your machine into the agent process. 
+ +```{mermaid} +flowchart LR + A["You create a provider"] --> B["Attach provider\nto sandbox at creation"] + B --> C["Sandbox starts"] + C --> D["Supervisor fetches\ncredentials from gateway"] + D --> E["Credentials injected into\nagent process + SSH sessions"] + + style A fill:#ffffff,stroke:#000000,color:#000000 + style B fill:#ffffff,stroke:#000000,color:#000000 + style C fill:#76b900,stroke:#000000,color:#000000 + style D fill:#76b900,stroke:#000000,color:#000000 + style E fill:#76b900,stroke:#000000,color:#000000 + + linkStyle default stroke:#76b900,stroke-width:2px +``` + +1. You create a provider with credentials from your environment or + specified explicitly. +2. You attach the provider to a sandbox at creation time using the + `--provider` flag (one or more providers can be attached). +3. The sandbox starts. The supervisor process initializes. +4. The supervisor fetches credentials from the OpenShell gateway at runtime. + The system does not store credentials in the sandbox specification. It retrieves them on demand. +5. Credentials are injected into the agent process as environment variables. + They are also available in SSH sessions when you connect to the sandbox. + +:::{warning} +The system does not store credentials in the sandbox container specification. The supervisor fetches them at runtime and holds them only in process memory. This +means you cannot find credentials in container inspection, image layers, or +environment dumps of the container spec. +::: + +## Supported Provider Types + +The following provider types are supported. 
+ +| Type | Environment Variables Injected | Typical Use | +|---|---|---| +| `claude` | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | Claude Code, Anthropic API | +| `codex` | `OPENAI_API_KEY` | OpenAI Codex | +| `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | opencode tool | +| `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API, `gh` CLI | +| `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI | +| `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | +| `generic` | User-defined | Any service with custom credentials | +| `outlook` | *(none: no auto-discovery)* | Microsoft Outlook integration | + +:::{tip} +Use the `generic` type for any service not listed above. You define the +environment variable names and values yourself with `--credential`. +::: + +## Next Steps + +- {doc}`create-and-manage`: Full sandbox lifecycle management +- {doc}`custom-containers`: Use providers with custom container images +- {doc}`../safety-and-privacy/security-model`: Why credential isolation matters \ No newline at end of file diff --git a/docs/sandboxes/terminal.md b/docs/sandboxes/terminal.md new file mode 100644 index 00000000..bd847f7b --- /dev/null +++ b/docs/sandboxes/terminal.md @@ -0,0 +1,110 @@ + + +# Terminal + +OpenShell Terminal is a terminal dashboard that displays sandbox status and live activity in a single view. Use it to monitor agent behavior, diagnose blocked connections, and observe inference interception in real time. + +```console +$ openshell term +``` + +## Sandbox Status + +The status pane at the top of the dashboard displays the following sandbox metadata: + +- Name and phase (`Provisioning`, `Ready`, `Error`) +- Image running in the sandbox +- Providers attached and their available credentials +- Age since creation +- Port forwards currently active + +A phase other than `Ready` indicates the sandbox is still initializing or has encountered an error. Inspect the logs pane for details. 
+ +## Live Log Stream + +The logs pane streams activity in real time. Outbound connections, policy decisions, and inference interceptions appear as they occur. + +Log entries originate from two sources: + +- sandbox: The sandbox supervisor (proxy decisions, policy enforcement, SSH connections, process lifecycle). +- gateway: The control plane (sandbox creation, phase changes, policy distribution). + +Press `f` to enable follow mode and auto-scroll to new entries. + +## Diagnosing Blocked Connections + +Entries with `action=deny` indicate connections blocked by policy: + +``` +22:35:19 sandbox INFO CONNECT action=deny dst_host=registry.npmjs.org dst_port=443 +``` + +Each deny entry contains the following fields: + +| Field | Description | +|---|---| +| `action=deny` | Connection was blocked by the network policy. | +| `dst_host` | Destination host the process attempted to reach. | +| `dst_port` | Destination port (typically 443 for HTTPS). | +| `src_addr` | Source address inside the sandbox. | +| `policy` | Policy rule that was evaluated, or `-` if no rule matched. | + +To resolve a blocked connection: + +1. Add the host to the network policy if the connection is legitimate. Refer to {doc}`../safety-and-privacy/policies` for the iteration workflow. +2. Leave it blocked if the connection is unauthorized. + +## Diagnosing Inference Interception + +Entries with `action=inspect_for_inference` indicate intercepted API calls: + +``` +22:35:37 sandbox INFO CONNECT action=inspect_for_inference dst_host=integrate.api.nvidia.com dst_port=443 +22:35:37 sandbox INFO Intercepted inference request, routing locally kind=chat_completion +``` + +This sequence indicates: + +- No network policy matched the connection (the endpoint and binary combination is not in the policy). +- Inference routing is configured (`allowed_routes` is non-empty), so the proxy intercepted the call instead of denying it. 
+- The proxy TLS-terminated the connection, detected an inference API pattern, and routed the request through the privacy router. + +:::{note} +If these calls should go directly to the destination rather than through inference routing, the most likely cause is a binary path mismatch. The process making the HTTP call does not match any binary listed in the network policy. + +Check the log entry for the binary path, then update the `binaries` list in the policy. Refer to [Write Sandbox Policies](../safety-and-privacy/policies.md#network-access-rules) for details on binary matching. +::: + +## Filtering and Navigation + +The dashboard provides filtering and navigation controls: + +- Press `s` to filter logs by source. Display only `sandbox` logs (policy decisions) or only `gateway` logs (lifecycle events). +- Press `f` to toggle follow mode. Auto-scroll to the latest entries. +- Press `Enter` on a log entry to open the detail view with the full message. +- Use `j` / `k` to navigate up and down the log list. + +## Keyboard Shortcuts + +The following keyboard shortcuts are available in the terminal dashboard. + +| Key | Action | +|---|---| +| `j` / `k` | Navigate down / up in the log list. | +| `Enter` | Open detail view for the selected entry. | +| `g` / `G` | Jump to top / bottom. | +| `f` | Toggle follow mode (auto-scroll to new entries). | +| `s` | Open source filter (sandbox, gateway, or all). | +| `Esc` | Return to the main view / close detail view. | +| `q` | Quit. | + +## Related Topics + +For deeper dives into topics covered by the terminal dashboard, refer to the following guides. + +- Blocked connections: Follow {doc}`../safety-and-privacy/policies` to pull the current policy, add the missing endpoint, and push an update without restarting the sandbox. 
+- Inference interception: Refer to [Write Sandbox Policies](../safety-and-privacy/policies.md#how-network-access-is-evaluated) for the distinction between agent traffic (routed directly) and userland traffic (routed through inference routing). +- Troubleshooting: Refer to {doc}`../troubleshooting` for troubleshooting tips and diagnostics. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 00000000..e88f9ac5 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,135 @@ + + +# Troubleshooting + +Use this guide to troubleshoot problems with OpenShell. + +## Cluster Issues + +Troubleshoot problems with deploying, connecting to, and running OpenShell clusters. + +### Cluster Deploy Fails + +**Symptom:** `openshell gateway start` exits with an error. + +**Check:** +1. Is Docker running? The cluster requires Docker to be active. +2. Is the port already in use? Try a different port: `--port 8081`. +3. Does a stale container exist? Destroy and redeploy: `openshell gateway destroy && openshell gateway start`. + +### Cluster Not Reachable + +**Symptom:** `openshell status` fails to connect. + +**Check:** +1. Is the cluster container running? `docker ps | grep openshell`. +2. Was the cluster stopped? Redeploy: `openshell gateway start`. +3. For remote clusters, is the SSH connection working? + +### Health Check Fails During Deploy + +**Symptom:** Deploy hangs or times out waiting for health checks. + +**Check:** +1. View container logs: `docker logs openshell-cluster`. +2. Check if k3s started: the bootstrap process waits up to 180 attempts (six minutes) for cluster readiness. +3. Look for resource constraints. k3s needs sufficient memory and disk. + +## Sandbox Issues + +Troubleshoot problems with creating, connecting to, and configuring sandboxes. + +### Sandbox Stuck in Provisioning + +**Symptom:** Sandbox shows `Provisioning` status and does not become `Ready`. + +**Check:** +1. View sandbox logs: `openshell logs --source gateway`. 
+2. Check if the container image can be pulled.
+3. For custom images, verify the image was pushed: `openshell sandbox image push`.
+
+### Cannot Connect to Sandbox
+
+**Symptom:** `openshell sandbox connect <sandbox-name>` fails.
+
+**Check:**
+1. Is the sandbox in `Ready` state? `openshell sandbox get <sandbox-name>`.
+2. Is SSH accessible? The tunnel goes through the gateway. Verify cluster connectivity first.
+
+### Network Requests Denied
+
+**Symptom:** The agent cannot reach a remote host.
+
+**Check:**
+1. Stream sandbox logs: `openshell logs --tail --source sandbox`.
+2. Look for `deny` actions. They include the destination, binary, and reason.
+3. Update the policy to allow the blocked endpoint. Refer to [Policy Iteration Loop](safety-and-privacy/policies.md#the-policy-iteration-loop).
+
+### Policy Update Fails
+
+**Symptom:** `openshell policy set` returns an error or the status shows `failed`.
+
+**Check:**
+1. Are you changing a static field? `filesystem_policy`, `landlock`, and `process` cannot change after creation.
+2. Are you adding/removing `network_policies` to change the network mode? This is not allowed. The mode is fixed at creation.
+3. Check the error message in `openshell policy list <sandbox-name>`.
+
+## Provider Issues
+
+Troubleshoot problems with provider credential discovery and injection into sandboxes.
+
+### Provider Discovery Finds No Credentials
+
+**Symptom:** `--from-existing` creates a provider with no credentials.
+
+**Check:**
+1. Are the expected environment variables set? (for example, `ANTHROPIC_API_KEY` for Claude).
+2. Do the expected config files exist? (for example, `~/.claude.json`).
+3. Try explicit credentials: `--credential ANTHROPIC_API_KEY=sk-...`.
+
+### Sandbox Missing Credentials
+
+**Symptom:** Environment variables for a provider are not set inside the sandbox.
+
+**Check:**
+1. Was the provider attached? `openshell sandbox get <sandbox-name>`. Check the providers list.
+2. Does the provider have credentials? `openshell provider get <provider-name>`.
+3. 
Are the credential keys valid env var names? Keys with dots, dashes, or spaces are silently skipped. + +## Custom Container Issues + +Troubleshoot problems with building and running custom container images in sandboxes. + +### Custom Image Fails to Start + +**Symptom:** Sandbox with `--from ` goes to `Error` state. + +**Check:** +1. Is the image pushed to the cluster? `openshell sandbox image push --dockerfile ./Dockerfile --tag my-image`. +2. Does the image have glibc and `/proc`? Distroless / `FROM scratch` images are not supported. +3. For proxy mode, does the image have `iproute2`? Network namespace setup requires it. + +## Port Forwarding Issues + +Troubleshoot problems with forwarding local ports into sandbox services. + +### Port Forward Not Working + +**Symptom:** `localhost:` does not connect to the sandbox service. + +**Check:** +1. Is the forward running? `openshell forward list`. +2. Is the service listening on that port inside the sandbox? +3. Is the sandbox still in `Ready` state? +4. Try stopping and restarting: `openshell forward stop && openshell forward start -d`. + +## Getting More Information + +Use these techniques to gather additional diagnostic detail when troubleshooting. + +- Increase CLI verbosity: `openshell -vvv ` for trace-level output. +- View gateway-side logs: `openshell logs --source gateway`. +- View sandbox-side logs: `openshell logs --source sandbox --level debug`. diff --git a/docs/troubleshooting/index.md b/docs/troubleshooting/index.md deleted file mode 100644 index 093695e1..00000000 --- a/docs/troubleshooting/index.md +++ /dev/null @@ -1,6 +0,0 @@ - - -# Troubleshooting diff --git a/docs/tutorials/claude-code.md b/docs/tutorials/claude-code.md new file mode 100644 index 00000000..c8a63ef1 --- /dev/null +++ b/docs/tutorials/claude-code.md @@ -0,0 +1,126 @@ + + +# Launch Claude Code + +This tutorial walks you through the simplest path to running Claude Code inside a NemoClaw sandbox. 
By the end, you will have an isolated environment with Claude Code running, your credentials securely injected, and a default policy controlling what the agent can access. + +**What you will learn:** + +- Creating a sandbox with a single command +- How NemoClaw auto-discovers provider credentials +- What the default policy allows and denies +- Connecting to a sandbox and working inside it + +## Step 1: Install NemoClaw + +Install the CLI from PyPI: + +```console +$ pip install nemoclaw +``` + +:::{tip} +You also need Docker running on your machine. +::: + +## Step 2: Create a Sandbox + +Run the following command: + +```console +$ nemoclaw sandbox create -- claude +``` + +This single command does several things: + +1. **Bootstraps the runtime.** If this is your first time using NemoClaw, the CLI provisions a local k3s cluster inside Docker and deploys the NemoClaw control plane. This happens once --- subsequent commands reuse the existing cluster. +2. **Auto-discovers credentials.** The CLI detects that `claude` is a recognized tool and looks for your Anthropic credentials. It reads the `ANTHROPIC_API_KEY` environment variable and creates a provider automatically. +3. **Creates the sandbox.** An isolated environment is provisioned with the default policy applied. The policy allows Claude Code to reach `api.anthropic.com` and a small set of supporting endpoints while blocking everything else. +4. **Drops you into the sandbox.** You land in an interactive SSH session inside the sandbox, ready to work. + +:::{note} +The first bootstrap takes a few minutes depending on your network speed. The CLI prints progress as each component starts. Subsequent sandbox creations are much faster. +::: + +## Step 3: Work Inside the Sandbox + +You are now in an SSH session inside the sandbox. Start Claude Code: + +```console +$ claude +``` + +Your credentials are available as environment variables inside the sandbox. 
You can verify this: + +```console +$ echo $ANTHROPIC_API_KEY +sk-ant-... +``` + +The sandbox has a working directory at `/sandbox` where you can create and edit files. Claude Code has access to standard development tools --- git, common language runtimes, and package managers --- within the boundaries set by the policy. + +## Step 4: Check Sandbox Status + +Open a second terminal on your host machine. You can inspect running sandboxes from there. + +List all sandboxes: + +```console +$ nemoclaw sandbox list +``` + +For a live dashboard view, launch the NemoClaw Terminal: + +```console +$ nemoclaw gator +``` + +The terminal dashboard shows sandbox status, active network connections, and policy decisions in real time. + +## Step 5: Connect from VS Code + +If you prefer to work in VS Code rather than a terminal, you can connect via Remote-SSH. + +First, export the sandbox's SSH configuration: + +```console +$ nemoclaw sandbox ssh-config my-sandbox >> ~/.ssh/config +``` + +Then open VS Code, install the **Remote - SSH** extension if you have not already, and connect to the host named `my-sandbox`. VS Code opens a full editor session inside the sandbox. + +:::{tip} +Replace `my-sandbox` with the actual name of your sandbox. Run `nemoclaw sandbox list` to find it if you did not specify a name at creation time. +::: + +## Step 6: Clean Up + +When you are done, exit the sandbox shell: + +```console +$ exit +``` + +Then delete the sandbox: + +```console +$ nemoclaw sandbox delete my-sandbox +``` + +:::{tip} +Use the `--keep` flag when you want the sandbox to stay alive after the command exits. This is useful when you plan to connect later or want to iterate on the policy while the sandbox runs. 
+ +```console +$ nemoclaw sandbox create --keep -- claude +``` +::: + +## Next Steps + +- {doc}`../sandboxes/create-and-manage` --- understand the isolation model and sandbox lifecycle +- {doc}`../sandboxes/providers` --- how credentials are injected without exposing them to agent code +- {doc}`../safety-and-privacy/policies` --- learn how the default policy works and how to customize it +- {doc}`../safety-and-privacy/network-access-rules` --- dig into the network proxy and per-endpoint rules diff --git a/docs/tutorials/openclaw.md b/docs/tutorials/openclaw.md new file mode 100644 index 00000000..85dddfb7 --- /dev/null +++ b/docs/tutorials/openclaw.md @@ -0,0 +1,104 @@ + + +# Set Up OpenClaw + +This tutorial shows you how to launch a community sandbox using the `--from` flag. Community sandboxes are pre-built configurations published to the [NemoClaw Community](https://github.com/NVIDIA/NemoClaw-Community) repository --- they bundle a container image, a tailored policy, and optional skills into a single package you can run with one command. + +**What you will learn:** + +- What community sandboxes are and how they differ from default sandboxes +- How the `--from` flag pulls and builds a complete sandbox configuration +- How to inspect the bundled policy that ships with a community sandbox + +## Prerequisites + +Before you begin, make sure you have: + +- **Docker** running on your machine +- **NVIDIA GPU with drivers** installed (required for GPU-accelerated workloads in the OpenClaw sandbox) +- **NemoClaw CLI** installed (`pip install nemoclaw`) + +## Step 1: Create a Sandbox from the Community Image + +Run the following command: + +```console +$ nemoclaw sandbox create --from openclaw --keep +``` + +The `--from` flag tells the CLI to pull a sandbox definition from the NemoClaw Community catalog. Here is what happens: + +1. **Fetches the definition.** The CLI downloads the OpenClaw sandbox definition from the NemoClaw-Community repository. 
This includes a Dockerfile, a policy YAML, and any bundled skills.
+2. **Builds the image.** The Dockerfile is built locally via Docker. The image includes all tools and dependencies that OpenClaw needs.
+3. **Applies the bundled policy.** Instead of the generic default policy, the sandbox starts with a policy specifically written for the OpenClaw workload --- it allows the endpoints and binaries that OpenClaw requires.
+4. **Creates and keeps the sandbox.** The `--keep` flag ensures the sandbox stays running after creation so you can connect and disconnect freely.
+
+:::{note}
+The first build takes longer because Docker needs to pull base layers and install dependencies. Subsequent creates reuse the cached image.
+:::
+
+## Step 2: Connect to the Sandbox
+
+Once the sandbox is running, connect to it:
+
+```console
+$ nemoclaw sandbox connect <sandbox-name>
+```
+
+Replace `<sandbox-name>` with the sandbox name shown in the creation output. If you did not specify a name with `--name`, the CLI assigns one automatically --- run `nemoclaw sandbox list` to find it.
+
+## Step 3: Explore the Environment
+
+The sandbox comes pre-configured for the OpenClaw workload. The tools, runtimes, and libraries that OpenClaw needs are already installed in the container image. The policy is tuned to allow the specific network endpoints and operations that OpenClaw uses, so you can start working immediately without policy adjustments. 
+
+## Step 4: Check the Bundled Policy
+
+To see exactly what the sandbox is allowed to do, pull the full policy:
+
+```console
+$ nemoclaw sandbox policy get <sandbox-name> --full
+```
+
+This outputs the complete policy YAML, including:
+
+- **Network policies** --- which hosts and ports the sandbox can reach, and which binaries are allowed to initiate those connections
+- **Filesystem policy** --- which paths are read-only and which are read-write
+- **Process restrictions** --- the user and group the sandbox runs as
+- **Inference rules** --- which inference routing hints are allowed
+
+Reviewing the bundled policy is a good way to understand what a community sandbox has access to before you start using it for sensitive work.
+
+:::{tip}
+You can save the policy to a file for reference or as a starting point for customization:
+
+```console
+$ nemoclaw sandbox policy get <sandbox-name> --full > openclaw-policy.yaml
+```
+:::
+
+## Step 5: Clean Up
+
+When you are finished, exit the sandbox if you are connected:
+
+```console
+$ exit
+```
+
+Then delete it:
+
+```console
+$ nemoclaw sandbox delete <sandbox-name>
+```
+
+:::{note}
+The NemoClaw Community repository accepts contributions. If you build a sandbox configuration that would be useful to others, you can submit it to the [NemoClaw-Community](https://github.com/NVIDIA/NemoClaw-Community) repository. 
+::: + +## Next Steps + +- {doc}`../sandboxes/community-sandboxes` --- full reference on community sandbox definitions, available images, and how to contribute your own +- {doc}`../safety-and-privacy/policies` --- understand the policy format and how to customize what a sandbox can do +- {doc}`../sandboxes/create-and-manage` --- the isolation model and lifecycle behind every sandbox diff --git a/docs/tutorials/opencode-nvidia.md b/docs/tutorials/opencode-nvidia.md new file mode 100644 index 00000000..c3d65460 --- /dev/null +++ b/docs/tutorials/opencode-nvidia.md @@ -0,0 +1,264 @@ + + +# Run opencode with NVIDIA Inference + +This tutorial walks through a realistic setup where you run [opencode](https://opencode.ai) inside a NemoClaw sandbox with inference routed to NVIDIA API endpoints. Along the way, you will hit policy problems, diagnose them from logs, write a custom policy, and configure inference routing --- the full policy iteration loop. + +**What you will learn:** + +- Creating a provider manually with `--from-existing` +- Writing a custom policy to replace the defaults +- Reading sandbox logs to diagnose denied actions +- The difference between agent traffic and userland inference +- Setting up inference routes for code running inside the sandbox + +## Prerequisites + +Before you begin: + +- **`NVIDIA_API_KEY` environment variable** set on your host machine with a valid NVIDIA API key +- **NemoClaw CLI** installed (`pip install nemoclaw`) + +## Step 1: Create the Provider + +Unlike the Claude Code tutorial where the CLI auto-discovered credentials, here you create a provider explicitly. This gives you control over the provider name and type. + +```console +$ nemoclaw provider create --name nvidia --type nvidia --from-existing +``` + +The `--from-existing` flag tells the CLI to discover credentials from your local environment. It finds `NVIDIA_API_KEY` and stores it securely. The provider is now available to attach to any sandbox. 
+ +Verify it was created: + +```console +$ nemoclaw provider list +``` + +## Step 2: Create the Sandbox + +Create a sandbox with the NVIDIA provider attached and opencode as the startup command: + +```console +$ nemoclaw sandbox create --name opencode-sandbox --provider nvidia --keep -- opencode +``` + +The `--keep` flag keeps the sandbox alive after you exit, which you will need for the iteration steps ahead. The CLI creates the sandbox with the default policy, injects the NVIDIA credentials, and starts opencode. + +## Step 3: Hit a Problem + +Try using opencode inside the sandbox. You will likely find that calls to NVIDIA inference endpoints fail or behave unexpectedly. The default policy is designed around Claude Code, not opencode. + +Open a second terminal and check the logs: + +```console +$ nemoclaw sandbox logs opencode-sandbox --tail +``` + +Or launch the NemoClaw Terminal for a live view: + +```console +$ nemoclaw gator +``` + +Look for lines like these in the output: + +``` +action=deny host=integrate.api.nvidia.com binary=/usr/local/bin/opencode reason="no matching network policy" +action=deny host=opencode.ai binary=/usr/bin/node reason="no matching network policy" +action=inspect_for_inference host=integrate.api.nvidia.com binary=/bin/bash +``` + +These log entries tell you exactly what is being blocked and why. + +## Step 4: Understand Why + +The default policy has a `nvidia_inference` network policy entry, but it is configured for a narrow set of binaries --- typically `/usr/local/bin/claude` and `/usr/bin/node`. If opencode makes HTTP calls through a different binary (its own binary, `curl`, or a shell subprocess), those connections do not match any policy rule and get denied. + +There are two separate problems: + +1. **opencode's own traffic.** opencode contacts `opencode.ai` for its API and `integrate.api.nvidia.com` for inference. Neither of these endpoints has a matching policy entry for the binaries opencode uses. +2. 
**No opencode.ai endpoint.** The default policy has no entry for `opencode.ai` at all. Even if the binary matched, the destination is not listed. + +This is the expected behavior --- NemoClaw denies by default. You need to write a policy that explicitly allows what opencode needs. + +## Step 5: Write a Custom Policy + +Create a file called `opencode-policy.yaml` with the following content: + +```yaml +version: 1 +inference: + allowed_routes: + - nvidia +filesystem_policy: + include_workdir: true + read_only: + - /usr + - /lib + - /proc + - /dev/urandom + - /app + - /etc + - /var/log + read_write: + - /sandbox + - /tmp + - /dev/null +landlock: + compatibility: best_effort +process: + run_as_user: sandbox + run_as_group: sandbox +network_policies: + opencode_api: + name: opencode-api + endpoints: + - host: opencode.ai + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: full + binaries: + - path: /usr/local/bin/opencode + - path: /usr/bin/node + nvidia_inference: + name: nvidia-inference + endpoints: + - host: integrate.api.nvidia.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: full + binaries: + - path: /usr/local/bin/opencode + - path: /usr/bin/node + - path: /usr/bin/curl + - path: /bin/bash + npm_registry: + name: npm-registry + endpoints: + - host: registry.npmjs.org + port: 443 + binaries: + - path: /usr/bin/npm + - path: /usr/bin/node + - path: /usr/local/bin/npm + - path: /usr/local/bin/node + github_rest_api: + name: github-rest-api + endpoints: + - host: api.github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: read-only + binaries: + - path: /usr/local/bin/opencode + - path: /usr/bin/node + - path: /usr/bin/gh + github_ssh_over_https: + name: github-ssh-over-https + endpoints: + - host: github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + rules: + - allow: + method: GET + path: "/**/info/refs*" + - allow: + method: POST + path: 
"/**/git-upload-pack" + binaries: + - path: /usr/bin/git +``` + +Compared to the default policy, this adds: + +- **`opencode_api`** --- allows opencode and Node.js to reach `opencode.ai:443` +- **Broader `nvidia_inference` binaries** --- adds `/usr/local/bin/opencode`, `/usr/bin/curl`, and `/bin/bash` so opencode's subprocesses can reach the NVIDIA endpoint +- **`inference.allowed_routes`** --- includes `nvidia` so inference routing works for userland code +- **GitHub access** scoped for opencode's git operations + +:::{warning} +The `filesystem_policy`, `landlock`, and `process` sections are static --- they are set at sandbox creation time and cannot be changed on a running sandbox. If you need to modify these, you must delete and recreate the sandbox. The `network_policies` and `inference` sections are dynamic and can be hot-reloaded. +::: + +## Step 6: Push the Policy + +Apply your custom policy to the running sandbox: + +```console +$ nemoclaw sandbox policy set opencode-sandbox --policy opencode-policy.yaml --wait +``` + +The `--wait` flag blocks until the sandbox confirms the policy is loaded. You will see output indicating success or failure. + +Verify the policy revision was accepted: + +```console +$ nemoclaw sandbox policy list opencode-sandbox +``` + +The latest revision should show status `loaded`. + +## Step 7: Set Up Inference Routing + +So far, you have allowed the opencode *agent* to reach `integrate.api.nvidia.com` directly via network policy. But what about code that opencode writes and runs inside the sandbox? If that code calls an LLM API, it goes through the privacy router --- a separate mechanism. 
+ +Create an inference route so userland code can access NVIDIA models: + +```console +$ nemoclaw inference create \ + --routing-hint nvidia \ + --base-url https://integrate.api.nvidia.com \ + --model-id z-ai/glm5 \ + --api-key $NVIDIA_API_KEY +``` + +The policy you wrote in Step 5 already includes `nvidia` in `inference.allowed_routes`, so you do not need to push a policy update. If you had not included it, you would add it to the policy and push again: + +```console +$ nemoclaw sandbox policy set opencode-sandbox --policy opencode-policy.yaml --wait +``` + +:::{note} +The distinction matters: **network policies** control which hosts the agent binary can reach directly. **Inference routes** control where LLM API calls from userland code (scripts, notebooks, applications the agent writes) get routed. They are two separate enforcement points. +::: + +## Step 8: Verify + +Tail the logs again: + +```console +$ nemoclaw sandbox logs opencode-sandbox --tail +``` + +You should no longer see `action=deny` lines for the endpoints you added. Connections to `opencode.ai`, `integrate.api.nvidia.com`, and GitHub should show `action=allow`. + +If you still see denials, read the log line carefully --- it tells you the exact host, port, and binary that was blocked. Add the missing entry to your policy and push again. This observe-modify-push cycle is the policy iteration loop, and it is the normal workflow for getting a new tool running in NemoClaw. 
+ +## Clean Up + +When you are done: + +```console +$ nemoclaw sandbox delete opencode-sandbox +``` + +## Next Steps + +- {doc}`../safety-and-privacy/policies` --- full reference on policy YAML structure, static vs dynamic fields, and enforcement modes +- {doc}`../safety-and-privacy/network-access-rules` --- how the proxy evaluates network rules, L4 vs L7 inspection, and TLS termination +- {doc}`../inference/index` --- inference route configuration, protocol detection, and transparent rerouting +- {doc}`../sandboxes/providers` --- provider types, credential discovery, and manual vs automatic creation +- {doc}`../safety-and-privacy/security-model` --- the four protection layers and how they interact diff --git a/e2e/bash/test_port_forward.sh b/e2e/bash/test_port_forward.sh deleted file mode 100755 index 67b5214a..00000000 --- a/e2e/bash/test_port_forward.sh +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env bash - -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Integration test for port forwarding through a sandbox. -# -# Prerequisites: -# - A running nemoclaw cluster (nemoclaw cluster admin deploy) -# - The `nemoclaw` binary on PATH (or set NAV_BIN) -# -# Usage: -# ./e2e/bash/test_port_forward.sh - -set -euo pipefail - -############################################################################### -# Configuration -############################################################################### - -# Resolve the nemoclaw binary: prefer NAV_BIN, then target/debug, then PATH. -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" - -if [[ -n "${NAV_BIN:-}" ]]; then - NAV="${NAV_BIN}" -elif [[ -x "${PROJECT_ROOT}/target/debug/nemoclaw" ]]; then - NAV="${PROJECT_ROOT}/target/debug/nemoclaw" -else - NAV="nemoclaw" -fi - -FORWARD_PORT="${FORWARD_PORT:-19876}" -TIMEOUT_FORWARD="${TIMEOUT_FORWARD:-30}" -SANDBOX_NAME="" -FORWARD_PID="" -CREATE_PID="" - -############################################################################### -# Helpers -############################################################################### - -info() { printf '==> %s\n' "$*" >&2; } -error() { printf 'ERROR: %s\n' "$*" >&2; } - -# Strip ANSI escape codes from stdin. -strip_ansi() { - sed $'s/\x1b\\[[0-9;]*m//g' -} - -# Wait for a TCP port to accept connections. -wait_for_port() { - local host=$1 port=$2 timeout=$3 - local i - for i in $(seq 1 "${timeout}"); do - if (echo >/dev/tcp/"${host}"/"${port}") 2>/dev/null; then - return 0 - fi - sleep 1 - done - return 1 -} - -# Kill a process and all of its children. -kill_tree() { - local pid=$1 - # Kill children first (best-effort). - pkill -P "${pid}" 2>/dev/null || true - kill "${pid}" 2>/dev/null || true - wait "${pid}" 2>/dev/null || true -} - -cleanup() { - local exit_code=$? - - if [[ -n "${FORWARD_PID}" ]]; then - info "Stopping port-forward (pid ${FORWARD_PID})" - kill_tree "${FORWARD_PID}" - fi - - if [[ -n "${CREATE_PID}" ]]; then - info "Stopping sandbox create (pid ${CREATE_PID})" - kill_tree "${CREATE_PID}" - fi - - if [[ -n "${SANDBOX_NAME}" ]]; then - info "Deleting sandbox ${SANDBOX_NAME}" - "${NAV}" sandbox delete "${SANDBOX_NAME}" 2>/dev/null || true - fi - - if [[ ${exit_code} -eq 0 ]]; then - info "PASS" - else - error "FAIL (exit ${exit_code})" - fi - exit "${exit_code}" -} - -trap cleanup EXIT - -# Verify the test port is not already in use. 
-if (echo >/dev/tcp/127.0.0.1/"${FORWARD_PORT}") 2>/dev/null; then - error "Port ${FORWARD_PORT} is already in use; choose a different FORWARD_PORT" - exit 1 -fi - -############################################################################### -# Step 1 — Create a sandbox with a long-running TCP echo server. -# -# The echo server runs as the foreground process of `sandbox create --keep`. -# This ensures it stays alive for the duration of the test. We run the -# create command in the background and parse its output for the sandbox name. -############################################################################### - -info "Creating sandbox with TCP echo server on port ${FORWARD_PORT}" - -CREATE_LOG=$(mktemp) - -"${NAV}" sandbox create --keep -- \ - python3 -c " -import socket, sys, signal, os -signal.signal(signal.SIGHUP, signal.SIG_IGN) -signal.signal(signal.SIGTERM, lambda *_: sys.exit(0)) -port = ${FORWARD_PORT} -sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) -sock.bind(('127.0.0.1', port)) -sock.listen(1) -sock.settimeout(300) -print('echo-server-ready', flush=True) -try: - while True: - conn, _ = sock.accept() - data = conn.recv(4096) - if data: - conn.sendall(b'echo:' + data) - conn.close() -except (socket.timeout, OSError): - pass -finally: - sock.close() -" > "${CREATE_LOG}" 2>&1 & - -CREATE_PID=$! - -# Wait for the sandbox to be created and the echo server to start. -# We poll the log file for the sandbox name and the ready marker. -info "Waiting for sandbox to be ready" -for i in $(seq 1 120); do - if [[ -f "${CREATE_LOG}" ]] && grep -q 'echo-server-ready' "${CREATE_LOG}" 2>/dev/null; then - break - fi - if ! kill -0 "${CREATE_PID}" 2>/dev/null; then - error "Sandbox create exited prematurely" - cat "${CREATE_LOG}" >&2 - exit 1 - fi - sleep 1 -done - -if ! 
grep -q 'echo-server-ready' "${CREATE_LOG}" 2>/dev/null; then - error "Echo server did not become ready within 120s" - cat "${CREATE_LOG}" >&2 - exit 1 -fi - -# Parse sandbox name from the create log. -SANDBOX_NAME=$( - strip_ansi < "${CREATE_LOG}" | awk '/Name:/ { print $NF }' -) - -if [[ -z "${SANDBOX_NAME}" ]]; then - error "Could not parse sandbox name from create output" - cat "${CREATE_LOG}" >&2 - exit 1 -fi - -info "Sandbox created: ${SANDBOX_NAME}" - -############################################################################### -# Step 2 — Start port forwarding in the background. -############################################################################### - -info "Starting port forward ${FORWARD_PORT} -> ${SANDBOX_NAME}" - -"${NAV}" sandbox forward start "${FORWARD_PORT}" "${SANDBOX_NAME}" & -FORWARD_PID=$! - -# Wait for the local port to become available. -info "Waiting for local port ${FORWARD_PORT} to open" -if ! wait_for_port 127.0.0.1 "${FORWARD_PORT}" "${TIMEOUT_FORWARD}"; then - if ! kill -0 "${FORWARD_PID}" 2>/dev/null; then - error "Port-forward process exited prematurely" - else - error "Local port ${FORWARD_PORT} did not open within ${TIMEOUT_FORWARD}s" - fi - exit 1 -fi - -info "Port ${FORWARD_PORT} is open" - -# Give the SSH tunnel a moment to fully establish the direct-tcpip channel. -sleep 2 - -############################################################################### -# Step 3 — Send data through the forwarded port and verify the response. -# -# We retry a few times to handle transient tunnel setup delays. 
-############################################################################### - -info "Sending test payload through forwarded port" - -EXPECTED="echo:hello-nav" -RESPONSE_TRIMMED="" - -for attempt in $(seq 1 5); do - RESPONSE=$( - python3 -c " -import socket, sys -s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -s.settimeout(10) -try: - s.connect(('127.0.0.1', ${FORWARD_PORT})) - s.sendall(b'hello-nav\n') - data = s.recv(4096) - sys.stdout.write(data.decode()) -except Exception: - pass -finally: - s.close() -" 2>/dev/null - ) || true - - RESPONSE_TRIMMED=$(printf '%s' "${RESPONSE}" | tr -d '\r\n') - - if [[ "${RESPONSE_TRIMMED}" == "${EXPECTED}"* ]]; then - break - fi - - info "Attempt ${attempt}: no valid response yet, retrying in 2s..." - sleep 2 -done - -if [[ "${RESPONSE_TRIMMED}" != "${EXPECTED}"* ]]; then - error "Unexpected response: '${RESPONSE_TRIMMED}' (expected '${EXPECTED}')" - exit 1 -fi - -info "Received expected response: '${RESPONSE_TRIMMED}'" - -############################################################################### -# Cleanup is handled by the EXIT trap. -############################################################################### diff --git a/e2e/bash/test_sandbox_custom_image.sh b/e2e/bash/test_sandbox_custom_image.sh deleted file mode 100755 index b7d6973e..00000000 --- a/e2e/bash/test_sandbox_custom_image.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env bash - -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Integration test for building a custom container image and running a sandbox -# with it. 
-# -# Verifies the full flow: -# nemoclaw sandbox create --from -- -# -# Prerequisites: -# - A running nemoclaw cluster (nemoclaw cluster admin deploy) -# - Docker daemon running (for image build) -# - The `nemoclaw` binary on PATH (or set NAV_BIN) -# -# Usage: -# ./e2e/bash/test_sandbox_custom_image.sh - -set -euo pipefail - -############################################################################### -# Configuration -############################################################################### - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -if [[ -n "${NAV_BIN:-}" ]]; then - NAV="${NAV_BIN}" -elif [[ -x "${PROJECT_ROOT}/target/debug/nemoclaw" ]]; then - NAV="${PROJECT_ROOT}/target/debug/nemoclaw" -else - NAV="nemoclaw" -fi - -SANDBOX_NAME="" -TMPDIR_ROOT="" - -############################################################################### -# Helpers -############################################################################### - -info() { printf '==> %s\n' "$*" >&2; } -error() { printf 'ERROR: %s\n' "$*" >&2; } - -strip_ansi() { - sed $'s/\x1b\\[[0-9;]*m//g' -} - -cleanup() { - local exit_code=$? 
- - if [[ -n "${SANDBOX_NAME}" ]]; then - info "Deleting sandbox ${SANDBOX_NAME}" - "${NAV}" sandbox delete "${SANDBOX_NAME}" 2>/dev/null || true - fi - - if [[ -n "${TMPDIR_ROOT}" && -d "${TMPDIR_ROOT}" ]]; then - rm -rf "${TMPDIR_ROOT}" - fi - - if [[ ${exit_code} -eq 0 ]]; then - info "PASS" - else - error "FAIL (exit ${exit_code})" - fi - exit "${exit_code}" -} - -trap cleanup EXIT - -############################################################################### -# Step 1 — Create a minimal Dockerfile for testing -############################################################################### - -info "Creating temporary Dockerfile" - -TMPDIR_ROOT=$(mktemp -d) -DOCKERFILE="${TMPDIR_ROOT}/Dockerfile" - -cat > "${DOCKERFILE}" <<'DOCKERFILE_CONTENT' -FROM python:3.12-slim - -# Create the sandbox user/group so the supervisor can switch to it. -RUN groupadd -g 1000 sandbox && \ - useradd -m -u 1000 -g sandbox sandbox - -# Write a marker file so we can verify this is our custom image. -RUN echo "custom-image-e2e-marker" > /opt/marker.txt - -CMD ["sleep", "infinity"] -DOCKERFILE_CONTENT - -############################################################################### -# Step 2 — Create a sandbox from the Dockerfile and verify it works -############################################################################### - -info "Creating sandbox from Dockerfile" - -CREATE_LOG=$(mktemp) -if ! "${NAV}" sandbox create \ - --from "${DOCKERFILE}" \ - -- cat /opt/marker.txt \ - > "${CREATE_LOG}" 2>&1; then - error "Sandbox create failed" - cat "${CREATE_LOG}" >&2 - exit 1 -fi - -# Parse sandbox name from the create output for cleanup. -SANDBOX_NAME=$( - strip_ansi < "${CREATE_LOG}" | awk '/Name:/ { print $NF }' -) || true - -info "Verifying marker file from custom image" - -# The sandbox ran `cat /opt/marker.txt` — check that the expected marker -# appears in the output. -if ! 
strip_ansi < "${CREATE_LOG}" | grep -q "custom-image-e2e-marker"; then - error "Marker file content not found in sandbox output" - cat "${CREATE_LOG}" >&2 - exit 1 -fi - -info "Custom image marker verified" - -############################################################################### -# Cleanup is handled by the EXIT trap. -############################################################################### diff --git a/e2e/bash/test_sandbox_sync.sh b/e2e/bash/test_sandbox_sync.sh deleted file mode 100755 index f8cc0645..00000000 --- a/e2e/bash/test_sandbox_sync.sh +++ /dev/null @@ -1,297 +0,0 @@ -#!/usr/bin/env bash - -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Integration test for bidirectional file sync with a sandbox. -# -# Verifies the full flow: -# 1. nemoclaw sandbox create --keep (long-running sandbox for sync tests) -# 2. nemoclaw sandbox sync --up (push) -# 3. nemoclaw sandbox sync --down (pull) -# 4. Single-file round-trip -# -# Prerequisites: -# - A running nemoclaw cluster (nemoclaw cluster admin deploy) -# - The `nemoclaw` binary on PATH (or set NAV_BIN) -# -# Usage: -# ./e2e/bash/test_sandbox_sync.sh - -set -euo pipefail - -############################################################################### -# Configuration -############################################################################### - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" - -if [[ -n "${NAV_BIN:-}" ]]; then - NAV="${NAV_BIN}" -elif [[ -x "${PROJECT_ROOT}/target/debug/nemoclaw" ]]; then - NAV="${PROJECT_ROOT}/target/debug/nemoclaw" -else - NAV="nemoclaw" -fi - -SANDBOX_NAME="" -CREATE_PID="" -TMPDIR_ROOT="" - -############################################################################### -# Helpers -############################################################################### - -info() { printf '==> %s\n' "$*" >&2; } -error() { printf 'ERROR: %s\n' "$*" >&2; } - -strip_ansi() { - sed $'s/\x1b\\[[0-9;]*m//g' -} - -# Kill a process and all of its children. -kill_tree() { - local pid=$1 - pkill -P "${pid}" 2>/dev/null || true - kill "${pid}" 2>/dev/null || true - wait "${pid}" 2>/dev/null || true -} - -cleanup() { - local exit_code=$? - - if [[ -n "${CREATE_PID}" ]]; then - info "Stopping sandbox create (pid ${CREATE_PID})" - kill_tree "${CREATE_PID}" - fi - - if [[ -n "${SANDBOX_NAME}" ]]; then - info "Deleting sandbox ${SANDBOX_NAME}" - "${NAV}" sandbox delete "${SANDBOX_NAME}" 2>/dev/null || true - fi - - if [[ -n "${TMPDIR_ROOT}" && -d "${TMPDIR_ROOT}" ]]; then - rm -rf "${TMPDIR_ROOT}" - fi - - if [[ ${exit_code} -eq 0 ]]; then - info "PASS" - else - error "FAIL (exit ${exit_code})" - fi - exit "${exit_code}" -} - -trap cleanup EXIT - -############################################################################### -# Step 1 — Create a sandbox with --keep so it stays alive for sync tests. -# -# `sandbox create --keep -- sleep infinity` blocks forever, so we run it in -# the background and poll the log for the sandbox name and ready marker. -############################################################################### - -info "Creating sandbox with sleep infinity" - -CREATE_LOG=$(mktemp) - -"${NAV}" sandbox create --keep -- sleep infinity \ - > "${CREATE_LOG}" 2>&1 & -CREATE_PID=$! - -# Wait for the sandbox to become ready. The CLI prints the phase label -# "Ready" once the sandbox reaches that state. 
We also check for "Name:" -# in the header to know the sandbox was created. -info "Waiting for sandbox to be ready" -for i in $(seq 1 120); do - if [[ -f "${CREATE_LOG}" ]] && strip_ansi < "${CREATE_LOG}" | grep -q 'Name:'; then - # Name is printed in the header; now wait for Ready phase. - if strip_ansi < "${CREATE_LOG}" | grep -qw 'Ready'; then - break - fi - fi - if ! kill -0 "${CREATE_PID}" 2>/dev/null; then - error "Sandbox create exited prematurely" - cat "${CREATE_LOG}" >&2 - exit 1 - fi - sleep 1 -done - -if ! strip_ansi < "${CREATE_LOG}" | grep -qw 'Ready'; then - error "Sandbox did not become ready within 120s" - cat "${CREATE_LOG}" >&2 - exit 1 -fi - -# Parse sandbox name from the create output. -SANDBOX_NAME=$( - strip_ansi < "${CREATE_LOG}" | awk '/Name:/ { print $NF }' -) || true - -if [[ -z "${SANDBOX_NAME}" ]]; then - error "Could not parse sandbox name from create output" - cat "${CREATE_LOG}" >&2 - exit 1 -fi - -info "Sandbox created: ${SANDBOX_NAME}" - -############################################################################### -# Step 2 — Sync up: push a local directory into the sandbox. -############################################################################### - -info "Preparing local test files" - -TMPDIR_ROOT=$(mktemp -d) -LOCAL_UP="${TMPDIR_ROOT}/upload" -mkdir -p "${LOCAL_UP}/subdir" -echo "hello-from-local" > "${LOCAL_UP}/greeting.txt" -echo "nested-content" > "${LOCAL_UP}/subdir/nested.txt" - -info "Syncing local directory up to sandbox" - -SYNC_UP_LOG=$(mktemp) -if ! "${NAV}" sandbox sync "${SANDBOX_NAME}" --up "${LOCAL_UP}" /sandbox/uploaded \ - > "${SYNC_UP_LOG}" 2>&1; then - error "sync --up failed" - cat "${SYNC_UP_LOG}" >&2 - exit 1 -fi - -############################################################################### -# Step 3 — Sync down: pull the uploaded files back and verify contents. 
-############################################################################### - -info "Syncing files back down from sandbox" - -LOCAL_DOWN="${TMPDIR_ROOT}/download" -mkdir -p "${LOCAL_DOWN}" - -SYNC_DOWN_LOG=$(mktemp) -if ! "${NAV}" sandbox sync "${SANDBOX_NAME}" --down /sandbox/uploaded "${LOCAL_DOWN}" \ - > "${SYNC_DOWN_LOG}" 2>&1; then - error "sync --down failed" - cat "${SYNC_DOWN_LOG}" >&2 - exit 1 -fi - -info "Verifying downloaded files" - -# Check top-level file. -if [[ ! -f "${LOCAL_DOWN}/greeting.txt" ]]; then - error "greeting.txt not found after sync --down" - ls -lR "${LOCAL_DOWN}" >&2 - exit 1 -fi - -GREETING_CONTENT=$(cat "${LOCAL_DOWN}/greeting.txt") -if [[ "${GREETING_CONTENT}" != "hello-from-local" ]]; then - error "greeting.txt content mismatch: got '${GREETING_CONTENT}'" - exit 1 -fi - -info "greeting.txt verified" - -# Check nested file. -if [[ ! -f "${LOCAL_DOWN}/subdir/nested.txt" ]]; then - error "subdir/nested.txt not found after sync --down" - ls -lR "${LOCAL_DOWN}" >&2 - exit 1 -fi - -NESTED_CONTENT=$(cat "${LOCAL_DOWN}/subdir/nested.txt") -if [[ "${NESTED_CONTENT}" != "nested-content" ]]; then - error "subdir/nested.txt content mismatch: got '${NESTED_CONTENT}'" - exit 1 -fi - -info "subdir/nested.txt verified" - -############################################################################### -# Step 4 — Large-file round-trip to exercise multi-chunk SSH transport. -# -# The tar archive is streamed through the SSH channel in 4096-byte chunks. -# Historically, a fire-and-forget tokio::spawn per chunk caused out-of-order -# delivery that corrupted the tar stream. A ~512 KiB file spans many chunks -# and makes such ordering bugs much more likely to surface. -############################################################################### - -info "Generating large test file (~512 KiB)" - -LARGE_DIR="${TMPDIR_ROOT}/large_upload" -mkdir -p "${LARGE_DIR}" - -# Deterministic pseudo-random content so we can verify with a checksum. 
-dd if=/dev/urandom bs=1024 count=512 2>/dev/null > "${LARGE_DIR}/large.bin" -EXPECTED_HASH=$(shasum -a 256 "${LARGE_DIR}/large.bin" | awk '{print $1}') - -info "Syncing large file up to sandbox" -if ! "${NAV}" sandbox sync "${SANDBOX_NAME}" --up "${LARGE_DIR}" /sandbox/large_test \ - > /dev/null 2>&1; then - error "sync --up large file failed" - exit 1 -fi - -info "Syncing large file back down" -LARGE_DOWN="${TMPDIR_ROOT}/large_download" -mkdir -p "${LARGE_DOWN}" - -if ! "${NAV}" sandbox sync "${SANDBOX_NAME}" --down /sandbox/large_test "${LARGE_DOWN}" \ - > /dev/null 2>&1; then - error "sync --down large file failed" - exit 1 -fi - -ACTUAL_HASH=$(shasum -a 256 "${LARGE_DOWN}/large.bin" | awk '{print $1}') -if [[ "${EXPECTED_HASH}" != "${ACTUAL_HASH}" ]]; then - error "large.bin checksum mismatch after round-trip" - error " expected: ${EXPECTED_HASH}" - error " actual: ${ACTUAL_HASH}" - exit 1 -fi - -ACTUAL_SIZE=$(wc -c < "${LARGE_DOWN}/large.bin" | tr -d ' ') -if [[ "${ACTUAL_SIZE}" -ne 524288 ]]; then - error "large.bin size mismatch: expected 524288, got ${ACTUAL_SIZE}" - exit 1 -fi - -info "Large file round-trip verified (SHA-256 match, ${ACTUAL_SIZE} bytes)" - -############################################################################### -# Step 5 — Sync up a single file and round-trip it. -############################################################################### - -info "Testing single-file sync" - -SINGLE_FILE="${TMPDIR_ROOT}/single.txt" -echo "single-file-payload" > "${SINGLE_FILE}" - -if ! "${NAV}" sandbox sync "${SANDBOX_NAME}" --up "${SINGLE_FILE}" /sandbox \ - > /dev/null 2>&1; then - error "sync --up single file failed" - exit 1 -fi - -LOCAL_SINGLE_DOWN="${TMPDIR_ROOT}/single_down" -mkdir -p "${LOCAL_SINGLE_DOWN}" - -if ! 
"${NAV}" sandbox sync "${SANDBOX_NAME}" --down /sandbox/single.txt "${LOCAL_SINGLE_DOWN}" \ - > /dev/null 2>&1; then - error "sync --down single file failed" - exit 1 -fi - -SINGLE_CONTENT=$(cat "${LOCAL_SINGLE_DOWN}/single.txt") -if [[ "${SINGLE_CONTENT}" != "single-file-payload" ]]; then - error "single.txt content mismatch: got '${SINGLE_CONTENT}'" - exit 1 -fi - -info "Single-file round-trip verified" - -############################################################################### -# Cleanup is handled by the EXIT trap. -############################################################################### diff --git a/e2e/python/conftest.py b/e2e/python/conftest.py index b4672788..cffb1553 100644 --- a/e2e/python/conftest.py +++ b/e2e/python/conftest.py @@ -84,92 +84,6 @@ def _worker_suffix(worker_id: str) -> str: return f"-{worker_id}" -@pytest.fixture(scope="session") -def mock_inference_route( - inference_client: InferenceRouteClient, - _worker_suffix: str, -) -> Iterator[str]: - name = f"e2e-mock-local{_worker_suffix}" - routing_hint = f"e2e_mock_local{_worker_suffix}" - # Clean up any leftover route from a previous run. 
- try: - inference_client.delete(name) - except grpc.RpcError: - pass - - inference_client.create( - name=name, - routing_hint=routing_hint, - base_url="mock://e2e", - protocols=["openai_chat_completions"], - api_key="mock", - model_id="mock/test-model", - enabled=True, - ) - yield routing_hint - try: - inference_client.delete(name) - except grpc.RpcError: - pass - - -@pytest.fixture(scope="session") -def mock_anthropic_route( - inference_client: InferenceRouteClient, - _worker_suffix: str, -) -> Iterator[str]: - name = f"e2e-mock-anthropic{_worker_suffix}" - routing_hint = f"e2e_mock_anthropic{_worker_suffix}" - try: - inference_client.delete(name) - except grpc.RpcError: - pass - - inference_client.create( - name=name, - routing_hint=routing_hint, - base_url="mock://e2e-anthropic", - protocols=["anthropic_messages"], - api_key="mock", - model_id="mock/claude-test", - enabled=True, - ) - yield routing_hint - try: - inference_client.delete(name) - except grpc.RpcError: - pass - - -@pytest.fixture(scope="session") -def mock_disallowed_route( - inference_client: InferenceRouteClient, - _worker_suffix: str, -) -> Iterator[str]: - """Route that exists but is NOT in any sandbox's allowed_routes.""" - name = f"e2e-mock-disallowed{_worker_suffix}" - routing_hint = f"e2e_mock_disallowed{_worker_suffix}" - try: - inference_client.delete(name) - except grpc.RpcError: - pass - - inference_client.create( - name=name, - routing_hint=routing_hint, - base_url="mock://e2e-disallowed", - protocols=["openai_chat_completions"], - api_key="mock", - model_id="mock/disallowed-model", - enabled=True, - ) - yield routing_hint - try: - inference_client.delete(name) - except grpc.RpcError: - pass - - @pytest.fixture def run_python() -> Callable[[Sandbox, str], tuple[int, str, str]]: def _run(sandbox: Sandbox, code: str) -> tuple[int, str, str]: diff --git a/e2e/python/test_inference_routing.py b/e2e/python/test_inference_routing.py index 932f100c..01f932bf 100644 --- 
a/e2e/python/test_inference_routing.py +++ b/e2e/python/test_inference_routing.py @@ -1,34 +1,34 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -"""E2E tests for inference interception and routing. +"""E2E tests for explicit inference routing via `inference.local`. -When a process inside the sandbox makes an inference API call (e.g. POST -/v1/chat/completions) to an endpoint not explicitly allowed by network policy, -the proxy intercepts it, TLS-terminates the connection, detects the inference -API pattern, and the sandbox routes the request locally to the configured -backend (configured with `mock://` for testing). +In the new model, sandbox traffic is routed only when the request targets +`inference.local`. There is no implicit catch-all interception for arbitrary +hosts like `api.openai.com`. """ from __future__ import annotations -import time +import fcntl +from contextlib import contextmanager +from typing import TYPE_CHECKING import grpc +import pytest -from typing import TYPE_CHECKING - -from navigator._proto import datamodel_pb2, sandbox_pb2 +from navigator._proto import datamodel_pb2, navigator_pb2, sandbox_pb2 if TYPE_CHECKING: - from collections.abc import Callable - - from navigator import InferenceRouteClient, Sandbox + from collections.abc import Callable, Iterator + from navigator import ( + ClusterInferenceConfig, + InferenceRouteClient, + Sandbox, + SandboxClient, + ) -# ============================================================================= -# Policy helpers -# ============================================================================= _BASE_FILESYSTEM = sandbox_pb2.FilesystemPolicy( include_workdir=True, @@ -38,148 +38,168 @@ _BASE_LANDLOCK = sandbox_pb2.LandlockPolicy(compatibility="best_effort") _BASE_PROCESS = sandbox_pb2.ProcessPolicy(run_as_user="sandbox", run_as_group="sandbox") +pytestmark = pytest.mark.xdist_group("inference-routing") 
+ +_MANAGED_OPENAI_MODEL_ID = "mock/e2e-openai-model" +_MANAGED_OPENAI_PROVIDER_NAME = "e2e-managed-openai" +_INFERENCE_CONFIG_LOCK = "/tmp/nemoclaw-e2e-inference-config.lock" -def _inference_routing_policy( - allowed_route: str = "e2e_mock_local", -) -> sandbox_pb2.SandboxPolicy: - """Policy with inference routing enabled. - No network_policies needed — any connection from any binary to an endpoint - not in an explicit policy will be intercepted for inference when - allowed_routes is non-empty. - """ +def _baseline_policy() -> sandbox_pb2.SandboxPolicy: return sandbox_pb2.SandboxPolicy( version=1, - inference=sandbox_pb2.InferencePolicy(allowed_routes=[allowed_route]), filesystem=_BASE_FILESYSTEM, landlock=_BASE_LANDLOCK, process=_BASE_PROCESS, ) -# ============================================================================= -# Tests -# ============================================================================= +def _upsert_managed_inference( + inference_client: InferenceRouteClient, + sandbox_client: SandboxClient, + *, + provider_name: str, + provider_type: str, + credential_key: str, + base_url_key: str, + model_id: str, + base_url: str, +) -> None: + provider = datamodel_pb2.Provider( + name=provider_name, + type=provider_type, + credentials={credential_key: "mock"}, + config={ + base_url_key: base_url, + }, + ) + timeout = sandbox_client._timeout + for _ in range(5): + try: + sandbox_client._stub.UpdateProvider( + navigator_pb2.UpdateProviderRequest(provider=provider), + timeout=timeout, + ) + break + except grpc.RpcError as exc: + if exc.code() != grpc.StatusCode.NOT_FOUND: + raise + + try: + sandbox_client._stub.CreateProvider( + navigator_pb2.CreateProviderRequest(provider=provider), + timeout=timeout, + ) + break + except grpc.RpcError as create_exc: + if create_exc.code() == grpc.StatusCode.ALREADY_EXISTS: + continue + if ( + create_exc.code() == grpc.StatusCode.INTERNAL + and "UNIQUE constraint failed" in (create_exc.details() or "") + ): + continue 
+ raise + else: + raise RuntimeError("failed to upsert managed e2e provider after retries") + + inference_client.set_cluster( + provider_name=provider_name, + model_id=model_id, + ) -def test_route_refresh_picks_up_route_created_after_sandbox_start( - sandbox: Callable[..., Sandbox], + +def _current_cluster_inference( inference_client: InferenceRouteClient, -) -> None: - """Route refresh picks up a route created after sandbox startup. - - Regression scenario: - 1. Sandbox starts with inference allowed_routes configured but no matching route exists yet. - 2. Initial inference request should be intercepted and return 503 (empty route cache). - 3. Create the route after sandbox startup. - 4. Background refresh should load the new route and subsequent requests should succeed. - """ - route_name = "e2e-mock-refresh-late" - route_hint = "e2e_mock_refresh_late" - spec = datamodel_pb2.SandboxSpec(policy=_inference_routing_policy(route_hint)) +) -> ClusterInferenceConfig | None: + try: + return inference_client.get_cluster() + except grpc.RpcError as exc: + if exc.code() == grpc.StatusCode.NOT_FOUND: + return None + raise - def call_chat_completions() -> str: - import json - import ssl - import urllib.error - import urllib.request - body = json.dumps( - { - "model": "test-model", - "messages": [{"role": "user", "content": "hello"}], - } - ).encode() +def _restore_cluster_inference( + inference_client: InferenceRouteClient, + previous: ClusterInferenceConfig | None, +) -> None: + if previous is None: + return - req = urllib.request.Request( - "https://api.openai.com/v1/chat/completions", - data=body, - headers={ - "Content-Type": "application/json", - "Authorization": "Bearer dummy-key", - }, - method="POST", - ) + inference_client.set_cluster( + provider_name=previous.provider_name, + model_id=previous.model_id, + ) - ctx = ssl.create_default_context() - ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE +@contextmanager +def _cluster_config_lock() -> 
Iterator[None]: + with open(_INFERENCE_CONFIG_LOCK, "a+", encoding="utf-8") as lock_file: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) try: - resp = urllib.request.urlopen(req, timeout=30, context=ctx) - return resp.read().decode() - except urllib.error.HTTPError as e: - body = e.read().decode("utf-8", errors="replace") - return f"http_error_{e.code}:{body}" - except Exception as e: - return f"error:{type(e).__name__}:{e}" + yield + finally: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) - try: - inference_client.delete(route_name) - except grpc.RpcError: - pass - try: - with sandbox(spec=spec, delete_on_exit=True) as sb: - initial = sb.exec_python(call_chat_completions, timeout_seconds=60) - assert initial.exit_code == 0, f"stderr: {initial.stderr}" - initial_output = initial.stdout.strip() - assert initial_output.startswith("http_error_503"), initial_output - assert ( - "inference endpoint detected without matching inference route" - in initial_output - ), initial_output - - inference_client.create( - name=route_name, - routing_hint=route_hint, - base_url="mock://e2e-refresh-late", - protocols=["openai_chat_completions"], - api_key="mock", - model_id="mock/late-route-model", - enabled=True, - ) +@pytest.fixture +def managed_openai_route( + inference_client: InferenceRouteClient, + sandbox_client: SandboxClient, +) -> Iterator[str]: + with _cluster_config_lock(): + previous = _current_cluster_inference(inference_client) + _upsert_managed_inference( + inference_client, + sandbox_client, + provider_name=_MANAGED_OPENAI_PROVIDER_NAME, + provider_type="openai", + credential_key="OPENAI_API_KEY", + base_url_key="OPENAI_BASE_URL", + model_id=_MANAGED_OPENAI_MODEL_ID, + base_url="mock://e2e-managed-openai", + ) + try: + yield _MANAGED_OPENAI_MODEL_ID + finally: + _restore_cluster_inference(inference_client, previous) - deadline = time.time() + 95 - last_output = initial_output - while time.time() < deadline: - result = sb.exec_python(call_chat_completions, 
timeout_seconds=60) - assert result.exit_code == 0, f"stderr: {result.stderr}" - last_output = result.stdout.strip() +def test_model_discovery_call_routed_to_backend( + sandbox: Callable[..., Sandbox], + managed_openai_route: str, +) -> None: + """Model discovery endpoint is treated as an inference protocol.""" + spec = datamodel_pb2.SandboxSpec(policy=_baseline_policy()) - if "Hello from nemoclaw mock backend" in last_output: - break + def call_models() -> str: + import ssl + import urllib.request - time.sleep(5) + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE - assert "Hello from nemoclaw mock backend" in last_output, last_output - assert "mock/late-route-model" in last_output, last_output - finally: - try: - inference_client.delete(route_name) - except grpc.RpcError: - pass + req = urllib.request.Request("https://inference.local/v1/models", method="GET") + resp = urllib.request.urlopen(req, timeout=30, context=ctx) + return resp.read().decode() + + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python(call_models, timeout_seconds=60) + assert result.exit_code == 0, f"stderr: {result.stderr}" + output = result.stdout.strip() + assert "Hello from nemoclaw mock backend" in output + assert managed_openai_route in output def test_inference_call_routed_to_backend( sandbox: Callable[..., Sandbox], - mock_inference_route: str, + managed_openai_route: str, ) -> None: - """Inference call to undeclared endpoint is intercepted and routed. - - A Python process inside the sandbox calls the OpenAI chat completions - endpoint via raw urllib. Since api.openai.com is not in any network - policy, but inference routing is configured, the proxy should: - 1. Detect no explicit policy match (inspect_for_inference) - 2. TLS-terminate the connection - 3. Detect the inference API pattern (POST /v1/chat/completions) - 4. Forward locally via sandbox router to the policy-allowed backend - 5. 
Return the mock response from the configured route - """ - spec = datamodel_pb2.SandboxSpec( - policy=_inference_routing_policy(mock_inference_route) - ) + """OpenAI chat request to `inference.local` is intercepted and routed.""" + spec = datamodel_pb2.SandboxSpec(policy=_baseline_policy()) def call_chat_completions() -> str: import json @@ -194,7 +214,7 @@ def call_chat_completions() -> str: ).encode() req = urllib.request.Request( - "https://api.openai.com/v1/chat/completions", + "https://inference.local/v1/chat/completions", data=body, headers={ "Content-Type": "application/json", @@ -215,22 +235,16 @@ def call_chat_completions() -> str: assert result.exit_code == 0, f"stderr: {result.stderr}" output = result.stdout.strip() assert "Hello from nemoclaw mock backend" in output - assert "mock/test-model" in output + assert managed_openai_route in output def test_non_inference_request_denied( sandbox: Callable[..., Sandbox], - mock_inference_route: str, + managed_openai_route: str, ) -> None: - """Non-inference HTTP request on an intercepted connection is denied. - - A process making a non-inference request (e.g. GET /v1/models) to an - undeclared endpoint should be denied with 403 when inference routing - is configured — only recognized inference API patterns are routed. 
- """ - spec = datamodel_pb2.SandboxSpec( - policy=_inference_routing_policy(mock_inference_route) - ) + """Non-inference path on `inference.local` is denied with 403.""" + _ = managed_openai_route + spec = datamodel_pb2.SandboxSpec(policy=_baseline_policy()) def make_non_inference_request() -> str: import ssl @@ -242,7 +256,7 @@ def make_non_inference_request() -> str: ctx.verify_mode = ssl.CERT_NONE try: - req = urllib.request.Request("https://api.openai.com/v1/models") + req = urllib.request.Request("https://inference.local/v1/not-inference") urllib.request.urlopen(req, timeout=10, context=ctx) return "unexpected_success" except urllib.error.HTTPError as e: @@ -253,33 +267,21 @@ def make_non_inference_request() -> str: with sandbox(spec=spec, delete_on_exit=True) as sb: result = sb.exec_python(make_non_inference_request, timeout_seconds=30) assert result.exit_code == 0, f"stderr: {result.stderr}" - assert "403" in result.stdout.strip() + assert result.stdout.strip() == "http_error_403" -def test_inference_anthropic_messages_protocol( +def test_unsupported_protocol_returns_400( sandbox: Callable[..., Sandbox], - mock_anthropic_route: str, + managed_openai_route: str, ) -> None: - """Anthropic messages protocol (POST /v1/messages) is intercepted and routed. - - Verifies multi-protocol routing: a request using the Anthropic messages - format is correctly detected and forwarded to a route configured with - the anthropic_messages protocol. 
- """ - policy = sandbox_pb2.SandboxPolicy( - version=1, - inference=sandbox_pb2.InferencePolicy( - allowed_routes=[mock_anthropic_route], - ), - filesystem=_BASE_FILESYSTEM, - landlock=_BASE_LANDLOCK, - process=_BASE_PROCESS, - ) - spec = datamodel_pb2.SandboxSpec(policy=policy) + """Protocol mismatch returns 400 when no compatible route exists.""" + _ = managed_openai_route + spec = datamodel_pb2.SandboxSpec(policy=_baseline_policy()) def call_anthropic_messages() -> str: import json import ssl + import urllib.error import urllib.request body = json.dumps( @@ -291,7 +293,7 @@ def call_anthropic_messages() -> str: ).encode() req = urllib.request.Request( - "https://api.anthropic.com/v1/messages", + "https://inference.local/v1/messages", data=body, headers={ "Content-Type": "application/json", @@ -304,37 +306,34 @@ def call_anthropic_messages() -> str: ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE - resp = urllib.request.urlopen(req, timeout=30, context=ctx) - return resp.read().decode() + try: + resp = urllib.request.urlopen(req, timeout=30, context=ctx) + return resp.read().decode() + except urllib.error.HTTPError as exc: + return ( + f"http_error_{exc.code}:{exc.read().decode('utf-8', errors='replace')}" + ) with sandbox(spec=spec, delete_on_exit=True) as sb: result = sb.exec_python(call_anthropic_messages, timeout_seconds=60) assert result.exit_code == 0, f"stderr: {result.stderr}" output = result.stdout.strip() - assert "Hello from nemoclaw mock backend" in output - assert "mock/claude-test" in output + assert output.startswith("http_error_400"), output + assert "no compatible route" in output -def test_inference_route_filtering_by_allowed_routes( +def test_non_inference_host_is_not_intercepted( sandbox: Callable[..., Sandbox], - mock_inference_route: str, - mock_disallowed_route: str, + managed_openai_route: str, ) -> None: - """Only routes in allowed_routes are available; others produce errors. 
- - Two routes exist (e2e_mock_local and e2e_mock_disallowed), but the - policy only allows e2e_mock_local. A request that would match the - allowed route should succeed, while inference requests that can't - match any allowed route get an error from the sandbox router. - """ - # Policy only allows the mock_inference_route, NOT mock_disallowed_route - spec = datamodel_pb2.SandboxSpec( - policy=_inference_routing_policy(mock_inference_route) - ) + """Requests to non-`inference.local` hosts do not get inference routing.""" + _ = managed_openai_route + spec = datamodel_pb2.SandboxSpec(policy=_baseline_policy()) - def call_allowed_route() -> str: + def call_external_openai_endpoint() -> str: import json import ssl + import urllib.error import urllib.request body = json.dumps( @@ -357,13 +356,16 @@ def call_allowed_route() -> str: ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE - resp = urllib.request.urlopen(req, timeout=30, context=ctx) - return resp.read().decode() + try: + resp = urllib.request.urlopen(req, timeout=30, context=ctx) + return resp.read().decode() + except urllib.error.URLError as exc: + return f"url_error:{exc}" + except Exception as exc: + return f"error:{type(exc).__name__}:{exc}" with sandbox(spec=spec, delete_on_exit=True) as sb: - result = sb.exec_python(call_allowed_route, timeout_seconds=60) + result = sb.exec_python(call_external_openai_endpoint, timeout_seconds=60) assert result.exit_code == 0, f"stderr: {result.stderr}" output = result.stdout.strip() - # The allowed route (e2e_mock_local) should serve the request - assert "Hello from nemoclaw mock backend" in output - assert "mock/test-model" in output + assert "Tunnel connection failed: 403 Forbidden" in output diff --git a/e2e/python/test_policy_validation.py b/e2e/python/test_policy_validation.py new file mode 100644 index 00000000..f4cfa149 --- /dev/null +++ b/e2e/python/test_policy_validation.py @@ -0,0 +1,164 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""E2E tests for server-side policy safety validation. + +These tests verify that the gRPC server rejects sandbox creation and policy +updates that contain unsafe content (root process identity, path traversal, +overly broad filesystem paths). +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import grpc +import pytest + +from navigator._proto import datamodel_pb2, navigator_pb2, sandbox_pb2 + +if TYPE_CHECKING: + from collections.abc import Callable + + from navigator import Sandbox, SandboxClient + + +# ============================================================================= +# Policy helpers +# ============================================================================= + +_SAFE_FILESYSTEM = sandbox_pb2.FilesystemPolicy( + include_workdir=True, + read_only=["/usr", "/lib", "/etc", "/app", "/var/log"], + read_write=["/sandbox", "/tmp"], +) +_SAFE_LANDLOCK = sandbox_pb2.LandlockPolicy(compatibility="best_effort") +_SAFE_PROCESS = sandbox_pb2.ProcessPolicy(run_as_user="sandbox", run_as_group="sandbox") + + +def _safe_policy() -> sandbox_pb2.SandboxPolicy: + """Build a safe baseline policy for testing.""" + return sandbox_pb2.SandboxPolicy( + version=1, + filesystem=_SAFE_FILESYSTEM, + landlock=_SAFE_LANDLOCK, + process=_SAFE_PROCESS, + ) + + +# ============================================================================= +# Tests +# ============================================================================= + + +def test_create_sandbox_rejects_root_user( + sandbox_client: SandboxClient, +) -> None: + """Server rejects CreateSandbox with run_as_user='root'.""" + policy = sandbox_pb2.SandboxPolicy( + version=1, + filesystem=_SAFE_FILESYSTEM, + landlock=_SAFE_LANDLOCK, + process=sandbox_pb2.ProcessPolicy( + run_as_user="root", + run_as_group="sandbox", + ), + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + + stub = sandbox_client._stub 
+ with pytest.raises(grpc.RpcError) as exc_info: + stub.CreateSandbox(navigator_pb2.CreateSandboxRequest(name="", spec=spec)) + + assert exc_info.value.code() == grpc.StatusCode.INVALID_ARGUMENT + assert "root" in exc_info.value.details().lower() + + +def test_create_sandbox_rejects_path_traversal( + sandbox_client: SandboxClient, +) -> None: + """Server rejects CreateSandbox with '..' in filesystem paths.""" + policy = sandbox_pb2.SandboxPolicy( + version=1, + filesystem=sandbox_pb2.FilesystemPolicy( + include_workdir=True, + read_only=["/usr/../etc/shadow"], + read_write=["/tmp"], + ), + landlock=_SAFE_LANDLOCK, + process=_SAFE_PROCESS, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + + stub = sandbox_client._stub + with pytest.raises(grpc.RpcError) as exc_info: + stub.CreateSandbox(navigator_pb2.CreateSandboxRequest(name="", spec=spec)) + + assert exc_info.value.code() == grpc.StatusCode.INVALID_ARGUMENT + assert "traversal" in exc_info.value.details().lower() + + +def test_create_sandbox_rejects_overly_broad_paths( + sandbox_client: SandboxClient, +) -> None: + """Server rejects CreateSandbox with read_write=['/'].""" + policy = sandbox_pb2.SandboxPolicy( + version=1, + filesystem=sandbox_pb2.FilesystemPolicy( + include_workdir=True, + read_only=["/usr"], + read_write=["/"], + ), + landlock=_SAFE_LANDLOCK, + process=_SAFE_PROCESS, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + + stub = sandbox_client._stub + with pytest.raises(grpc.RpcError) as exc_info: + stub.CreateSandbox(navigator_pb2.CreateSandboxRequest(name="", spec=spec)) + + assert exc_info.value.code() == grpc.StatusCode.INVALID_ARGUMENT + assert "broad" in exc_info.value.details().lower() + + +def test_update_policy_rejects_immutable_fields( + sandbox: Callable[..., Sandbox], + sandbox_client: SandboxClient, +) -> None: + """UpdateSandboxPolicy rejects removal of filesystem paths on a live sandbox. 
+ + Filesystem paths are enforced by Landlock at sandbox startup and cannot be + removed after the fact. This test verifies that the server rejects updates + that remove existing read_only paths, which also prevents unsafe content + from being introduced via policy updates. + """ + safe_policy = _safe_policy() + spec = datamodel_pb2.SandboxSpec(policy=safe_policy) + + with sandbox(spec=spec, delete_on_exit=True) as sb: + sandbox_name = sb.sandbox.name + stub = sandbox_client._stub + + # Try to update with a modified filesystem policy (immutable field) + unsafe_policy = sandbox_pb2.SandboxPolicy( + version=1, + filesystem=sandbox_pb2.FilesystemPolicy( + include_workdir=True, + read_only=["/usr/../etc/shadow"], + read_write=["/tmp"], + ), + landlock=_SAFE_LANDLOCK, + process=_SAFE_PROCESS, + ) + + with pytest.raises(grpc.RpcError) as exc_info: + stub.UpdateSandboxPolicy( + navigator_pb2.UpdateSandboxPolicyRequest( + name=sandbox_name, + policy=unsafe_policy, + ) + ) + + assert exc_info.value.code() == grpc.StatusCode.INVALID_ARGUMENT + assert "cannot be removed" in exc_info.value.details().lower() diff --git a/e2e/python/test_sandbox_policy.py b/e2e/python/test_sandbox_policy.py index 11835f21..31344ba0 100644 --- a/e2e/python/test_sandbox_policy.py +++ b/e2e/python/test_sandbox_policy.py @@ -178,6 +178,84 @@ def fn(): return fn +def _forward_proxy_with_server(): + """Return a closure that starts an HTTP server and sends a forward proxy request. + + The closure starts a minimal HTTP server on the given port inside the sandbox, + then sends a plain HTTP forward proxy request (non-CONNECT) through the sandbox + proxy and returns the raw response. 
+ """ + + def fn(proxy_host, proxy_port, target_host, target_port): + import socket + import threading + import time + from http.server import BaseHTTPRequestHandler, HTTPServer + + class Handler(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + body = b"forward-proxy-ok" + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, *args): + pass # suppress log output + + srv = HTTPServer(("0.0.0.0", int(target_port)), Handler) + threading.Thread(target=srv.handle_request, daemon=True).start() + time.sleep(0.5) + + conn = socket.create_connection((proxy_host, int(proxy_port)), timeout=10) + try: + req = ( + f"GET http://{target_host}:{target_port}/test HTTP/1.1\r\n" + f"Host: {target_host}:{target_port}\r\n\r\n" + ) + conn.sendall(req.encode()) + data = b"" + conn.settimeout(5) + try: + while True: + chunk = conn.recv(4096) + if not chunk: + break + data += chunk + except socket.timeout: + pass + return data.decode("latin1") + finally: + conn.close() + srv.server_close() + + return fn + + +def _forward_proxy_raw(): + """Return a closure that sends a forward proxy request (no server needed). + + For testing deny cases — sends the request and returns whatever the proxy + responds with. 
+ """ + + def fn(proxy_host, proxy_port, target_url): + import socket + from urllib.parse import urlparse + + conn = socket.create_connection((proxy_host, int(proxy_port)), timeout=10) + try: + parsed = urlparse(target_url) + host_header = parsed.netloc or parsed.hostname + req = f"GET {target_url} HTTP/1.1\r\nHost: {host_header}\r\n\r\n" + conn.sendall(req.encode()) + return conn.recv(4096).decode("latin1") + finally: + conn.close() + + return fn + + def test_policy_applies_to_exec_commands( sandbox: Callable[..., Sandbox], ) -> None: @@ -1174,3 +1252,378 @@ def test_live_policy_update_and_logs( connect_logs = [l for l in sandbox_logs if "CONNECT" in l.message] if connect_logs: assert has_fields, "CONNECT logs should have structured fields" + + +# ============================================================================= +# Forward proxy tests (plain HTTP, non-CONNECT) +# ============================================================================= + +# The sandbox's own IP within the network namespace +_SANDBOX_IP = "10.200.0.2" +_FORWARD_PROXY_PORT = 19876 + + +def test_forward_proxy_allows_private_ip_with_allowed_ips( + sandbox: Callable[..., Sandbox], +) -> None: + """FWD-1: Forward proxy GET to private IP with allowed_ips succeeds. + + Starts an HTTP server inside the sandbox, sends a plain forward proxy + request through the sandbox proxy, and verifies the response is relayed. 
+ """ + policy = _base_policy( + network_policies={ + "internal_http": sandbox_pb2.NetworkPolicyRule( + name="internal_http", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host=_SANDBOX_IP, + port=_FORWARD_PROXY_PORT, + allowed_ips=["10.200.0.0/24"], + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python( + _forward_proxy_with_server(), + args=(_PROXY_HOST, _PROXY_PORT, _SANDBOX_IP, _FORWARD_PROXY_PORT), + ) + assert result.exit_code == 0, result.stderr + assert "200" in result.stdout, ( + f"Expected 200 in forward proxy response, got: {result.stdout}" + ) + assert "forward-proxy-ok" in result.stdout, ( + f"Expected response body relayed, got: {result.stdout}" + ) + + +def test_forward_proxy_denied_without_allowed_ips( + sandbox: Callable[..., Sandbox], +) -> None: + """FWD-2: Forward proxy to private IP without allowed_ips -> 403. + + Even though the endpoint matches, forward proxy requires explicit + allowed_ips on the endpoint. + """ + policy = _base_policy( + network_policies={ + "internal_http": sandbox_pb2.NetworkPolicyRule( + name="internal_http", + endpoints=[ + # No allowed_ips — forward proxy should be denied + sandbox_pb2.NetworkEndpoint( + host=_SANDBOX_IP, + port=_FORWARD_PROXY_PORT, + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python( + _forward_proxy_raw(), + args=( + _PROXY_HOST, + _PROXY_PORT, + f"http://{_SANDBOX_IP}:{_FORWARD_PROXY_PORT}/test", + ), + ) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout, ( + f"Expected 403 without allowed_ips, got: {result.stdout}" + ) + + +def test_forward_proxy_rejects_https_scheme( + sandbox: Callable[..., Sandbox], +) -> None: + """FWD-3: Forward proxy with https:// scheme -> 400. 
+ + HTTPS must use CONNECT tunneling, not forward proxy. + """ + policy = _base_policy( + network_policies={ + "internal_http": sandbox_pb2.NetworkPolicyRule( + name="internal_http", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host=_SANDBOX_IP, + port=_FORWARD_PROXY_PORT, + allowed_ips=["10.200.0.0/24"], + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python( + _forward_proxy_raw(), + args=( + _PROXY_HOST, + _PROXY_PORT, + f"https://{_SANDBOX_IP}:{_FORWARD_PROXY_PORT}/test", + ), + ) + assert result.exit_code == 0, result.stderr + assert "400" in result.stdout, ( + f"Expected 400 for HTTPS forward proxy, got: {result.stdout}" + ) + + +def test_forward_proxy_denied_no_policy_match( + sandbox: Callable[..., Sandbox], +) -> None: + """FWD-4: Forward proxy to unmatched host:port -> 403.""" + policy = _base_policy( + network_policies={ + "other": sandbox_pb2.NetworkPolicyRule( + name="other", + endpoints=[ + # Policy for a different host/port + sandbox_pb2.NetworkEndpoint( + host="10.200.0.1", + port=9999, + allowed_ips=["10.200.0.0/24"], + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python( + _forward_proxy_raw(), + args=( + _PROXY_HOST, + _PROXY_PORT, + f"http://{_SANDBOX_IP}:{_FORWARD_PROXY_PORT}/test", + ), + ) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout, ( + f"Expected 403 for unmatched policy, got: {result.stdout}" + ) + + +def test_forward_proxy_public_ip_denied( + sandbox: Callable[..., Sandbox], +) -> None: + """FWD-5: Forward proxy to public IP -> 403. + + Even with allowed_ips, forward proxy is restricted to private IPs. + Plain HTTP should never traverse the public internet. 
+ """ + policy = _base_policy( + network_policies={ + "public": sandbox_pb2.NetworkPolicyRule( + name="public", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host="example.com", + port=80, + allowed_ips=["93.184.0.0/16"], + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python( + _forward_proxy_raw(), + args=(_PROXY_HOST, _PROXY_PORT, "http://example.com/"), + ) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout, ( + f"Expected 403 for public IP forward proxy, got: {result.stdout}" + ) + + +def test_forward_proxy_log_fields( + sandbox: Callable[..., Sandbox], +) -> None: + """FWD-6: Forward proxy requests produce structured FORWARD log lines.""" + policy = _base_policy( + network_policies={ + "internal_http": sandbox_pb2.NetworkPolicyRule( + name="internal_http", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host=_SANDBOX_IP, + port=_FORWARD_PROXY_PORT, + allowed_ips=["10.200.0.0/24"], + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + # Trigger an allowed forward proxy request (with server) + sb.exec_python( + _forward_proxy_with_server(), + args=(_PROXY_HOST, _PROXY_PORT, _SANDBOX_IP, _FORWARD_PROXY_PORT), + ) + # Trigger a denied forward proxy request (no allowed_ips match) + sb.exec_python( + _forward_proxy_raw(), + args=( + _PROXY_HOST, + _PROXY_PORT, + "http://example.com/", + ), + ) + # Read the log + result = sb.exec_python(_read_navigator_log()) + assert result.exit_code == 0, result.stderr + log = result.stdout + + assert "FORWARD" in log, "Expected FORWARD log lines" + # tracing key-value pairs quote string values: action="allow" + assert 'action="allow"' in log, "Expected allowed FORWARD in logs" + assert f"dst_host={_SANDBOX_IP}" in log, "Expected 
dst_host in FORWARD log" + assert f"dst_port={_FORWARD_PROXY_PORT}" in log, ( + "Expected dst_port in FORWARD log" + ) + + +# ============================================================================= +# Baseline filesystem path enrichment tests (BFS-*) +# ============================================================================= + + +def _verify_sandbox_functional(): + """Return a closure that verifies basic sandbox functionality.""" + + def fn(): + import json + import os + import sys + + checks = {} + # Can resolve DNS config + checks["resolv_conf"] = os.path.exists("/etc/resolv.conf") + # Can access shared libraries + checks["lib_exists"] = os.path.isdir("/usr/lib") + # Python interpreter works + checks["python_version"] = sys.version + # Can write to /tmp + tmp_path = "/tmp/enrichment_test.txt" + try: + with open(tmp_path, "w") as f: + f.write("ok") + with open(tmp_path) as f: + checks["tmp_write"] = f.read() == "ok" + os.unlink(tmp_path) + except Exception as e: + checks["tmp_write"] = str(e) + # Can write to /sandbox + sb_path = "/sandbox/enrichment_test.txt" + try: + with open(sb_path, "w") as f: + f.write("ok") + with open(sb_path) as f: + checks["sandbox_write"] = f.read() == "ok" + os.unlink(sb_path) + except Exception as e: + checks["sandbox_write"] = str(e) + # Can read navigator log + checks["var_log"] = os.path.exists("/var/log/navigator.log") + return json.dumps(checks) + + return fn + + +def test_baseline_enrichment_missing_filesystem_policy( + sandbox: Callable[..., Sandbox], +) -> None: + """BFS-1: Sandbox with network_policies but NO filesystem_policy should + come up and function correctly thanks to baseline path enrichment.""" + # Intentionally omit filesystem, landlock, and process fields — + # only provide network_policies. 
+ spec = datamodel_pb2.SandboxSpec( + policy=sandbox_pb2.SandboxPolicy( + version=1, + network_policies={ + "test": sandbox_pb2.NetworkPolicyRule( + name="test", + endpoints=[ + sandbox_pb2.NetworkEndpoint(host="example.com", port=443), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ), + ) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python(_verify_sandbox_functional()) + assert result.exit_code == 0, ( + f"Sandbox with missing filesystem_policy failed to run: {result.stderr}" + ) + import json + + checks = json.loads(result.stdout) + assert checks["resolv_conf"] is True, "DNS config not accessible" + assert checks["lib_exists"] is True, "Shared libraries not accessible" + assert checks["tmp_write"] is True, f"/tmp not writable: {checks['tmp_write']}" + assert checks["sandbox_write"] is True, ( + f"/sandbox not writable: {checks['sandbox_write']}" + ) + assert checks["var_log"] is True, "Navigator log not accessible" + + +def test_baseline_enrichment_incomplete_filesystem_policy( + sandbox: Callable[..., Sandbox], +) -> None: + """BFS-2: Sandbox with filesystem_policy that only has /sandbox should + still function because baseline enrichment adds missing paths.""" + spec = datamodel_pb2.SandboxSpec( + policy=sandbox_pb2.SandboxPolicy( + version=1, + filesystem=sandbox_pb2.FilesystemPolicy( + include_workdir=True, + read_only=[], + read_write=["/sandbox"], + ), + landlock=sandbox_pb2.LandlockPolicy(compatibility="best_effort"), + process=sandbox_pb2.ProcessPolicy( + run_as_user="sandbox", + run_as_group="sandbox", + ), + network_policies={ + "test": sandbox_pb2.NetworkPolicyRule( + name="test", + endpoints=[ + sandbox_pb2.NetworkEndpoint(host="example.com", port=443), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ), + ) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python(_verify_sandbox_functional()) + assert result.exit_code == 0, ( + f"Sandbox with incomplete 
filesystem_policy failed to run: {result.stderr}" + ) + import json + + checks = json.loads(result.stdout) + assert checks["resolv_conf"] is True, "DNS config not accessible" + assert checks["lib_exists"] is True, "Shared libraries not accessible" + assert checks["tmp_write"] is True, f"/tmp not writable: {checks['tmp_write']}" + assert checks["sandbox_write"] is True, ( + f"/sandbox not writable: {checks['sandbox_write']}" + ) + assert checks["var_log"] is True, "Navigator log not accessible" diff --git a/e2e/python/test_sandbox_providers.py b/e2e/python/test_sandbox_providers.py index 7bcf12a6..a2ae7144 100644 --- a/e2e/python/test_sandbox_providers.py +++ b/e2e/python/test_sandbox_providers.py @@ -28,7 +28,6 @@ def _default_policy() -> sandbox_pb2.SandboxPolicy: return sandbox_pb2.SandboxPolicy( version=1, - inference=sandbox_pb2.InferencePolicy(allowed_routes=["local"]), filesystem=sandbox_pb2.FilesystemPolicy( include_workdir=True, read_only=["/usr", "/lib", "/etc", "/app"], diff --git a/e2e/python/test_sandbox_venv.py b/e2e/python/test_sandbox_venv.py new file mode 100644 index 00000000..1760a0a8 --- /dev/null +++ b/e2e/python/test_sandbox_venv.py @@ -0,0 +1,132 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for the writable sandbox venv, PATH, and package installation. + +Verifies that: +- /sandbox/.venv/bin is in PATH for both interactive and non-interactive sessions +- pip install works inside the sandbox (pypi policy in dev-sandbox-policy.yaml) +- uv pip install works (validates Landlock V2 cross-directory rename support) +- uv run --with works for ephemeral dependency injection +- Installed packages are importable after installation + +All tests use the default dev sandbox policy -- no custom policy overrides. 
+The SDK omits the policy field from the spec so the sandbox container discovers +its policy from /etc/navigator/policy.yaml (the dev-sandbox-policy.yaml baked +into the image), which already includes the pypi network policy. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable + + from navigator import Sandbox + + +def test_sandbox_venv_in_path( + sandbox: Callable[..., Sandbox], +) -> None: + """Non-interactive exec sees /sandbox/.venv/bin in PATH.""" + with sandbox(delete_on_exit=True) as sb: + result = sb.exec(["bash", "-c", "echo $PATH"], timeout_seconds=20) + assert result.exit_code == 0, result.stderr + path_dirs = result.stdout.strip().split(":") + assert "/sandbox/.venv/bin" in path_dirs, ( + f"Expected /sandbox/.venv/bin in PATH, got: {result.stdout.strip()}" + ) + # /sandbox/.venv/bin must come before /app/.venv/bin + sandbox_idx = path_dirs.index("/sandbox/.venv/bin") + app_idx = path_dirs.index("/app/.venv/bin") + assert sandbox_idx < app_idx, ( + "/sandbox/.venv/bin must precede /app/.venv/bin in PATH" + ) + + +def test_pip_install_in_sandbox( + sandbox: Callable[..., Sandbox], +) -> None: + """pip install works inside the sandbox and installed packages are importable.""" + with sandbox(delete_on_exit=True) as sb: + install = sb.exec( + ["pip", "install", "--quiet", "cowsay"], + timeout_seconds=60, + ) + assert install.exit_code == 0, ( + f"pip install failed:\nstdout: {install.stdout}\nstderr: {install.stderr}" + ) + + # Verify the package is importable + verify = sb.exec( + ["python", "-c", "import cowsay; print(cowsay.char_names[0])"], + timeout_seconds=20, + ) + assert verify.exit_code == 0, ( + f"import failed:\nstdout: {verify.stdout}\nstderr: {verify.stderr}" + ) + assert verify.stdout.strip(), "Expected non-empty output from cowsay" + + +def test_uv_pip_install_in_sandbox( + sandbox: Callable[..., Sandbox], +) -> None: + """uv pip install works inside the 
sandbox (validates Landlock V2 REFER support). + + Under Landlock V1 this would fail with EXDEV (cross-device link, os error 18) + because uv uses cross-directory rename() for cache population and installation. + Landlock V2 adds the REFER right which permits this. + """ + with sandbox(delete_on_exit=True) as sb: + install = sb.exec( + [ + "uv", + "pip", + "install", + "--python", + "/sandbox/.venv/bin/python", + "--quiet", + "cowsay", + ], + timeout_seconds=60, + ) + assert install.exit_code == 0, ( + f"uv pip install failed:\nstdout: {install.stdout}\nstderr: {install.stderr}" + ) + + # Verify the package is importable + verify = sb.exec( + ["python", "-c", "import cowsay; print(cowsay.char_names[0])"], + timeout_seconds=20, + ) + assert verify.exit_code == 0, ( + f"import failed after uv install:\n" + f"stdout: {verify.stdout}\nstderr: {verify.stderr}" + ) + assert verify.stdout.strip(), "Expected non-empty output from cowsay" + + +def test_uv_run_with_ephemeral_dependency( + sandbox: Callable[..., Sandbox], +) -> None: + """uv run --with installs a dependency on-the-fly and runs a script using it.""" + with sandbox(delete_on_exit=True) as sb: + result = sb.exec( + [ + "uv", + "run", + "--python", + "/sandbox/.venv/bin/python", + "--with", + "cowsay", + "python", + "-c", + "import cowsay; print(cowsay.char_names[0])", + ], + timeout_seconds=60, + ) + assert result.exit_code == 0, ( + f"uv run --with failed:\nstdout: {result.stdout}\nstderr: {result.stderr}" + ) + assert result.stdout.strip(), "Expected non-empty output from uv run" diff --git a/e2e/rust/Cargo.lock b/e2e/rust/Cargo.lock new file mode 100644 index 00000000..6dc48037 --- /dev/null +++ b/e2e/rust/Cargo.lock @@ -0,0 +1,803 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + 
+[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "nemoclaw-e2e" +version = "0.1.0" +dependencies = [ + "hex", + "rand", + "sha2", + "tempfile", + "tokio", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustix" 
+version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "signal-hook-registry" +version = 
"1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + 
+[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zerocopy" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml new file mode 100644 index 00000000..fa94eaed --- /dev/null +++ b/e2e/rust/Cargo.toml @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# Standalone crate — the empty [workspace] table prevents Cargo from +# treating this as part of the root workspace, avoiding Dockerfile +# and Cargo.lock coupling. +[workspace] + +[package] +name = "nemoclaw-e2e" +description = "End-to-end tests for the NemoClaw CLI" +version = "0.1.0" +edition = "2024" +rust-version = "1.88" +license = "Apache-2.0" +publish = false + +[features] +e2e = [] + +[dependencies] +tokio = { version = "1.43", features = ["full"] } +tempfile = "3" +sha2 = "0.10" +hex = "0.4" +rand = "0.9" + +[lints.rust] +unsafe_code = "warn" +rust_2018_idioms = { level = "warn", priority = -1 } + +[lints.clippy] +all = { level = "warn", priority = -1 } +pedantic = { level = "warn", priority = -1 } +module_name_repetitions = "allow" +must_use_candidate = "allow" +missing_errors_doc = "allow" +missing_panics_doc = "allow" diff --git a/e2e/rust/src/harness/binary.rs b/e2e/rust/src/harness/binary.rs new file mode 100644 index 00000000..d4160278 --- /dev/null +++ b/e2e/rust/src/harness/binary.rs @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! CLI binary resolution for e2e tests. +//! +//! Resolves the `nemoclaw` binary at `/target/debug/nemoclaw`. +//! The binary must already be built — the `e2e:rust` mise task handles +//! this by running `cargo build -p navigator-cli` before the tests. + +use std::path::{Path, PathBuf}; + +/// Locate the workspace root by walking up from the crate's manifest directory. +fn workspace_root() -> PathBuf { + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + // e2e/rust/ is two levels below the workspace root. + manifest_dir + .ancestors() + .nth(2) + .expect("failed to resolve workspace root from CARGO_MANIFEST_DIR") + .to_path_buf() +} + +/// Return the path to the `nemoclaw` CLI binary. +/// +/// Expects the binary at `/target/debug/nemoclaw`. 
+/// +/// # Panics +/// +/// Panics if the binary is not found. Run `cargo build -p navigator-cli` +/// (or `mise run e2e:rust`) first. +pub fn nemoclaw_bin() -> PathBuf { + let bin = workspace_root().join("target/debug/nemoclaw"); + assert!( + bin.is_file(), + "nemoclaw binary not found at {bin:?} — run `cargo build -p navigator-cli` first" + ); + bin +} + +/// Create a [`tokio::process::Command`] pre-configured to invoke the +/// `nemoclaw` CLI. +/// +/// The command has `kill_on_drop(true)` set so that background child processes +/// are cleaned up when the handle is dropped. +pub fn nemoclaw_cmd() -> tokio::process::Command { + let mut cmd = tokio::process::Command::new(nemoclaw_bin()); + cmd.kill_on_drop(true); + cmd +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn workspace_root_resolves() { + let root = workspace_root(); + assert!( + root.join("Cargo.toml").is_file(), + "workspace root should contain Cargo.toml: {root:?}" + ); + } +} diff --git a/e2e/rust/src/harness/mod.rs b/e2e/rust/src/harness/mod.rs new file mode 100644 index 00000000..b3add2c0 --- /dev/null +++ b/e2e/rust/src/harness/mod.rs @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Shared test harness modules for CLI e2e tests. + +pub mod binary; +pub mod output; +pub mod port; +pub mod sandbox; diff --git a/e2e/rust/src/harness/output.rs b/e2e/rust/src/harness/output.rs new file mode 100644 index 00000000..c1c926e6 --- /dev/null +++ b/e2e/rust/src/harness/output.rs @@ -0,0 +1,96 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! CLI output parsing utilities. + +/// Strip ANSI escape codes (e.g. colors, bold) from a string. +/// +/// Handles the common `ESC[m` SGR sequences produced by the CLI's +/// `owo-colors` output. 
+pub fn strip_ansi(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + let mut chars = s.chars().peekable(); + + while let Some(c) = chars.next() { + if c == '\x1b' { + // Consume the `[` and everything up to the terminating letter. + if chars.peek() == Some(&'[') { + chars.next(); // consume '[' + // Consume parameter bytes (digits, ';') and the final byte. + for c in chars.by_ref() { + if c.is_ascii_alphabetic() { + break; + } + } + } + } else { + out.push(c); + } + } + + out +} + +/// Extract a field value from CLI tabular output. +/// +/// Given output like: +/// ```text +/// Name: fuzzy-panda +/// Status: Running +/// ``` +/// +/// `extract_field(output, "Name")` returns `Some("fuzzy-panda")`. +/// +/// The search is performed on ANSI-stripped text. +pub fn extract_field(output: &str, field: &str) -> Option { + let clean = strip_ansi(output); + let prefix = format!("{field}:"); + + for line in clean.lines() { + let trimmed = line.trim(); + if let Some(rest) = trimmed.strip_prefix(&prefix) { + let value = rest.trim(); + if !value.is_empty() { + return Some(value.to_string()); + } + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn strip_ansi_removes_color_codes() { + let colored = "\x1b[1m\x1b[32mName:\x1b[0m fuzzy-panda"; + assert_eq!(strip_ansi(colored), "Name: fuzzy-panda"); + } + + #[test] + fn strip_ansi_passthrough_plain_text() { + let plain = "no colors here"; + assert_eq!(strip_ansi(plain), plain); + } + + #[test] + fn extract_field_finds_value() { + let output = " Name: fuzzy-panda\n Status: Running\n"; + assert_eq!(extract_field(output, "Name"), Some("fuzzy-panda".into())); + assert_eq!(extract_field(output, "Status"), Some("Running".into())); + } + + #[test] + fn extract_field_with_ansi() { + let output = "\x1b[1mName:\x1b[0m fuzzy-panda\n"; + assert_eq!(extract_field(output, "Name"), Some("fuzzy-panda".into())); + } + + #[test] + fn extract_field_missing_returns_none() { + let output = " Name: 
fuzzy-panda\n"; + assert_eq!(extract_field(output, "Missing"), None); + } +} diff --git a/e2e/rust/src/harness/port.rs b/e2e/rust/src/harness/port.rs new file mode 100644 index 00000000..70f45499 --- /dev/null +++ b/e2e/rust/src/harness/port.rs @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! TCP port utilities for e2e tests. + +use std::net::{Ipv4Addr, SocketAddrV4, TcpListener}; +use std::time::Duration; + +use tokio::net::TcpStream; +use tokio::time::{interval, timeout}; + +/// Wait for a TCP port to accept connections. +/// +/// Polls once per second until either a connection succeeds or the timeout +/// elapses. Returns `Ok(())` on success, `Err` on timeout. +/// +/// # Errors +/// +/// Returns an error if the port does not accept a connection within `max_wait`. +pub async fn wait_for_port(host: &str, port: u16, max_wait: Duration) -> Result<(), String> { + let addr = format!("{host}:{port}"); + + let result = timeout(max_wait, async { + let mut tick = interval(Duration::from_secs(1)); + loop { + tick.tick().await; + if TcpStream::connect(&addr).await.is_ok() { + return; + } + } + }) + .await; + + match result { + Ok(()) => Ok(()), + Err(_) => Err(format!( + "port {port} on {host} did not accept connections within {max_wait:?}" + )), + } +} + +/// Find an available TCP port by binding to port 0. +/// +/// The OS assigns an ephemeral port which is returned. The listener is dropped +/// immediately, freeing the port for use by the test. There is a small TOCTOU +/// window, but it is acceptable for test code. +/// +/// # Panics +/// +/// Panics if the OS cannot allocate an ephemeral port. 
+pub fn find_free_port() -> u16 { + let listener = + TcpListener::bind(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0)).expect("bind to port 0"); + listener + .local_addr() + .expect("local_addr after bind") + .port() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn find_free_port_returns_nonzero() { + let port = find_free_port(); + assert_ne!(port, 0); + } + + #[tokio::test] + async fn wait_for_port_succeeds_when_listening() { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + + wait_for_port("127.0.0.1", port, Duration::from_secs(5)) + .await + .expect("should connect to listening port"); + } + + #[tokio::test] + async fn wait_for_port_times_out_when_nothing_listens() { + // Port 1 is almost certainly not listening and requires root. + let result = wait_for_port("127.0.0.1", 1, Duration::from_secs(2)).await; + assert!(result.is_err()); + } +} diff --git a/e2e/rust/src/harness/sandbox.rs b/e2e/rust/src/harness/sandbox.rs new file mode 100644 index 00000000..05fc2267 --- /dev/null +++ b/e2e/rust/src/harness/sandbox.rs @@ -0,0 +1,448 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Sandbox lifecycle management with automatic cleanup. +//! +//! [`SandboxGuard`] creates a sandbox and ensures it is deleted when the guard +//! is dropped, replacing the `trap cleanup EXIT` pattern from the bash tests. + +use std::process::Stdio; +use std::time::Duration; + +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::time::timeout; + +use super::binary::nemoclaw_cmd; +use super::output::{extract_field, strip_ansi}; + +/// Default timeout for waiting for a sandbox to become ready. +const SANDBOX_READY_TIMEOUT: Duration = Duration::from_secs(120); + +/// RAII guard that deletes a sandbox on drop. 
+/// +/// For sandboxes created with `--keep` (long-running background command), the +/// guard also holds the child process handle and kills it during cleanup. +pub struct SandboxGuard { + /// The sandbox name, parsed from CLI output. + pub name: String, + + /// The full captured stdout from the create command (for short-lived + /// sandboxes). Empty for `--keep` sandboxes where output is streamed. + pub create_output: String, + + /// Background child process for `--keep` sandboxes. + child: Option, + + /// Whether cleanup has already been performed. + cleaned_up: bool, +} + +impl SandboxGuard { + /// Create a sandbox that runs a command to completion (no `--keep`). + /// + /// Captures the full CLI output and parses the sandbox name from it. + /// The sandbox is created synchronously (the CLI blocks until the command + /// finishes). + /// + /// # Arguments + /// + /// * `args` — Extra arguments to `nemoclaw sandbox create`, including + /// `-- ` if needed. + /// + /// # Errors + /// + /// Returns an error if the CLI exits with a non-zero status or the sandbox + /// name cannot be parsed from the output. 
+ pub async fn create(args: &[&str]) -> Result { + let mut cmd = nemoclaw_cmd(); + cmd.arg("sandbox").arg("create"); + for arg in args { + cmd.arg(arg); + } + cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); + + let output = cmd + .output() + .await + .map_err(|e| format!("failed to spawn nemoclaw: {e}"))?; + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + let combined = format!("{stdout}{stderr}"); + + if !output.status.success() { + return Err(format!( + "sandbox create failed (exit {:?}):\n{combined}", + output.status.code() + )); + } + + let name = extract_field(&combined, "Name").ok_or_else(|| { + format!("could not parse sandbox name from create output:\n{combined}") + })?; + + Ok(Self { + name, + create_output: combined, + child: None, + cleaned_up: false, + }) + } + + /// Create a sandbox with `--keep` that runs a long-lived background + /// command. + /// + /// The CLI process runs in the background. This method polls its stdout + /// for `ready_marker` (a string the background command prints when it is + /// ready to accept work). Sandbox name is parsed from the output header. + /// + /// # Arguments + /// + /// * `command` — The command and arguments to run inside the sandbox + /// (passed after `--`). + /// * `ready_marker` — A string to wait for in the combined output that + /// signals readiness. + /// + /// # Errors + /// + /// Returns an error if the process exits prematurely, the ready marker is + /// not seen within [`SANDBOX_READY_TIMEOUT`], or the sandbox name cannot + /// be parsed. 
+    pub async fn create_keep(
+        command: &[&str],
+        ready_marker: &str,
+    ) -> Result<Self, String> {
+        let mut cmd = nemoclaw_cmd();
+        cmd.arg("sandbox")
+            .arg("create")
+            .arg("--keep")
+            .arg("--")
+            .args(command);
+        cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+
+        let mut child = cmd
+            .spawn()
+            .map_err(|e| format!("failed to spawn nemoclaw: {e}"))?;
+
+        let stdout = child.stdout.take().expect("stdout must be piped");
+        let mut reader = BufReader::new(stdout).lines();
+
+        let mut accumulated = String::new();
+        let mut name: Option<String> = None;
+        let mut ready = false;
+
+        let poll_result = timeout(SANDBOX_READY_TIMEOUT, async {
+            while let Ok(Some(line)) = reader.next_line().await {
+                let clean = strip_ansi(&line);
+                accumulated.push_str(&clean);
+                accumulated.push('\n');
+
+                // Try to extract the sandbox name from the header.
+                if name.is_none() {
+                    if let Some(n) = extract_field(&accumulated, "Name") {
+                        name = Some(n);
+                    }
+                }
+
+                // Check for the ready marker.
+                if clean.contains(ready_marker) {
+                    ready = true;
+                    break;
+                }
+            }
+        })
+        .await;
+
+        if poll_result.is_err() {
+            // Timeout — kill the child and report.
+            let _ = child.kill().await;
+            return Err(format!(
+                "sandbox did not become ready within {SANDBOX_READY_TIMEOUT:?}.\n\
+                 Output so far:\n{accumulated}"
+            ));
+        }
+
+        if !ready {
+            // The line reader ended before seeing the marker (process exited).
+            let _ = child.kill().await;
+            return Err(format!(
+                "sandbox create exited before ready marker '{ready_marker}' was seen.\n\
+                 Output:\n{accumulated}"
+            ));
+        }
+
+        let sandbox_name = name.ok_or_else(|| {
+            format!("could not parse sandbox name from create output:\n{accumulated}")
+        })?;
+
+        Ok(Self {
+            name: sandbox_name,
+            create_output: accumulated,
+            child: Some(child),
+            cleaned_up: false,
+        })
+    }
+
+    /// Create a sandbox that runs a command, with `--upload` to pre-load files.
+    ///
+    /// Equivalent to:
+    /// ```text
+    /// nemoclaw sandbox create --upload <local>:<dest> [extra_args...] 
-- <command>
+    /// ```
+    ///
+    /// The `--no-git-ignore` flag is passed to avoid needing a git repository.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the CLI exits with a non-zero status or the sandbox
+    /// name cannot be parsed.
+    pub async fn create_with_upload(
+        upload_local: &str,
+        upload_dest: &str,
+        command: &[&str],
+    ) -> Result<Self, String> {
+        let upload_spec = format!("{upload_local}:{upload_dest}");
+
+        let mut cmd = nemoclaw_cmd();
+        cmd.arg("sandbox")
+            .arg("create")
+            .arg("--upload")
+            .arg(&upload_spec)
+            .arg("--no-git-ignore")
+            .arg("--")
+            .args(command);
+        cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .map_err(|e| format!("failed to spawn nemoclaw: {e}"))?;
+
+        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+        let combined = format!("{stdout}{stderr}");
+
+        if !output.status.success() {
+            return Err(format!(
+                "sandbox create --upload failed (exit {:?}):\n{combined}",
+                output.status.code()
+            ));
+        }
+
+        let name = extract_field(&combined, "Name").ok_or_else(|| {
+            format!("could not parse sandbox name from create output:\n{combined}")
+        })?;
+
+        Ok(Self {
+            name,
+            create_output: combined,
+            child: None,
+            cleaned_up: false,
+        })
+    }
+
+    /// Upload local files to the sandbox via `nemoclaw sandbox upload`.
+    ///
+    /// # Arguments
+    ///
+    /// * `local_path` — Local file or directory to upload.
+    /// * `dest` — Destination path in the sandbox (e.g. `/sandbox/uploaded`).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the upload command fails.
+    pub async fn upload(&self, local_path: &str, dest: &str) -> Result<String, String> {
+        let mut cmd = nemoclaw_cmd();
+        cmd.arg("sandbox")
+            .arg("upload")
+            .arg(&self.name)
+            .arg(local_path)
+            .arg(dest)
+            .arg("--no-git-ignore");
+        cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .map_err(|e| format!("failed to spawn nemoclaw upload: {e}"))?;
+
+        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+        let combined = format!("{stdout}{stderr}");
+
+        if !output.status.success() {
+            return Err(format!(
+                "sandbox upload failed (exit {:?}):\n{combined}",
+                output.status.code()
+            ));
+        }
+
+        Ok(combined)
+    }
+
+    /// Upload local files with `.gitignore` filtering (default behavior).
+    ///
+    /// Unlike [`upload`], this does NOT pass `--no-git-ignore`, so the CLI
+    /// will filter out gitignored files. The `cwd` is set to the given
+    /// directory so that `git_repo_root()` inside the CLI resolves correctly.
+    ///
+    /// # Arguments
+    ///
+    /// * `local_path` — Local file or directory to upload.
+    /// * `dest` — Destination path in the sandbox.
+    /// * `cwd` — Working directory for the CLI process (should be inside a git
+    ///   repo).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the upload command fails.
+    pub async fn upload_with_gitignore(
+        &self,
+        local_path: &str,
+        dest: &str,
+        cwd: &std::path::Path,
+    ) -> Result<String, String> {
+        let mut cmd = nemoclaw_cmd();
+        cmd.arg("sandbox")
+            .arg("upload")
+            .arg(&self.name)
+            .arg(local_path)
+            .arg(dest)
+            .current_dir(cwd);
+        cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .map_err(|e| format!("failed to spawn nemoclaw upload: {e}"))?;
+
+        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+        let combined = format!("{stdout}{stderr}");
+
+        if !output.status.success() {
+            return Err(format!(
+                "sandbox upload (with gitignore) failed (exit {:?}):\n{combined}",
+                output.status.code()
+            ));
+        }
+
+        Ok(combined)
+    }
+
+    /// Download files from the sandbox via `nemoclaw sandbox download`.
+    ///
+    /// # Arguments
+    ///
+    /// * `sandbox_path` — Path inside the sandbox to download.
+    /// * `local_dest` — Local destination directory.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the download command fails.
+    pub async fn download(
+        &self,
+        sandbox_path: &str,
+        local_dest: &str,
+    ) -> Result<String, String> {
+        let mut cmd = nemoclaw_cmd();
+        cmd.arg("sandbox")
+            .arg("download")
+            .arg(&self.name)
+            .arg(sandbox_path)
+            .arg(local_dest);
+        cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .map_err(|e| format!("failed to spawn nemoclaw download: {e}"))?;
+
+        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+        let combined = format!("{stdout}{stderr}");
+
+        if !output.status.success() {
+            return Err(format!(
+                "sandbox download failed (exit {:?}):\n{combined}",
+                output.status.code()
+            ));
+        }
+
+        Ok(combined)
+    }
+
+    /// Spawn `nemoclaw forward start` as a background process.
+    ///
+    /// Returns the child process handle.
 The caller is responsible for killing
+    /// it (or it will be killed on drop since `kill_on_drop(true)` is set).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the process cannot be spawned.
+    pub fn spawn_forward(&self, port: u16) -> Result<tokio::process::Child, String> {
+        let mut cmd = nemoclaw_cmd();
+        cmd.arg("forward")
+            .arg("start")
+            .arg(port.to_string())
+            .arg(&self.name);
+        cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+
+        cmd.spawn()
+            .map_err(|e| format!("failed to spawn port forward: {e}"))
+    }
+
+    /// Delete the sandbox explicitly.
+    ///
+    /// Also kills the background child process if one exists. This is called
+    /// automatically by [`Drop`], but can be called manually for clarity.
+    pub async fn cleanup(&mut self) {
+        if self.cleaned_up {
+            return;
+        }
+        self.cleaned_up = true;
+
+        // Kill the background child process if present.
+        if let Some(ref mut child) = self.child {
+            let _ = child.kill().await;
+            let _ = child.wait().await;
+        }
+
+        // Delete the sandbox.
+        let mut cmd = nemoclaw_cmd();
+        cmd.arg("sandbox").arg("delete").arg(&self.name);
+        cmd.stdout(Stdio::null()).stderr(Stdio::null());
+
+        let _ = cmd.status().await;
+    }
+}
+
+impl Drop for SandboxGuard {
+    fn drop(&mut self) {
+        if self.cleaned_up {
+            return;
+        }
+
+        // We need to run async cleanup in a sync Drop. Spawn a detached
+        // thread with its own runtime so we never block (or nest) the test's
+        // tokio runtime. This is acceptable for test code.
+        let name = self.name.clone();
+        let mut child = self.child.take();
+
+        // Best-effort: kill any background child, then delete the sandbox.
+        std::thread::spawn(move || {
+            let rt = tokio::runtime::Runtime::new().expect("create cleanup runtime");
+            rt.block_on(async {
+                if let Some(ref mut child) = child {
+                    let _: Result<(), _> = child.kill().await;
+                    let _ = child.wait().await;
+                }
+
+                let mut cmd = nemoclaw_cmd();
+                cmd.arg("sandbox").arg("delete").arg(&name);
+                cmd.stdout(Stdio::null()).stderr(Stdio::null());
+                let _ = cmd.status().await;
+            });
+        });
+    }
+}
diff --git a/e2e/rust/src/lib.rs b/e2e/rust/src/lib.rs
new file mode 100644
index 00000000..61422ebb
--- /dev/null
+++ b/e2e/rust/src/lib.rs
@@ -0,0 +1,12 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Test harness for `NemoClaw` CLI end-to-end tests.
+//!
+//! Provides utilities for:
+//! - Resolving and invoking the `nemoclaw` CLI binary
+//! - Managing sandbox lifecycle with automatic cleanup
+//! - Parsing CLI output (ANSI stripping, field extraction)
+//! - TCP port utilities (wait for port, find free port)
+
+pub mod harness;
diff --git a/e2e/rust/tests/cli_smoke.rs b/e2e/rust/tests/cli_smoke.rs
new file mode 100644
index 00000000..2d8f2182
--- /dev/null
+++ b/e2e/rust/tests/cli_smoke.rs
@@ -0,0 +1,166 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! CLI smoke tests that verify command structure and graceful error handling.
+//!
+//! These tests do NOT require a running gateway — they exercise the CLI binary
+//! directly, validating that the restructured command tree parses correctly and
+//! handles edge cases like missing gateway configuration.
+
+use std::process::Stdio;
+
+use nemoclaw_e2e::harness::binary::nemoclaw_cmd;
+use nemoclaw_e2e::harness::output::strip_ansi;
+
+/// Run `nemoclaw <args>` with an isolated (empty) config directory so it
+/// cannot discover any real gateway.
+async fn run_isolated(args: &[&str]) -> (String, i32) { + let tmpdir = tempfile::tempdir().expect("create isolated config dir"); + let mut cmd = nemoclaw_cmd(); + cmd.args(args) + .env("XDG_CONFIG_HOME", tmpdir.path()) + .env("HOME", tmpdir.path()) + .env_remove("NEMOCLAW_CLUSTER") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let output = cmd.output().await.expect("spawn nemoclaw"); + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + let combined = format!("{stdout}{stderr}"); + let code = output.status.code().unwrap_or(-1); + (combined, code) +} + +// ------------------------------------------------------------------- +// Top-level --help shows the restructured command tree +// ------------------------------------------------------------------- + +/// `nemoclaw --help` must list the new top-level commands: gateway, status, +/// forward, logs, policy. +#[tokio::test] +async fn help_shows_restructured_commands() { + let (output, code) = run_isolated(&["--help"]).await; + assert_eq!(code, 0, "nemoclaw --help should exit 0"); + + let clean = strip_ansi(&output); + for cmd in ["gateway", "status", "sandbox", "forward", "logs", "policy"] { + assert!( + clean.contains(cmd), + "expected '{cmd}' in --help output:\n{clean}" + ); + } +} + +/// `nemoclaw gateway --help` must list start, stop, destroy, select, info. +#[tokio::test] +async fn gateway_help_shows_subcommands() { + let (output, code) = run_isolated(&["gateway", "--help"]).await; + assert_eq!(code, 0, "nemoclaw gateway --help should exit 0"); + + let clean = strip_ansi(&output); + for sub in ["start", "stop", "destroy", "select", "info"] { + assert!( + clean.contains(sub), + "expected '{sub}' in gateway --help output:\n{clean}" + ); + } +} + +/// `nemoclaw sandbox --help` must list upload and download alongside create, +/// get, list, delete, connect. 
+#[tokio::test] +async fn sandbox_help_shows_upload_download() { + let (output, code) = run_isolated(&["sandbox", "--help"]).await; + assert_eq!(code, 0, "nemoclaw sandbox --help should exit 0"); + + let clean = strip_ansi(&output); + for sub in ["upload", "download", "create", "get", "list", "delete", "connect"] { + assert!( + clean.contains(sub), + "expected '{sub}' in sandbox --help output:\n{clean}" + ); + } +} + +/// `nemoclaw sandbox create --help` must show `--upload`, `--no-git-ignore`, +/// `--bootstrap`/`--no-bootstrap`, and `--auto-providers`/`--no-auto-providers`. +#[tokio::test] +async fn sandbox_create_help_shows_new_flags() { + let (output, code) = run_isolated(&["sandbox", "create", "--help"]).await; + assert_eq!(code, 0, "nemoclaw sandbox create --help should exit 0"); + + let clean = strip_ansi(&output); + for flag in [ + "--upload", + "--no-git-ignore", + "--bootstrap", + "--no-bootstrap", + "--auto-providers", + "--no-auto-providers", + ] { + assert!( + clean.contains(flag), + "expected '{flag}' in sandbox create --help:\n{clean}" + ); + } +} + +/// `nemoclaw gateway start --help` must show `--recreate`. +#[tokio::test] +async fn gateway_start_help_shows_recreate() { + let (output, code) = run_isolated(&["gateway", "start", "--help"]).await; + assert_eq!(code, 0, "nemoclaw gateway start --help should exit 0"); + + let clean = strip_ansi(&output); + assert!( + clean.contains("--recreate"), + "expected '--recreate' in gateway start --help:\n{clean}" + ); +} + +// ------------------------------------------------------------------- +// Graceful handling: `nemoclaw status` without a gateway +// ------------------------------------------------------------------- + +/// `nemoclaw status` with no gateway configured should exit 0 and print a +/// friendly message instead of erroring. 
+#[tokio::test] +async fn status_without_gateway_prints_friendly_message() { + let (output, code) = run_isolated(&["status"]).await; + assert_eq!( + code, 0, + "nemoclaw status should exit 0 even without a gateway, got output:\n{output}" + ); + + let clean = strip_ansi(&output); + assert!( + clean.contains("No gateway configured"), + "expected 'No gateway configured' in status output:\n{clean}" + ); + assert!( + clean.contains("nemoclaw gateway start"), + "expected hint to run 'nemoclaw gateway start':\n{clean}" + ); +} + +// ------------------------------------------------------------------- +// Hidden backwards-compat: `cluster admin deploy` is still parseable +// ------------------------------------------------------------------- + +/// `nemoclaw cluster admin deploy --help` should still work (hidden alias). +#[tokio::test] +async fn cluster_admin_deploy_help_is_accessible() { + let (output, code) = run_isolated(&["cluster", "admin", "deploy", "--help"]).await; + assert_eq!( + code, 0, + "cluster admin deploy --help should exit 0:\n{output}" + ); + + let clean = strip_ansi(&output); + // Should show the deploy options (name, remote, ssh-key, etc.) + assert!( + clean.contains("--name") || clean.contains("--remote"), + "expected deploy flags in cluster admin deploy --help:\n{clean}" + ); +} diff --git a/e2e/rust/tests/custom_image.rs b/e2e/rust/tests/custom_image.rs new file mode 100644 index 00000000..0dec3e1b --- /dev/null +++ b/e2e/rust/tests/custom_image.rs @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! E2E test: build a custom container image and run a sandbox with it. +//! +//! Prerequisites: +//! - A running nemoclaw gateway (`nemoclaw gateway start`) +//! - Docker daemon running (for image build) +//! 
- The `nemoclaw` binary (built automatically from the workspace) + +use std::io::Write; + +use nemoclaw_e2e::harness::output::strip_ansi; +use nemoclaw_e2e::harness::sandbox::SandboxGuard; + +const DOCKERFILE_CONTENT: &str = r#"FROM python:3.12-slim + +# iproute2 is required for sandbox network namespace isolation. +RUN apt-get update && apt-get install -y --no-install-recommends iproute2 \ + && rm -rf /var/lib/apt/lists/* + +# Create the sandbox user/group so the supervisor can switch to it. +RUN groupadd -g 1000 sandbox && \ + useradd -m -u 1000 -g sandbox sandbox + +# Write a marker file so we can verify this is our custom image. +RUN echo "custom-image-e2e-marker" > /opt/marker.txt + +CMD ["sleep", "infinity"] +"#; + +const MARKER: &str = "custom-image-e2e-marker"; + +/// Build a custom Docker image from a Dockerfile and verify that a sandbox +/// created from it contains the expected marker file. +#[tokio::test] +async fn sandbox_from_custom_dockerfile() { + // Step 1 — Write a temporary Dockerfile. + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dockerfile_path = tmpdir.path().join("Dockerfile"); + { + let mut f = std::fs::File::create(&dockerfile_path).expect("create Dockerfile"); + f.write_all(DOCKERFILE_CONTENT.as_bytes()) + .expect("write Dockerfile"); + } + + // Step 2 — Create a sandbox from the Dockerfile. + let dockerfile_str = dockerfile_path.to_str().expect("Dockerfile path is UTF-8"); + let mut guard = SandboxGuard::create(&[ + "--from", + dockerfile_str, + "--", + "cat", + "/opt/marker.txt", + ]) + .await + .expect("sandbox create from Dockerfile"); + + // Step 3 — Verify the marker file content appears in the output. + let clean_output = strip_ansi(&guard.create_output); + assert!( + clean_output.contains(MARKER), + "expected marker '{MARKER}' in sandbox output:\n{clean_output}" + ); + + // Explicit cleanup (also happens in Drop, but explicit is clearer in tests). 
+ guard.cleanup().await; +} diff --git a/e2e/rust/tests/port_forward.rs b/e2e/rust/tests/port_forward.rs new file mode 100644 index 00000000..935b8ce3 --- /dev/null +++ b/e2e/rust/tests/port_forward.rs @@ -0,0 +1,139 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! E2E test: TCP port forwarding through a sandbox. +//! +//! Prerequisites: +//! - A running nemoclaw gateway (`nemoclaw gateway start`) +//! - The `nemoclaw` binary (built automatically from the workspace) + +use std::time::Duration; + +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::net::TcpStream; + +use nemoclaw_e2e::harness::port::{find_free_port, wait_for_port}; +use nemoclaw_e2e::harness::sandbox::SandboxGuard; + +/// Python script that runs a single-threaded TCP echo server inside the +/// sandbox. It prints `echo-server-ready` to stdout once listening, which +/// the harness uses as the readiness marker. +fn echo_server_script(port: u16) -> String { + format!( + r" +import socket, sys, signal +signal.signal(signal.SIGHUP, signal.SIG_IGN) +signal.signal(signal.SIGTERM, lambda *_: sys.exit(0)) +port = {port} +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +sock.bind(('127.0.0.1', port)) +sock.listen(1) +sock.settimeout(300) +print('echo-server-ready', flush=True) +try: + while True: + conn, _ = sock.accept() + data = conn.recv(4096) + if data: + conn.sendall(b'echo:' + data) + conn.close() +except (socket.timeout, OSError): + pass +finally: + sock.close() +" + ) +} + +/// Create a sandbox with a TCP echo server, forward the port locally, send +/// data through it, and verify the echoed response. 
+#[tokio::test] +async fn port_forward_echo() { + let port = find_free_port(); + let script = echo_server_script(port); + + // --------------------------------------------------------------- + // Step 1 — Create a sandbox with the echo server running. + // --------------------------------------------------------------- + let mut guard = + SandboxGuard::create_keep(&["python3", "-c", &script], "echo-server-ready") + .await + .expect("sandbox create with echo server"); + + // --------------------------------------------------------------- + // Step 2 — Start port forwarding in the background. + // --------------------------------------------------------------- + let mut forward_child = guard + .spawn_forward(port) + .expect("spawn port forward"); + + // Wait for the local port to accept connections. + wait_for_port("127.0.0.1", port, Duration::from_secs(30)) + .await + .expect("local port should open for forwarding"); + + // Give the SSH tunnel a moment to fully establish the direct-tcpip channel. + tokio::time::sleep(Duration::from_secs(2)).await; + + // --------------------------------------------------------------- + // Step 3 — Send data through the forwarded port and verify response. 
+    // ---------------------------------------------------------------
+    let expected = "echo:hello-nav";
+    let mut last_response = String::new();
+
+    for attempt in 1..=5 {
+        match try_echo(port).await {
+            Ok(resp) if resp.starts_with(expected) => {
+                last_response = resp;
+                break;
+            }
+            Ok(resp) => {
+                last_response = resp;
+                eprintln!("attempt {attempt}: unexpected response '{last_response}', retrying...");
+            }
+            Err(e) => {
+                eprintln!("attempt {attempt}: connection error: {e}, retrying...");
+            }
+        }
+        tokio::time::sleep(Duration::from_secs(2)).await;
+    }
+
+    assert!(
+        last_response.starts_with(expected),
+        "expected response starting with '{expected}', got '{last_response}'"
+    );
+
+    // ---------------------------------------------------------------
+    // Cleanup — kill forward process, then sandbox guard handles the rest.
+    // ---------------------------------------------------------------
+    let _ = forward_child.kill().await;
+    let _ = forward_child.wait().await;
+    guard.cleanup().await;
+}
+
+/// Attempt to send `hello-nav\n` to the echo server and read the response.
+async fn try_echo(port: u16) -> Result<String, String> {
+    let mut stream = TcpStream::connect(format!("127.0.0.1:{port}"))
+        .await
+        .map_err(|e| format!("connect: {e}"))?;
+
+    stream
+        .write_all(b"hello-nav\n")
+        .await
+        .map_err(|e| format!("write: {e}"))?;
+
+    let mut buf = vec![0u8; 4096];
+    let n = tokio::time::timeout(Duration::from_secs(10), stream.read(&mut buf))
+        .await
+        .map_err(|_| "read timeout".to_string())?
+        .map_err(|e| format!("read: {e}"))?;
+
+    let response = String::from_utf8_lossy(&buf[..n])
+        .trim_end_matches(['\r', '\n'])
+        .to_string();
+
+    Ok(response)
+}
diff --git a/e2e/rust/tests/sync.rs b/e2e/rust/tests/sync.rs
new file mode 100644
index 00000000..4a3a7547
--- /dev/null
+++ b/e2e/rust/tests/sync.rs
@@ -0,0 +1,289 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! E2E test: bidirectional file upload/download with a sandbox. +//! +//! Prerequisites: +//! - A running nemoclaw gateway (`nemoclaw gateway start`) +//! - The `nemoclaw` binary (built automatically from the workspace) + +use std::fs; +use std::io::Write; +use std::process::Stdio; + +use sha2::{Digest, Sha256}; + +use nemoclaw_e2e::harness::sandbox::SandboxGuard; + +/// Create a long-running sandbox, upload and download files, and verify +/// contents. +/// +/// Covers: +/// 1. Directory round-trip (nested files) +/// 2. Large file round-trip (~512 KiB) with SHA-256 checksum verification +/// 3. Single-file round-trip +#[tokio::test] +async fn sandbox_file_upload_download_round_trip() { + // --------------------------------------------------------------- + // Step 1 — Create a sandbox with `--keep` running `sleep infinity`. + // --------------------------------------------------------------- + let mut guard = SandboxGuard::create_keep(&["sleep", "infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + + // --------------------------------------------------------------- + // Step 2 — Upload: push a local directory into the sandbox. + // --------------------------------------------------------------- + let upload_dir = tmpdir.path().join("upload"); + fs::create_dir_all(upload_dir.join("subdir")).expect("create upload dirs"); + fs::write(upload_dir.join("greeting.txt"), "hello-from-local").expect("write greeting.txt"); + fs::write(upload_dir.join("subdir/nested.txt"), "nested-content").expect("write nested.txt"); + + let upload_str = upload_dir.to_str().expect("upload path is UTF-8"); + guard + .upload(upload_str, "/sandbox/uploaded") + .await + .expect("upload directory"); + + // --------------------------------------------------------------- + // Step 3 — Download: pull the uploaded files back and verify. 
+ // --------------------------------------------------------------- + let download_dir = tmpdir.path().join("download"); + fs::create_dir_all(&download_dir).expect("create download dir"); + + let download_str = download_dir.to_str().expect("download path is UTF-8"); + guard + .download("/sandbox/uploaded", download_str) + .await + .expect("download directory"); + + // Verify top-level file. + let greeting = fs::read_to_string(download_dir.join("greeting.txt")) + .expect("read greeting.txt after download"); + assert_eq!( + greeting, "hello-from-local", + "greeting.txt content mismatch" + ); + + // Verify nested file. + let nested = fs::read_to_string(download_dir.join("subdir/nested.txt")) + .expect("read subdir/nested.txt after download"); + assert_eq!(nested, "nested-content", "subdir/nested.txt content mismatch"); + + // --------------------------------------------------------------- + // Step 4 — Large-file round-trip (~512 KiB) to exercise multi-chunk + // SSH transport. + // --------------------------------------------------------------- + let large_dir = tmpdir.path().join("large_upload"); + fs::create_dir_all(&large_dir).expect("create large_upload dir"); + + let large_file = large_dir.join("large.bin"); + { + let mut f = fs::File::create(&large_file).expect("create large.bin"); + let mut rng_data = vec![0u8; 512 * 1024]; // 512 KiB + rand::fill(&mut rng_data[..]); + f.write_all(&rng_data).expect("write large.bin"); + } + + let expected_hash = { + let data = fs::read(&large_file).expect("read large.bin for hash"); + let mut hasher = Sha256::new(); + hasher.update(&data); + hex::encode(hasher.finalize()) + }; + + let large_dir_str = large_dir.to_str().expect("large_dir path is UTF-8"); + guard + .upload(large_dir_str, "/sandbox/large_test") + .await + .expect("upload large file"); + + let large_down = tmpdir.path().join("large_download"); + fs::create_dir_all(&large_down).expect("create large_download dir"); + + let large_down_str = 
large_down.to_str().expect("large_down path is UTF-8"); + guard + .download("/sandbox/large_test", large_down_str) + .await + .expect("download large file"); + + let actual_data = fs::read(large_down.join("large.bin")).expect("read large.bin after download"); + let actual_hash = { + let mut hasher = Sha256::new(); + hasher.update(&actual_data); + hex::encode(hasher.finalize()) + }; + + assert_eq!( + expected_hash, actual_hash, + "large.bin SHA-256 mismatch after round-trip" + ); + assert_eq!( + actual_data.len(), + 512 * 1024, + "large.bin size mismatch: expected {} bytes, got {}", + 512 * 1024, + actual_data.len() + ); + + // --------------------------------------------------------------- + // Step 5 — Single-file round-trip. + // --------------------------------------------------------------- + let single_file = tmpdir.path().join("single.txt"); + fs::write(&single_file, "single-file-payload").expect("write single.txt"); + + let single_str = single_file.to_str().expect("single path is UTF-8"); + guard + .upload(single_str, "/sandbox") + .await + .expect("upload single file"); + + let single_down = tmpdir.path().join("single_down"); + fs::create_dir_all(&single_down).expect("create single_down dir"); + + let single_down_str = single_down.to_str().expect("single_down path is UTF-8"); + guard + .download("/sandbox/single.txt", single_down_str) + .await + .expect("download single file"); + + let single_content = fs::read_to_string(single_down.join("single.txt")) + .expect("read single.txt after download"); + assert_eq!( + single_content, "single-file-payload", + "single.txt content mismatch" + ); + + // --------------------------------------------------------------- + // Cleanup (guard also cleans up on drop). + // --------------------------------------------------------------- + guard.cleanup().await; +} + +/// Verify that `sandbox upload` respects `.gitignore` by default. 
+/// +/// Creates a temporary git repository with a `.gitignore` that excludes +/// `*.log` files, uploads the directory (without `--no-git-ignore`), and +/// confirms that tracked files arrive but ignored files do not. +#[tokio::test] +async fn upload_respects_gitignore_by_default() { + // --------------------------------------------------------------- + // Step 1 — Create a sandbox with `--keep`. + // --------------------------------------------------------------- + let mut guard = SandboxGuard::create_keep(&["sleep", "infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + // --------------------------------------------------------------- + // Step 2 — Set up a temp git repo with tracked + ignored files. + // --------------------------------------------------------------- + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let repo = tmpdir.path().join("repo"); + fs::create_dir_all(&repo).expect("create repo dir"); + + // Initialize git repo and add files. + let git_init = tokio::process::Command::new("git") + .args(["init"]) + .current_dir(&repo) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .expect("git init"); + assert!(git_init.success(), "git init should succeed"); + + // Configure git user for the commit. + let _ = tokio::process::Command::new("git") + .args(["config", "user.email", "test@test.com"]) + .current_dir(&repo) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await; + let _ = tokio::process::Command::new("git") + .args(["config", "user.name", "Test"]) + .current_dir(&repo) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await; + + // Create .gitignore, a tracked file, and an ignored file. 
+ fs::write(repo.join(".gitignore"), "*.log\nbuild/\n").expect("write .gitignore"); + fs::write(repo.join("tracked.txt"), "i-am-tracked").expect("write tracked.txt"); + fs::write(repo.join("ignored.log"), "i-should-be-filtered").expect("write ignored.log"); + fs::create_dir_all(repo.join("build")).expect("create build dir"); + fs::write(repo.join("build/output.bin"), "build-artifact").expect("write build/output.bin"); + + // git add + commit so git ls-files works. + let _ = tokio::process::Command::new("git") + .args(["add", "."]) + .current_dir(&repo) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .expect("git add"); + let _ = tokio::process::Command::new("git") + .args(["commit", "-m", "init"]) + .current_dir(&repo) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .expect("git commit"); + + // --------------------------------------------------------------- + // Step 3 — Upload WITH gitignore filtering (default). + // --------------------------------------------------------------- + let repo_str = repo.to_str().expect("repo path is UTF-8"); + guard + .upload_with_gitignore(repo_str, "/sandbox/filtered", &repo) + .await + .expect("upload with gitignore filtering"); + + // --------------------------------------------------------------- + // Step 4 — Verify: tracked file exists, ignored files do not. + // --------------------------------------------------------------- + // Download the uploaded directory and verify contents. + let download_dir = tmpdir.path().join("verify"); + fs::create_dir_all(&download_dir).expect("create verify dir"); + let download_str = download_dir.to_str().expect("verify path is UTF-8"); + + guard + .download("/sandbox/filtered", download_str) + .await + .expect("download filtered upload"); + + // tracked.txt should be present. 
+ let tracked = fs::read_to_string(download_dir.join("tracked.txt")) + .expect("tracked.txt should exist after filtered upload"); + assert_eq!(tracked, "i-am-tracked", "tracked.txt content mismatch"); + + // .gitignore itself should be present (it's tracked). + assert!( + download_dir.join(".gitignore").exists(), + ".gitignore should be uploaded (it's a tracked file)" + ); + + // ignored.log should NOT be present. + assert!( + !download_dir.join("ignored.log").exists(), + "ignored.log should be filtered out by .gitignore" + ); + + // build/ directory should NOT be present. + assert!( + !download_dir.join("build").exists(), + "build/ directory should be filtered out by .gitignore" + ); + + // --------------------------------------------------------------- + // Cleanup. + // --------------------------------------------------------------- + guard.cleanup().await; +} diff --git a/e2e/rust/tests/upload_create.rs b/e2e/rust/tests/upload_create.rs new file mode 100644 index 00000000..e365bfc9 --- /dev/null +++ b/e2e/rust/tests/upload_create.rs @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! E2E test: `sandbox create --upload` pre-loads files before running a command. +//! +//! Validates that the `--upload :` flag on `sandbox create` +//! transfers files into the sandbox before the user command executes, +//! so the command can read the uploaded content. +//! +//! Prerequisites: +//! - A running nemoclaw gateway (`nemoclaw gateway start`) +//! - The `nemoclaw` binary (built automatically from the workspace) + +use std::fs; + +use nemoclaw_e2e::harness::output::strip_ansi; +use nemoclaw_e2e::harness::sandbox::SandboxGuard; + +/// Create a sandbox with `--upload dir:/sandbox/data` and run a command that +/// reads the uploaded files, verifying the content appears in stdout. 
+#[tokio::test] +async fn create_with_upload_provides_files_to_command() { + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + + // Create a directory with files to upload. + let upload_dir = tmpdir.path().join("project"); + fs::create_dir_all(upload_dir.join("src")).expect("create project/src"); + fs::write(upload_dir.join("marker.txt"), "upload-create-marker").expect("write marker.txt"); + fs::write(upload_dir.join("src/main.py"), "print('hello')").expect("write main.py"); + + let upload_str = upload_dir.to_str().expect("upload path is UTF-8"); + + // The command reads the marker file — if upload worked, its content + // appears in the output. + let mut guard = SandboxGuard::create_with_upload( + upload_str, + "/sandbox/data", + &["cat", "/sandbox/data/marker.txt"], + ) + .await + .expect("sandbox create --upload"); + + let clean = strip_ansi(&guard.create_output); + assert!( + clean.contains("upload-create-marker"), + "expected uploaded marker content in sandbox output:\n{clean}" + ); + + guard.cleanup().await; +} + +/// `--upload` with a single file (not a directory) should work. 
+#[tokio::test] +async fn create_with_upload_single_file() { + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let file_path = tmpdir.path().join("config.txt"); + fs::write(&file_path, "single-file-upload-test").expect("write config.txt"); + + let file_str = file_path.to_str().expect("file path is UTF-8"); + + let mut guard = SandboxGuard::create_with_upload( + file_str, + "/sandbox", + &["cat", "/sandbox/config.txt"], + ) + .await + .expect("sandbox create --upload single file"); + + let clean = strip_ansi(&guard.create_output); + assert!( + clean.contains("single-file-upload-test"), + "expected single-file content in sandbox output:\n{clean}" + ); + + guard.cleanup().await; +} diff --git a/examples/bring-your-own-container/README.md b/examples/bring-your-own-container/README.md index b69e1239..22b0167e 100644 --- a/examples/bring-your-own-container/README.md +++ b/examples/bring-your-own-container/README.md @@ -6,7 +6,7 @@ your local machine through port forwarding. ## Prerequisites -- A running NemoClaw cluster (`nemoclaw cluster admin deploy`) +- A running NemoClaw gateway (`nemoclaw gateway start`) - Docker daemon running ## What's in this example diff --git a/examples/inference/README.md b/examples/local-inference/README.md similarity index 52% rename from examples/inference/README.md rename to examples/local-inference/README.md index c2025031..814b4ae6 100644 --- a/examples/inference/README.md +++ b/examples/local-inference/README.md @@ -1,26 +1,24 @@ # Inference Routing Example This example demonstrates NemoClaw's inference interception and routing. -A sandbox process makes standard API calls (OpenAI, Anthropic, etc.) and -NemoClaw transparently intercepts, enforces policy, and reroutes them to -a configured backend — without any code changes in the sandboxed application. +A sandbox process sends inference traffic to `inference.local`, and +NemoClaw intercepts and reroutes it to the configured backend. ## How It Works -1. 
The sandbox proxy intercepts outgoing HTTPS connections. -2. OPA policy determines the action: if the binary has no explicit network - policy but inference routing is configured, the connection is inspected. +1. The sandbox process sends HTTPS traffic to `inference.local`. +2. The sandbox proxy intercepts that explicit inference endpoint locally. 3. The proxy TLS-terminates, parses the HTTP request, and detects known inference patterns (e.g., `POST /v1/chat/completions`). -4. Matching requests are forwarded directly to the policy-allowed inference - backend via the sandbox's local router. Non-inference requests are denied. +4. Matching requests are forwarded to the configured backend via the sandbox's + local router. Non-inference requests are denied. ## Files | File | Description | |---|---| -| `inference.py` | Python script that calls the OpenAI SDK — works unmodified inside a sandbox | -| `sandbox-policy.yaml` | Sandbox policy with inference routing enabled (route hint: `local`) | +| `inference.py` | Python script that calls the OpenAI SDK through `https://inference.local/v1` | +| `sandbox-policy.yaml` | Minimal sandbox policy for the example | | `routes.yaml` | Example YAML route file for standalone (no-cluster) mode | ## Quick Start @@ -53,35 +51,30 @@ requests locally — no gRPC server or cluster required. ```bash mise run cluster -nemoclaw cluster status +nemoclaw status ``` -#### 2. Create an inference route +#### 2. Configure cluster inference -Point the route at any OpenAI-compatible endpoint (local or remote): +First make sure a provider record exists for the backend you want to use: ```bash -# Local model (e.g., LM Studio, Ollama, vLLM) -nemoclaw inference create \ - --routing-hint local \ - --base-url http://: \ - --model-id - -# Remote provider (e.g., OpenAI, NVIDIA NIM) -nemoclaw inference create \ - --routing-hint local \ - --base-url https://api.openai.com \ - --api-key sk-... 
\ - --model-id gpt-4o-mini +nemoclaw provider list ``` -If `--protocol` is omitted, NemoClaw auto-detects supported protocols by -probing the endpoint (sends minimal requests with `max_tokens: 1`). +Then configure the cluster-managed `inference.local` route: -Verify the route: +```bash +# Example: use an existing provider record +nemoclaw cluster inference set \ + --provider openai-prod \ + --model gpt-4o-mini +``` + +Verify the active config: ```bash -nemoclaw inference list +nemoclaw cluster inference get ``` #### 3. Run the example inside a sandbox @@ -94,9 +87,9 @@ nemoclaw sandbox create \ -- python examples/inference/inference.py ``` -The script targets `https://api.openai.com` by default, but NemoClaw -intercepts the connection and routes it to whatever backend the `local` -route points at. +The script targets `https://inference.local/v1` directly. NemoClaw +intercepts that connection and routes it to whatever backend cluster +inference is configured to use. Expected output: @@ -117,22 +110,12 @@ python examples/inference/inference.py ```bash nemoclaw sandbox delete inference-demo -nemoclaw inference delete ``` -## Customizing the Policy - -Edit `sandbox-policy.yaml` to control which routes are available: - -```yaml -inference: - allowed_routes: - - local # matches the --routing-hint used in step 2 - - production # add more route hints as needed -``` +## Customizing Routes -The `allowed_routes` list determines which inference routes a sandbox can -use. Routes are matched by their `routing_hint` field. +Edit `routes.yaml` to change which backend endpoint/model standalone mode uses. +In cluster mode, use `nemoclaw cluster inference set` instead. 
## Supported Protocols diff --git a/examples/inference/inference.py b/examples/local-inference/inference.py similarity index 86% rename from examples/inference/inference.py rename to examples/local-inference/inference.py index e88960b2..2cf07e9b 100644 --- a/examples/inference/inference.py +++ b/examples/local-inference/inference.py @@ -3,7 +3,7 @@ from openai import OpenAI -client = OpenAI(api_key="dummy") +client = OpenAI(api_key="dummy", base_url="https://inference.local/v1") response = client.chat.completions.create( model="router", diff --git a/examples/inference/routes.yaml b/examples/local-inference/routes.yaml similarity index 96% rename from examples/inference/routes.yaml rename to examples/local-inference/routes.yaml index 381103c9..e0721360 100644 --- a/examples/inference/routes.yaml +++ b/examples/local-inference/routes.yaml @@ -11,7 +11,7 @@ # -- python examples/inference/inference.py routes: - - routing_hint: local + - name: inference.local endpoint: http://localhost:1234/v1 model: local-model protocols: diff --git a/examples/inference/sandbox-policy.yaml b/examples/local-inference/sandbox-policy.yaml similarity index 62% rename from examples/inference/sandbox-policy.yaml rename to examples/local-inference/sandbox-policy.yaml index 9f3385ab..549a31c9 100644 --- a/examples/inference/sandbox-policy.yaml +++ b/examples/local-inference/sandbox-policy.yaml @@ -25,10 +25,4 @@ process: run_as_user: sandbox run_as_group: sandbox -# No network_policies needed for inference routing — any outgoing connection -# from a binary not explicitly allowed will be intercepted and checked for -# inference API patterns when inference routing is configured below. - -inference: - allowed_routes: - - local +# No network policies means all outbound connections are denied and only inference.local is allowed. 
diff --git a/examples/sync-files.md b/examples/sync-files.md index 568bfdfc..51ccc64d 100644 --- a/examples/sync-files.md +++ b/examples/sync-files.md @@ -1,26 +1,26 @@ # Syncing Files To and From a Sandbox Move code, data, and artifacts between your local machine and a NemoClaw -sandbox using `nemoclaw sandbox sync`. +sandbox using `nemoclaw sandbox upload` and `nemoclaw sandbox download`. ## Push local files into a sandbox Upload your current project directory into `/sandbox` on the sandbox: ```bash -nemoclaw sandbox sync my-sandbox --up . +nemoclaw sandbox upload my-sandbox . ``` Push a specific directory to a custom destination: ```bash -nemoclaw sandbox sync my-sandbox --up ./src /sandbox/src +nemoclaw sandbox upload my-sandbox ./src /sandbox/src ``` Push a single file: ```bash -nemoclaw sandbox sync my-sandbox --up ./config.yaml /sandbox/config.yaml +nemoclaw sandbox upload my-sandbox ./config.yaml /sandbox/config.yaml ``` ## Pull files from a sandbox @@ -28,13 +28,13 @@ nemoclaw sandbox sync my-sandbox --up ./config.yaml /sandbox/config.yaml Download sandbox output to your local machine: ```bash -nemoclaw sandbox sync my-sandbox --down /sandbox/output ./output +nemoclaw sandbox download my-sandbox /sandbox/output ./output ``` Pull results to the current directory: ```bash -nemoclaw sandbox sync my-sandbox --down /sandbox/results +nemoclaw sandbox download my-sandbox /sandbox/results ``` ## Sync on create @@ -55,14 +55,14 @@ This collects tracked and untracked (non-ignored) files via nemoclaw sandbox create --name dev --sync --keep # Make local changes, then push them -nemoclaw sandbox sync dev --up ./src /sandbox/src +nemoclaw sandbox upload dev ./src /sandbox/src # Run tests inside the sandbox nemoclaw sandbox connect dev # (inside sandbox) pytest # Pull test artifacts back -nemoclaw sandbox sync dev --down /sandbox/coverage ./coverage +nemoclaw sandbox download dev /sandbox/coverage ./coverage ``` ## How it works diff --git 
a/examples/vscode-remote-sandbox.md b/examples/vscode-remote-sandbox.md index 9067d3b5..198b733a 100644 --- a/examples/vscode-remote-sandbox.md +++ b/examples/vscode-remote-sandbox.md @@ -6,7 +6,7 @@ extension so you get a full IDE experience inside the sandbox environment. ## Prerequisites -- A running nemoclaw cluster (`nemoclaw cluster admin deploy`) +- A running nemoclaw gateway (`nemoclaw gateway start`) - [VSCode](https://code.visualstudio.com/) with the [Remote - SSH](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-ssh) extension installed diff --git a/mise.toml b/mise.toml index 34baf1d7..0c0c8b91 100644 --- a/mise.toml +++ b/mise.toml @@ -38,7 +38,6 @@ SCCACHE_DIR = "{{config_root}}/.cache/sccache" NAV_PYPI_REPOSITORY_URL = "https://urm.nvidia.com/artifactory/api/pypi/nv-shared-pypi-local" # Shared build constants (overridable via environment) -K3S_VERSION = "{{env.K3S_VERSION | default(value='v1.29.8-k3s1')}}" DOCKER_BUILDKIT = "1" [vars] diff --git a/proto/inference.proto b/proto/inference.proto index adad2f7c..7f48abbf 100644 --- a/proto/inference.proto +++ b/proto/inference.proto @@ -8,93 +8,80 @@ package navigator.inference.v1; option java_multiple_files = true; option java_package = "com.anthropic.navigator.inference.v1"; -// Inference service provides LLM route management and sandbox route delivery. +// Inference service provides cluster inference configuration and bundle delivery. service Inference { - // Return the pre-filtered set of routes a sandbox is allowed to execute. - rpc GetSandboxInferenceBundle(GetSandboxInferenceBundleRequest) - returns (GetSandboxInferenceBundleResponse); + // Return the resolved inference route bundle for sandbox-local execution. + rpc GetInferenceBundle(GetInferenceBundleRequest) + returns (GetInferenceBundleResponse); - // Create a new inference route. - rpc CreateInferenceRoute(CreateInferenceRouteRequest) returns (InferenceRouteResponse); + // Set cluster-level inference configuration. 
+ // + // This controls how requests sent to `inference.local` are routed. + rpc SetClusterInference(SetClusterInferenceRequest) + returns (SetClusterInferenceResponse); - // Update an existing inference route. - rpc UpdateInferenceRoute(UpdateInferenceRouteRequest) returns (InferenceRouteResponse); + // Get cluster-level inference configuration. + rpc GetClusterInference(GetClusterInferenceRequest) + returns (GetClusterInferenceResponse); - // Delete an inference route. - rpc DeleteInferenceRoute(DeleteInferenceRouteRequest) returns (DeleteInferenceRouteResponse); - - // List inference routes. - rpc ListInferenceRoutes(ListInferenceRoutesRequest) returns (ListInferenceRoutesResponse); } -message InferenceRouteSpec { - string routing_hint = 1; - string base_url = 2; - // Protocols this route can serve (e.g. "openai_chat_completions"). - repeated string protocols = 3; - string api_key = 4; - string model_id = 5; - bool enabled = 6; +// Persisted cluster inference configuration. +// +// Only `provider_name` and `model_id` are stored; endpoint, protocols, +// credentials, and auth style are resolved from the provider at bundle time. +message ClusterInferenceConfig { + // Provider record name backing this route. + string provider_name = 1; + // Model identifier to force on generation calls. + string model_id = 2; } -// Inference route model stored by Navigator. +// Storage envelope for the managed cluster inference route. message InferenceRoute { string id = 1; - InferenceRouteSpec spec = 2; - // Human-friendly name, unique per object type. Auto-generated if not provided. + ClusterInferenceConfig config = 2; + // Object name (always "inference.local" for the managed route). string name = 3; + // Monotonic version incremented on every update. + uint64 version = 4; } -message CreateInferenceRouteRequest { - InferenceRouteSpec route = 1; - // Optional name. If empty, a random 6-char name is generated. 
- string name = 2; -} - -message UpdateInferenceRouteRequest { - // Route name (canonical lookup key). - string name = 1; - InferenceRouteSpec route = 2; -} - -message DeleteInferenceRouteRequest { - // Route name (canonical lookup key). - string name = 1; -} - -message DeleteInferenceRouteResponse { - bool deleted = 1; +message SetClusterInferenceRequest { + // Provider record name to use for credentials + endpoint mapping. + string provider_name = 1; + // Model identifier to force on generation calls. + string model_id = 2; } -message ListInferenceRoutesRequest { - uint32 limit = 1; - uint32 offset = 2; +message SetClusterInferenceResponse { + string provider_name = 1; + string model_id = 2; + uint64 version = 3; } -message ListInferenceRoutesResponse { - repeated InferenceRoute routes = 1; -} +message GetClusterInferenceRequest {} -message InferenceRouteResponse { - InferenceRoute route = 1; +message GetClusterInferenceResponse { + string provider_name = 1; + string model_id = 2; + uint64 version = 3; } -message GetSandboxInferenceBundleRequest { - // Sandbox to fetch routes for (used for policy-based filtering). - string sandbox_id = 1; -} +message GetInferenceBundleRequest {} // A single resolved route ready for sandbox-local execution. -message SandboxResolvedRoute { - string routing_hint = 1; +message ResolvedRoute { + string name = 1; string base_url = 2; repeated string protocols = 3; string api_key = 4; string model_id = 5; + string provider_type = 6; } -message GetSandboxInferenceBundleResponse { - repeated SandboxResolvedRoute routes = 1; +message GetInferenceBundleResponse { + repeated ResolvedRoute routes = 1; // Opaque revision tag for cache freshness checks. string revision = 2; // Timestamp (epoch ms) when this bundle was generated. 
diff --git a/proto/sandbox.proto b/proto/sandbox.proto index 7733ff02..062026bd 100644 --- a/proto/sandbox.proto +++ b/proto/sandbox.proto @@ -17,32 +17,6 @@ message SandboxPolicy { ProcessPolicy process = 4; // Network access policies keyed by name (e.g. "claude_code", "gitlab"). map network_policies = 5; - // Inference access policy. - InferencePolicy inference = 6; -} - -// Inference access policy. -message InferencePolicy { - // Which routing hints this sandbox is allowed to use. - // Matched against InferenceRouteSpec.routing_hint, NOT InferenceRoute.name. - // e.g. ["local"] for private-only, ["local", "frontier"] for full access. - // Empty means no inference allowed. - repeated string allowed_routes = 1; - // API patterns to detect inference requests at the proxy layer. - // If empty, built-in defaults (OpenAI chat/completions) are used. - repeated InferenceApiPattern api_patterns = 2; -} - -// Pattern for detecting inference API requests at the proxy layer. -message InferenceApiPattern { - // HTTP method (e.g. "POST"). - string method = 1; - // Path glob pattern (e.g. "/v1/chat/completions"). - string path_glob = 2; - // Source protocol identifier (e.g. "openai_chat_completions"). - string protocol = 3; - // Human-readable kind (e.g. "chat_completion"). - string kind = 4; } // Filesystem access policy. 
diff --git a/python/navigator/__init__.py b/python/navigator/__init__.py index d5abf4bb..7d86b7bf 100644 --- a/python/navigator/__init__.py +++ b/python/navigator/__init__.py @@ -6,10 +6,10 @@ from __future__ import annotations from navigator.sandbox import ( + ClusterInferenceConfig, ExecChunk, ExecResult, InferenceRouteClient, - InferenceRouteRef, Sandbox, SandboxClient, SandboxError, @@ -26,10 +26,10 @@ __version__ = "0.0.0" __all__ = [ + "ClusterInferenceConfig", "ExecChunk", "ExecResult", "InferenceRouteClient", - "InferenceRouteRef", "Sandbox", "SandboxClient", "SandboxError", diff --git a/python/navigator/sandbox.py b/python/navigator/sandbox.py index 756e9418..97154279 100644 --- a/python/navigator/sandbox.py +++ b/python/navigator/sandbox.py @@ -21,7 +21,6 @@ inference_pb2_grpc, navigator_pb2, navigator_pb2_grpc, - sandbox_pb2, ) if TYPE_CHECKING: @@ -376,13 +375,14 @@ def exec_python( @dataclass(frozen=True) -class InferenceRouteRef: - id: str - name: str +class ClusterInferenceConfig: + provider_name: str + model_id: str + version: int class InferenceRouteClient: - """gRPC client for managing inference routes.""" + """gRPC client for cluster-level inference configuration.""" def __init__(self, channel: grpc.Channel, *, timeout: float = 30.0) -> None: self._stub = inference_pb2_grpc.InferenceStub(channel) @@ -392,47 +392,35 @@ def __init__(self, channel: grpc.Channel, *, timeout: float = 30.0) -> None: def from_sandbox_client(cls, client: SandboxClient) -> InferenceRouteClient: return cls(client._channel, timeout=client._timeout) - def create( + def set_cluster( self, *, - name: str, - routing_hint: str, - base_url: str, - protocols: builtins.list[str], - api_key: str, + provider_name: str, model_id: str, - enabled: bool = True, - ) -> InferenceRouteRef: - spec = inference_pb2.InferenceRouteSpec( - routing_hint=routing_hint, - base_url=base_url, - protocols=protocols, - api_key=api_key, - model_id=model_id, - enabled=enabled, - ) - response = 
self._stub.CreateInferenceRoute( - inference_pb2.CreateInferenceRouteRequest(route=spec, name=name), + ) -> ClusterInferenceConfig: + response = self._stub.SetClusterInference( + inference_pb2.SetClusterInferenceRequest( + provider_name=provider_name, + model_id=model_id, + ), timeout=self._timeout, ) - route = response.route - return InferenceRouteRef(id=route.id, name=route.name) - - def delete(self, name: str) -> bool: - response = self._stub.DeleteInferenceRoute( - inference_pb2.DeleteInferenceRouteRequest(name=name), - timeout=self._timeout, + return ClusterInferenceConfig( + provider_name=response.provider_name, + model_id=response.model_id, + version=response.version, ) - return bool(response.deleted) - def list( - self, *, limit: int = 100, offset: int = 0 - ) -> builtins.list[InferenceRouteRef]: - response = self._stub.ListInferenceRoutes( - inference_pb2.ListInferenceRoutesRequest(limit=limit, offset=offset), + def get_cluster(self) -> ClusterInferenceConfig: + response = self._stub.GetClusterInference( + inference_pb2.GetClusterInferenceRequest(), timeout=self._timeout, ) - return [InferenceRouteRef(id=r.id, name=r.name) for r in response.routes] + return ClusterInferenceConfig( + provider_name=response.provider_name, + model_id=response.model_id, + version=response.version, + ) class Sandbox: @@ -594,24 +582,13 @@ def _sandbox_ref(sandbox: datamodel_pb2.Sandbox) -> SandboxRef: ) -def _default_policy() -> sandbox_pb2.SandboxPolicy: - return sandbox_pb2.SandboxPolicy( - version=1, - inference=sandbox_pb2.InferencePolicy(allowed_routes=["local"]), - filesystem=sandbox_pb2.FilesystemPolicy( - include_workdir=True, - read_only=["/usr", "/lib", "/etc", "/app"], - read_write=["/sandbox", "/tmp"], - ), - landlock=sandbox_pb2.LandlockPolicy(compatibility="best_effort"), - process=sandbox_pb2.ProcessPolicy( - run_as_user="sandbox", run_as_group="sandbox" - ), - ) - - def _default_spec() -> datamodel_pb2.SandboxSpec: - return 
datamodel_pb2.SandboxSpec(policy=_default_policy()) + # Omit the policy field so the sandbox container discovers its policy + # from /etc/navigator/policy.yaml (baked into the image at build time). + # This avoids duplicating policy defaults between the SDK and the + # container image and ensures sandboxes get the full dev-sandbox-policy + # (including network_policies) out of the box. + return datamodel_pb2.SandboxSpec() def _xdg_config_home() -> pathlib.Path: diff --git a/tasks/gator.toml b/tasks/gator.toml deleted file mode 100644 index 1420d204..00000000 --- a/tasks/gator.toml +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Gator TUI tasks - -[gator] -description = "Launch the Gator TUI" -run = "nemoclaw gator" -hide = true - -["gator:dev"] -description = "Launch the Gator TUI with hot-reload on file changes" -run = "cargo watch -s 'nemoclaw gator'" -hide = true diff --git a/tasks/scripts/cluster-bootstrap.sh b/tasks/scripts/cluster-bootstrap.sh index 7f1129b4..f0af2353 100755 --- a/tasks/scripts/cluster-bootstrap.sh +++ b/tasks/scripts/cluster-bootstrap.sh @@ -209,7 +209,7 @@ VOLUME_NAME="navigator-cluster-${CLUSTER_NAME}" if [ "${MODE}" = "fast" ]; then if docker inspect "${CONTAINER_NAME}" >/dev/null 2>&1 || docker volume inspect "${VOLUME_NAME}" >/dev/null 2>&1; then echo "Recreating cluster '${CLUSTER_NAME}' from scratch..." 
- nemoclaw cluster admin destroy --name "${CLUSTER_NAME}" + nemoclaw gateway destroy --name "${CLUSTER_NAME}" fi fi @@ -236,7 +236,7 @@ if [ -z "${NEMOCLAW_CLUSTER_IMAGE:-}" ]; then export NEMOCLAW_CLUSTER_IMAGE="navigator/cluster:${IMAGE_TAG}" fi -DEPLOY_CMD=(nemoclaw cluster admin deploy --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}" --update-kube-config) +DEPLOY_CMD=(nemoclaw gateway start --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}" --update-kube-config) if [ -n "${GATEWAY_HOST:-}" ]; then DEPLOY_CMD+=(--gateway-host "${GATEWAY_HOST}") diff --git a/tasks/scripts/docker-build-cluster.sh b/tasks/scripts/docker-build-cluster.sh index fc960a51..1d467918 100755 --- a/tasks/scripts/docker-build-cluster.sh +++ b/tasks/scripts/docker-build-cluster.sh @@ -7,7 +7,7 @@ # # Environment: # IMAGE_TAG - Image tag (default: dev) -# K3S_VERSION - k3s version (set by mise.toml [env]) +# K3S_VERSION - k3s version override (optional; default in Dockerfile.cluster) # DOCKER_PLATFORM - Target platform (optional) # DOCKER_BUILDER - Buildx builder name (default: auto-select) @@ -68,7 +68,7 @@ docker buildx build \ ${CACHE_ARGS[@]+"${CACHE_ARGS[@]}"} \ -f deploy/docker/Dockerfile.cluster \ -t ${IMAGE_NAME}:${IMAGE_TAG} \ - --build-arg K3S_VERSION=${K3S_VERSION} \ + ${K3S_VERSION:+--build-arg K3S_VERSION=${K3S_VERSION}} \ ${OUTPUT_FLAG} \ . 
diff --git a/tasks/scripts/docker-build-component.sh b/tasks/scripts/docker-build-component.sh index 8516967c..5da3f427 100755 --- a/tasks/scripts/docker-build-component.sh +++ b/tasks/scripts/docker-build-component.sh @@ -135,6 +135,14 @@ if [[ -n "${SCCACHE_MEMCACHED_ENDPOINT:-}" ]]; then SCCACHE_ARGS=(--build-arg "SCCACHE_MEMCACHED_ENDPOINT=${SCCACHE_MEMCACHED_ENDPOINT}") fi +VERSION_ARGS=() +if [[ -n "${NEMOCLAW_CARGO_VERSION:-}" ]]; then + VERSION_ARGS=(--build-arg "NEMOCLAW_CARGO_VERSION=${NEMOCLAW_CARGO_VERSION}") +elif [[ "${COMPONENT}" == "server" ]]; then + CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo) + VERSION_ARGS=(--build-arg "NEMOCLAW_CARGO_VERSION=${CARGO_VERSION}") +fi + LOCK_HASH=$(sha256_16 Cargo.lock) RUST_SCOPE=${RUST_TOOLCHAIN_SCOPE:-$(detect_rust_scope "${DOCKERFILE}")} CACHE_SCOPE_INPUT="v1|${COMPONENT}|${VARIANT:-base}|${LOCK_HASH}|${RUST_SCOPE}" @@ -145,6 +153,7 @@ docker buildx build \ ${DOCKER_PLATFORM:+--platform ${DOCKER_PLATFORM}} \ ${CACHE_ARGS[@]+"${CACHE_ARGS[@]}"} \ ${SCCACHE_ARGS[@]+"${SCCACHE_ARGS[@]}"} \ + ${VERSION_ARGS[@]+"${VERSION_ARGS[@]}"} \ --build-arg "CARGO_TARGET_CACHE_SCOPE=${CARGO_TARGET_CACHE_SCOPE}" \ -f "${DOCKERFILE}" \ -t "${IMAGE_NAME}:${IMAGE_TAG}" \ diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index 9871009e..1c3bd9a1 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -11,7 +11,7 @@ # # Environment: # IMAGE_TAG - Image tag (default: dev) -# K3S_VERSION - k3s version (set by mise.toml [env]) +# K3S_VERSION - k3s version override (optional; default in Dockerfile.cluster) # DOCKER_PLATFORMS - Target platforms (default: linux/amd64,linux/arm64) # RUST_BUILD_PROFILE - Rust build profile for sandbox (default: release) @@ -80,6 +80,10 @@ fi # --------------------------------------------------------------------------- IMAGE_TAG=${IMAGE_TAG:-dev} 
PLATFORMS=${DOCKER_PLATFORMS:-linux/amd64,linux/arm64} +CARGO_VERSION=${NEMOCLAW_CARGO_VERSION:-} +if [[ -z "${CARGO_VERSION}" ]]; then + CARGO_VERSION=$(uv run python tasks/scripts/release.py get-version --cargo) +fi EXTRA_BUILD_FLAGS="" TAG_LATEST=${TAG_LATEST:-false} EXTRA_DOCKER_TAGS_RAW=${EXTRA_DOCKER_TAGS:-} @@ -159,6 +163,7 @@ for component in sandbox server; do if [ "$component" = "sandbox" ]; then BUILD_ARGS="--build-arg RUST_BUILD_PROFILE=${RUST_BUILD_PROFILE:-release}" fi + BUILD_ARGS="${BUILD_ARGS} --build-arg NEMOCLAW_CARGO_VERSION=${CARGO_VERSION}" if [ -n "${SCCACHE_MEMCACHED_ENDPOINT:-}" ]; then BUILD_ARGS="${BUILD_ARGS} --build-arg SCCACHE_MEMCACHED_ENDPOINT=${SCCACHE_MEMCACHED_ENDPOINT}" fi @@ -197,7 +202,7 @@ docker buildx build \ --platform "${PLATFORMS}" \ -f deploy/docker/Dockerfile.cluster \ -t "${CLUSTER_IMAGE}:${IMAGE_TAG}" \ - --build-arg K3S_VERSION=${K3S_VERSION} \ + ${K3S_VERSION:+--build-arg K3S_VERSION=${K3S_VERSION}} \ ${EXTRA_BUILD_FLAGS} \ --push \ . diff --git a/tasks/term.toml b/tasks/term.toml new file mode 100644 index 00000000..82b2e24a --- /dev/null +++ b/tasks/term.toml @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# TUI tasks + +[term] +description = "Launch the NemoClaw TUI" +run = "nemoclaw term" +hide = true + +["term:dev"] +description = "Launch the NemoClaw TUI with hot-reload on file changes" +run = "cargo watch -s 'nemoclaw term'" +hide = true diff --git a/tasks/test.toml b/tasks/test.toml index 96d08737..b34f47d7 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -8,13 +8,8 @@ description = "Run all tests (Rust + Python)" depends = ["test:rust", "test:python"] [e2e] -description = "Run default end-to-end test lane" -depends = ["test:e2e:sandbox"] - -["test:e2e"] -description = "Alias for e2e" -depends = ["e2e"] -hide = true +description = "Run all end-to-end tests (Rust + Python)" +depends = ["e2e:rust", "e2e:python"] ["test:rust"] description = "Run Rust tests" @@ -28,27 +23,13 @@ env = { UV_NO_SYNC = "1" } run = "uv run pytest python/" hide = true -["test:e2e:sandbox"] -description = "Run sandbox end-to-end tests (E2E_PARALLEL=N or 'auto'; default 5)" +["e2e:rust"] +description = "Run Rust CLI e2e tests (requires a running cluster)" +depends = ["cluster"] +run = ["cargo build -p navigator-cli", "cargo test --manifest-path e2e/rust/Cargo.toml --features e2e"] + +["e2e:python"] +description = "Run Python e2e tests (E2E_PARALLEL=N or 'auto'; default 5)" depends = ["python:proto", "cluster"] env = { UV_NO_SYNC = "1", PYTHONPATH = "python" } run = "uv run pytest -o python_files='test_*.py' -n ${E2E_PARALLEL:-5} e2e/python" -hide = true - -["test:e2e:port-forward"] -description = "Run port-forward integration test" -depends = ["cluster"] -run = "bash e2e/bash/test_port_forward.sh" -hide = true - -["test:e2e:custom-image"] -description = "Run custom image build and sandbox e2e test" -depends = ["cluster"] -run = "bash e2e/bash/test_sandbox_custom_image.sh" -hide = true - -["test:e2e:sync"] -description = "Run sandbox file sync e2e test" -depends = ["cluster"] -run = "bash e2e/bash/test_sandbox_sync.sh" -hide = true