BackendStack21 · jkyberneees · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/cmd/odek/shell.go b/cmd/odek/shell.go
@@ -71,8 +71,9 @@ Use for: reading files, listing directories, running tests, building code, and g
 In sandbox mode (--sandbox), commands run inside the Docker container with restricted permissions.
 In host mode (default), commands run with the same permissions as the odek process.
 
-Risk classes: safe, local_write, system_write, destructive, network_egress, code_execution, install, blocked
-High-risk operations may prompt for approval (configurable via dangerous section in odek.json).`
+Risk classes: safe, local_write, system_write, destructive, network_egress, code_execution, install, unknown, blocked
+High-risk operations may prompt for approval (configurable via dangerous section in odek.json).
+The gate fails closed: an unrecognised command classifies as "unknown" and is denied by default.`
 }
 
 func (t *shellTool) Schema() any {

diff --git a/cmd/odek/subagent.go b/cmd/odek/subagent.go
@@ -492,8 +492,8 @@ func truncate(s string, n int) string {
 // outside CWD, an MCP server response). We:
 //   - Force NonInteractiveAction to deny (sub-agents have no TTY).
 //   - Clamp the action for Destructive, CodeExecution, Install,
-//     SystemWrite, and NetworkEgress to Deny so the sub-agent cannot
-//     escalate beyond LocalWrite without coming back through the
+//     SystemWrite, NetworkEgress, and Unknown to Deny so the sub-agent
+//     cannot escalate beyond LocalWrite without coming back through the
 //     parent.
 //
 // maxRisk caps the highest risk class the sub-agent will execute.
@@ -522,15 +522,15 @@ func applySubagentTrust(dc *danger.DangerousConfig, trustLevel, maxRisk string)
 			danger.Install,
 			danger.SystemWrite,
 			danger.NetworkEgress,
+			danger.Unknown,
 			danger.Blocked,
 		} {
 			dc.Classes[cls] = danger.Deny
 		}
 	}
 
 	if maxRisk != "" {
-		cap := danger.RiskClass(maxRisk)
-		capRank := riskRank(cap)
+		capRank := danger.Rank(danger.RiskClass(maxRisk))
 		for _, cls := range []danger.RiskClass{
 			danger.Safe,
 			danger.LocalWrite,
@@ -539,35 +539,12 @@ func applySubagentTrust(dc *danger.DangerousConfig, trustLevel, maxRisk string)
 			danger.NetworkEgress,
 			danger.CodeExecution,
 			danger.Install,
+			danger.Unknown,
 			danger.Blocked,
 		} {
-			if riskRank(cls) > capRank {
+			if danger.Rank(cls) > capRank {
 				dc.Classes[cls] = danger.Deny
 			}
 		}
 	}
 }
-
-// riskRank mirrors internal/danger.rank but is duplicated here to keep
-// applySubagentTrust local. Order matches internal/danger/classifier.go.
-func riskRank(cls danger.RiskClass) int {
-	switch cls {
-	case danger.Blocked:
-		return 8
-	case danger.Destructive:
-		return 7
-	case danger.SystemWrite:
-		return 6
-	case danger.CodeExecution:
-		return 5
-	case danger.NetworkEgress:
-		return 4
-	case danger.Install:
-		return 3
-	case danger.LocalWrite:
-		return 2
-	case danger.Safe:
-		return 1
-	}
-	return 0
-}
diff --git a/cmd/odek/subagent_trust_test.go b/cmd/odek/subagent_trust_test.go
@@ -35,6 +35,7 @@ func TestApplySubagentTrust_Untrusted_LocksDownEscalationClasses(t *testing.T) {
 		danger.Install,
 		danger.SystemWrite,
 		danger.NetworkEgress,
+		danger.Unknown,
 		danger.Blocked,
 	} {
 		if got := dc.Classes[cls]; got != danger.Deny {
@@ -63,6 +64,7 @@ func TestApplySubagentTrust_MaxRisk_ClampsAbove(t *testing.T) {
 		danger.NetworkEgress,
 		danger.CodeExecution,
 		danger.Install,
+		danger.Unknown,
 		danger.Blocked,
 	} {
 		if got := dc.Classes[cls]; got != danger.Deny {
@@ -76,3 +78,24 @@ func TestApplySubagentTrust_MaxRisk_ClampsAbove(t *testing.T) {
 		}
 	}
 }
+
+// TestApplySubagentTrust_MaxRiskUnknown_KeepsSafeOpen guards the fix for the
+// cap miscomputation: the removed local riskRank had no Unknown case, so
+// max_risk="unknown" ranked 0 and force-denied even Safe. Using danger.Rank, it
+// must leave Safe/LocalWrite open and deny only the classes above Unknown.
+func TestApplySubagentTrust_MaxRiskUnknown_KeepsSafeOpen(t *testing.T) {
+	dc := danger.DangerousConfig{}
+	applySubagentTrust(&dc, "", "unknown")
+
+	for _, cls := range []danger.RiskClass{danger.Safe, danger.LocalWrite} {
+		if got, ok := dc.Classes[cls]; ok && got == danger.Deny {
+			t.Errorf("Class %s must stay open with max_risk=unknown, got %q", cls, got)
+		}
+	}
+	// Only classes ranked above Unknown (Destructive, Blocked) are denied.
+	for _, cls := range []danger.RiskClass{danger.Destructive, danger.Blocked} {
+		if got := dc.Classes[cls]; got != danger.Deny {
+			t.Errorf("Class %s = %q, want deny with max_risk=unknown", cls, got)
+		}
+	}
+}
diff --git a/cmd/odek/wsapprover.go b/cmd/odek/wsapprover.go
@@ -36,11 +36,13 @@ type approvalRequest struct {
 	FrictionApprovals int    `json:"friction_approvals,omitempty"`
 }
 
-// allowTrustForClass mirrors the TTYApprover policy: destructive and
-// blocked must never be class-trusted, so an attacker cannot social-
-// engineer a single broad approval into long-term carte blanche.
+// allowTrustForClass mirrors the TTYApprover policy: destructive, blocked,
+// and unknown must never be class-trusted, so an attacker cannot social-
+// engineer a single broad approval into long-term carte blanche. Unknown is
+// the fail-closed catch-all for unrecognised verbs; class-trusting it would
+// blanket-approve every future obfuscated/novel command.
 func allowTrustForClass(cls danger.RiskClass) bool {
-	return cls != danger.Destructive && cls != danger.Blocked
+	return cls != danger.Destructive && cls != danger.Blocked && cls != danger.Unknown
 }
 
 // approvalResponse is received from the browser when the user responds.

diff --git a/cmd/odek/wsapprover_test.go b/cmd/odek/wsapprover_test.go
@@ -348,6 +348,7 @@ func TestWSApprover_AllowTrustFlag_PerClass(t *testing.T) {
 		{danger.Install, true},
 		{danger.Destructive, false},
 		{danger.Blocked, false},
+		{danger.Unknown, false},
 	}
 	for _, tc := range cases {
 		t.Run(string(tc.cls), func(t *testing.T) {
@@ -433,3 +434,18 @@ func TestWSApprover_TrustResponse_CoercedToApprove_ForBlocked(t *testing.T) {
 		t.Error("blocked class was cached as trusted — class trust must be impossible")
 	}
 }
+
+// TestWSApprover_TrustResponse_CoercedToApprove_ForUnknown verifies the
+// fail-closed Unknown class cannot be class-trusted: a forged "trust" is
+// treated as a single approve and never cached, so unrecognised verbs can't
+// be blanket-approved by one social-engineered grant.
+func TestWSApprover_TrustResponse_CoercedToApprove_ForUnknown(t *testing.T) {
+	a := newWSApprover(nil)
+	_, err := promptAndCaptureRequest(t, a, danger.Unknown, "trust")
+	if err != nil {
+		t.Errorf("expected nil error (coerced to approve), got: %v", err)
+	}
+	if a.approveAll[danger.Unknown] {
+		t.Error("unknown class was cached as trusted — class trust must be impossible")
+	}
+}
diff --git a/docker/README.md b/docker/README.md
@@ -14,7 +14,7 @@ Ready-to-run Compose setup for Odek in two permission profiles:
 > `run`/`repl`/`telegram` are unsandboxed by default.)
 
 For the full walkthrough, threat model, and tuning, see
-[`../DOCKER_COMPOSE_USER_GUIDE.md`](../DOCKER_COMPOSE_USER_GUIDE.md).
+[`../docs/DOCKER_COMPOSE_USER_GUIDE.md`](../docs/DOCKER_COMPOSE_USER_GUIDE.md).
 
 ## Files
 

diff --git a/docs/CLI.md b/docs/CLI.md
@@ -123,8 +123,13 @@ When running without `--sandbox`, odek classifies every shell command by risk an
 | 🔴 network_egress | **prompt** | `curl`, `git push`, `ssh`, `scp` |
 | 🔴 code_execution | **prompt** | `curl url \| bash`, `eval`, `node -e`, `go run` |
 | 🟠 install | **prompt** | `npm install`, `pip install`, `go install <path>` |
+| 🔴 unknown | **deny** | any command whose program name isn't recognised |
 | ⬛ blocked | **deny** | Fork bombs, `dd` to block devices |
 
+odek **fails closed**: a command whose verb matches no known-safe or known-dangerous
+pattern is classified `unknown` and denied by default. Permit a specific tool by adding
+its exact invocation to `allowlist`, or soften the class with `"unknown": "prompt"`.
+
 The approval prompt accepts:
 
 - `A` — Approve once

diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md
@@ -206,7 +206,7 @@ CI (`.github/workflows/test.yml`) runs the unit suite under `-race` on every pus
 | `internal/ws` | WebSocket constant verification |
 | `internal/resource` | @-reference parsing, file resolution, session resolution, security |
 | `internal/render` | Terminal output, no-color mode, nil safety, tool call/result rendering |
-| `internal/danger` | Command classification across 8 risk classes, config overrides, allow/denylist, classifier-bypass attempts, approver friction |
+| `internal/danger` | Command classification across 9 risk classes (incl. fail-closed `unknown`), config overrides, allow/denylist, classifier-bypass attempts, approver friction |
 | `internal/memory` | Facts CRUD, buffer ring, episodes, merge detector (go-vector), ReplaceEntry/AppendEntry, memory tool, security scan, LLM ranking, episode provenance |
 | `internal/skills` | Loading, triggers, self-improvement heuristics, curation, LLM-enhanced generation, import, tools, AnalyzeMessages/RunAutoSaveLoop, ValidateSkillName, isPrivateHost |
 | `internal/telegram` | Bot client, long-polling, command handlers, session management, plan CRUD, voice/photo download, health server, retry/backoff |

diff --git a/DOCKER_COMPOSE_USER_GUIDE.md → docs/DOCKER_COMPOSE_USER_GUIDE.md b/DOCKER_COMPOSE_USER_GUIDE.md → docs/DOCKER_COMPOSE_USER_GUIDE.md
@@ -138,10 +138,10 @@ These JSON files are mounted to `/home/odek/.odek/config.json` inside the contai
 
 ### 5a. Restricted policy — `config.restricted.json`
 
-This is essentially Odek's default behavior, made explicit. Commands are risk‑classified;
-destructive ones are denied, the rest prompt for approval. Crucially, `non_interactive`
-is set to **`deny`** so that if the agent runs in a container *without* an attached
-terminal or Web UI, high‑risk commands are blocked rather than silently allowed.
+Commands are risk‑classified; destructive ones are denied, while most others —
+including unrecognised ones — prompt for approval. Crucially, `non_interactive` is
+set to **`deny`** so that if the agent runs in a container *without* an attached
+terminal or Web UI, anything that would prompt is blocked rather than silently allowed.
 
 ```json
 {
@@ -163,23 +163,81 @@ terminal or Web UI, high‑risk commands are blocked rather than silently allowe
 }
 ```
 
-**How the classes map** (built‑in risk model):
+#### What each field does
 
-| Class | Examples | Restricted action |
-| --- | --- | --- |
-| `safe` | `ls`, `cat`, `echo` | allow |
-| `local_write` | write files in the working dir | allow |
-| `system_write` | `chmod`, `chown`, `mkdir /etc` | prompt |
-| `network_egress` | `curl`, `wget`, DNS lookups | prompt |
-| `code_execution` | `go run`, `python x.py` | prompt |
-| `install` | `npm install`, `apk add` | prompt |
-| `destructive` | `rm -rf`, `git rm`, `docker rm` | **deny** |
-| `blocked` | fork bombs, `dd` to block devices | **always deny** (cannot be overridden) |
+| Field | Meaning |
+| --- | --- |
+| `sandbox` | `false` runs commands directly in this container (the Compose setup already *is* the sandbox). `true` would nest a second Docker sandbox — not what you want here. |
+| `action` | **Global default** action for any class **not** listed under `classes`. `"prompt"` here, `"allow"` = godmode, `"deny"` = lockdown. ⚠️ This overrides the *built‑in* per‑class defaults (see the gotcha below). |
+| `non_interactive` | What to do with a **prompt**‑level command when there is no human channel (no TTY, no Web UI). `"deny"` blocks it; `"allow"` runs it. Always set this to `"deny"` for unattended/automated containers. |
+| `classes` | Per‑class action overrides. The most specific setting — it wins over `action` and the built‑in defaults. Only list the classes you want to pin. |
+| `allowlist` | Commands that always run, **exact string match**, no classification. Highest priority of all. Use for a handful of trusted exact commands (e.g. `"npm run deploy"`). |
+| `denylist` | Commands that are always denied, **prefix match** after trimming. Beats classification and even godmode — but **not** the allowlist. |
+
+#### How the classes map (built‑in risk model)
+
+| Class | Examples | Built‑in default | This profile |
+| --- | --- | --- | --- |
+| `safe` | `ls`, `cat`, `grep`, `git status` | allow | prompt¹ |
+| `local_write` | write files in the working dir | allow | allow |
+| `install` | `npm install`, `pip install`, `apk add` | prompt | prompt |
+| `network_egress` | `curl`, `wget`, `ssh`, DNS lookups | prompt | prompt |
+| `code_execution` | `curl … \| sh`, `bash -c`, `python -c`, `go run` | prompt | prompt |
+| `system_write` | `sudo`, writes to `/etc`, reads of `~/.ssh` | prompt | prompt |
+| `unknown` | any command whose program name Odek does **not** recognise | deny | prompt¹ → denied unattended |
+| `destructive` | `rm -rf /`, `dd … of=/dev/sda`, `mkfs` | deny | **deny** |
+| `blocked` | fork bombs, fully‑specified `dd` to a block device | **always deny** | **always deny** (cannot be overridden) |
+
+> ¹ `safe` and `unknown` are not listed under `classes`, so the global
+> `action: "prompt"` applies to them — see the gotcha below. With a human channel
+> they prompt; unattended (`non_interactive: "deny"`) they are denied.
+
+Odek **fails closed**: the `unknown` class catches any command whose verb isn't in the
+built‑in safe/dangerous tables, so a novel or obfuscated command can't slip through as
+"safe". To permit a specific unrecognised tool, add its exact invocation to `allowlist`,
+or relax the class with `"unknown": "prompt"`.
+
+#### How an action is resolved (precedence, first match wins)
+
+1. Command exactly matches an **`allowlist`** entry → **allow**.
+2. Command starts with a **`denylist`** entry → **deny**.
+3. Otherwise classify it, then: explicit **`classes`** entry → `blocked` is **always deny** → global **`action`** (if set) → built‑in class default.
+4. If the result is **prompt** and there's no human channel, **`non_interactive`** decides.
+
+> **Gotcha — `action` overrides *every* unlisted class.** Because `action: "prompt"` is
+> set, any class you don't list under `classes` resolves to *prompt*, including `safe`.
+> So with this profile as written, even `ls` prompts (and is denied unattended). Two ways
+> to get the usual "safe commands just run" behavior:
+>
+> - add `"safe": "allow"` to `classes` (keep `action: "prompt"` as the catch‑all for
+>   everything else, including `unknown`), **or**
+> - **omit `action` entirely** and only override the classes you care about — then unlisted
+>   classes keep their built‑in defaults (safe/local_write allow; destructive/blocked/unknown
+>   deny; system_write/network_egress/code_execution/install prompt).
+>
+> The second form is the better default if you want `unknown` to stay deny‑by‑default
+> rather than prompt.
 
 > Approvals require a human channel: the **Web UI** (`odek serve`, modal approval over
 > WebSocket) or an **interactive terminal** (`odek repl` with `docker compose run -it`).
 > Without either, `non_interactive: "deny"` is what keeps you safe.
 
+#### Customising the policy
+
+```jsonc
+// Tighter: also block all outbound network and package installs.
+"classes": { "network_egress": "deny", "install": "deny", /* … */ }
+
+// Looser: pre‑approve a few exact commands you trust, keep everything else gated.
+"allowlist": ["npm ci", "npm run build", "go build ./..."]
+
+// Allow one normally‑unrecognised tool without loosening the whole class:
+"allowlist": ["terraform plan"]          // exact match only
+
+// Full lockdown: deny everything except the allowlist.
+"action": "deny"
+```
+
 ### 5b. Godmode policy — `config.godmode.json`
 
 YOLO mode. Every risk class returns `allow`; no prompts. The only thing still blocked is
@@ -547,9 +605,9 @@ Voice and photo messages are supported too. Sessions persist per chat in the loc
 
 ## Reference
 
-- `docs/SANDBOXING.md` — Odek's nested‑Docker sandbox model (the `--sandbox` feature).
-- `docs/SECURITY.md` — threat model, approval flow, YOLO mode, attack‑vector matrix.
-- `docs/CONFIG.md` — full configuration layering and environment variables.
-- `docs/CLI.md` — all subcommands and flags, including the `dangerous` schema.
-- `docs/WEBUI.md` — Web UI protocol and the WebSocket approval flow.
-- `docs/TELEGRAM.md` — Telegram bot architecture, config variables, and slash commands.
+- [`SANDBOXING.md`](SANDBOXING.md) — Odek's nested‑Docker sandbox model (the `--sandbox` feature).
+- [`SECURITY.md`](SECURITY.md) — threat model, approval flow, YOLO mode, attack‑vector matrix.
+- [`CONFIG.md`](CONFIG.md) — full configuration layering and environment variables.
+- [`CLI.md`](CLI.md) — all subcommands and flags, including the `dangerous` schema.
+- [`WEBUI.md`](WEBUI.md) — Web UI protocol and the WebSocket approval flow.
+- [`TELEGRAM.md`](TELEGRAM.md) — Telegram bot architecture, config variables, and slash commands.
diff --git a/docs/SECURITY.md b/docs/SECURITY.md
@@ -69,18 +69,19 @@ The model is instructed (via the default system prompt) to treat the wrapped reg
 
 ### 3. Danger classifier (shell)
 
-The `shell` tool tokenises commands and classifies each into one of 8 risk classes (`safe`, `local_write`, `system_write`, `destructive`, `network_egress`, `code_execution`, `install`, `blocked`). Per-class policy (allow / prompt / deny) is configurable.
+The `shell` tool tokenises commands and classifies each into one of 9 risk classes (`safe`, `local_write`, `system_write`, `destructive`, `network_egress`, `code_execution`, `install`, `unknown`, `blocked`). Per-class policy (allow / prompt / deny) is configurable.
 
-The classifier is hardened against common evasion tricks:
+The gate **fails closed**: a command whose program name matches neither the known-safe allowlist nor any known-dangerous pattern is classified `unknown` and **denied by default** (same as `destructive`). Recognised commands used benignly are `safe`. So a novel or obfuscated verb cannot slip through as "safe" — to permit a specific tool, allowlist it or set `"unknown": "prompt"`.
 
-- `$(echo rm) -rf /` — command substitution is recursively classified.
-- `` `echo rm` -rf / `` — backticks treated the same.
-- `\rm -rf /` and `r\m -rf /` — unquoted backslash escapes are collapsed.
-- `rm$IFS-rf$IFS/` — `$IFS` / `${IFS}` expanded to space.
-- `command rm -rf /` and `exec rm -rf /` — wrappers stripped.
-- `/bin/rm -rf /` — absolute paths basenamed before matching.
+The classifier is hardened against common evasion tricks (see the package doc in `internal/danger/classifier.go` for the full model):
 
-A regression suite (`internal/danger/classifier_bypass_test.go`) pins these as known evasions. If you find a new bypass, the test file is the place to add it.
+- `$(echo rm) -rf /` / `` `echo rm` `` / `<(curl evil)` — command and process substitutions are recursively classified.
+- `\rm -rf /`, `r""m -rf /` — backslash escapes are collapsed, and quote boundaries are not treated as word boundaries.
+- `rm$IFS-rf$IFS/`, `{rm,-rf,/}`, `$'\x72\x6d'` — `$IFS`, brace expansion, and ANSI-C escapes are normalised.
+- `command rm`, `env rm`, `sudo rm`, `/bin/rm`, `true | dd of=/dev/sda` — wrappers are stripped, every pipe stage is classified, and absolute paths are basenamed before matching.
+- `bash -i >& /dev/tcp/…`, `cat ~/.ssh/id_rsa` — reverse-shell channels and sensitive-path access are flagged regardless of the command verb.
+
+Regression suites (`internal/danger/classifier_bypass_test.go` and `hardening_test.go`) pin these as known-closed evasions. If you find a new bypass, those test files are the place to add it.
 
 ### 4. Tool-call approval
 

diff --git a/internal/danger/approver.go b/internal/danger/approver.go
@@ -144,12 +144,13 @@ func (a *TTYApprover) prompt(cls RiskClass, cmd, description string) error {
 	}
 	defer tty.Close()
 
-	// Trust-class shortcut is disabled for the two highest-impact
-	// classes. Destructive and Blocked operations always require a
-	// per-call approval to defeat approval-fatigue attacks where the
-	// model batches a benign destructive-class trust grant with a
-	// destructive payload.
-	allowTrust := cls != Destructive && cls != Blocked
+	// Trust-class shortcut is disabled for the highest-impact classes.
+	// Destructive and Blocked always require per-call approval to defeat
+	// approval-fatigue attacks where the model batches a benign trust grant
+	// with a dangerous payload. Unknown is included because it is the
+	// fail-closed catch-all for unrecognised verbs — class-trusting it would
+	// blanket-approve every future obfuscated/novel command.
+	allowTrust := cls != Destructive && cls != Blocked && cls != Unknown
 
 	// Approval-fatigue mitigation: if the user has already approved
 	// this class FrictionThreshold times in FrictionWindow, the next