Skip to content
5 changes: 3 additions & 2 deletions cmd/odek/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ Use for: reading files, listing directories, running tests, building code, and g
In sandbox mode (--sandbox), commands run inside the Docker container with restricted permissions.
In host mode (default), commands run with the same permissions as the odek process.

Risk classes: safe, local_write, system_write, destructive, network_egress, code_execution, install, blocked
High-risk operations may prompt for approval (configurable via dangerous section in odek.json).`
Risk classes: safe, local_write, system_write, destructive, network_egress, code_execution, install, unknown, blocked
High-risk operations may prompt for approval (configurable via dangerous section in odek.json).
The gate fails closed: an unrecognised command classifies as "unknown" and is denied by default.`
}

func (t *shellTool) Schema() any {
Expand Down
35 changes: 6 additions & 29 deletions cmd/odek/subagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,8 +492,8 @@ func truncate(s string, n int) string {
// outside CWD, an MCP server response). We:
// - Force NonInteractiveAction to deny (sub-agents have no TTY).
// - Clamp the action for Destructive, CodeExecution, Install,
// SystemWrite, and NetworkEgress to Deny so the sub-agent cannot
// escalate beyond LocalWrite without coming back through the
// SystemWrite, NetworkEgress, and Unknown to Deny so the sub-agent
// cannot escalate beyond LocalWrite without coming back through the
// parent.
//
// maxRisk caps the highest risk class the sub-agent will execute.
Expand Down Expand Up @@ -522,15 +522,15 @@ func applySubagentTrust(dc *danger.DangerousConfig, trustLevel, maxRisk string)
danger.Install,
danger.SystemWrite,
danger.NetworkEgress,
danger.Unknown,
danger.Blocked,
} {
dc.Classes[cls] = danger.Deny
}
}

if maxRisk != "" {
cap := danger.RiskClass(maxRisk)
capRank := riskRank(cap)
capRank := danger.Rank(danger.RiskClass(maxRisk))
for _, cls := range []danger.RiskClass{
danger.Safe,
danger.LocalWrite,
Expand All @@ -539,35 +539,12 @@ func applySubagentTrust(dc *danger.DangerousConfig, trustLevel, maxRisk string)
danger.NetworkEgress,
danger.CodeExecution,
danger.Install,
danger.Unknown,
danger.Blocked,
} {
if riskRank(cls) > capRank {
if danger.Rank(cls) > capRank {
dc.Classes[cls] = danger.Deny
}
}
}
}

// riskRank returns the ordinal severity of cls: the higher the value, the
// riskier the class. It is a local copy of the ordering used by
// internal/danger (see internal/danger/classifier.go), kept here so that
// applySubagentTrust can compare classes against a max-risk cap.
//
// Unknown must have its own rank. Without one it fell through to the default
// of 0, which is below Safe, so a cap of "unknown" caused every class,
// including Safe, to compare as "above the cap" and be denied. Unknown sits
// between SystemWrite and Destructive; Destructive and Blocked move up by one
// to make room. Callers only compare ranks, so the absolute values are not
// significant.
// NOTE(review): Unknown's position follows the documented class ordering —
// confirm it still matches internal/danger.
//
// Classes not listed here rank 0, i.e. below Safe.
func riskRank(cls danger.RiskClass) int {
	switch cls {
	case danger.Safe:
		return 1
	case danger.LocalWrite:
		return 2
	case danger.Install:
		return 3
	case danger.NetworkEgress:
		return 4
	case danger.CodeExecution:
		return 5
	case danger.SystemWrite:
		return 6
	case danger.Unknown:
		return 7
	case danger.Destructive:
		return 8
	case danger.Blocked:
		return 9
	default:
		return 0
	}
}
23 changes: 23 additions & 0 deletions cmd/odek/subagent_trust_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func TestApplySubagentTrust_Untrusted_LocksDownEscalationClasses(t *testing.T) {
danger.Install,
danger.SystemWrite,
danger.NetworkEgress,
danger.Unknown,
danger.Blocked,
} {
if got := dc.Classes[cls]; got != danger.Deny {
Expand Down Expand Up @@ -63,6 +64,7 @@ func TestApplySubagentTrust_MaxRisk_ClampsAbove(t *testing.T) {
danger.NetworkEgress,
danger.CodeExecution,
danger.Install,
danger.Unknown,
danger.Blocked,
} {
if got := dc.Classes[cls]; got != danger.Deny {
Expand All @@ -76,3 +78,24 @@ func TestApplySubagentTrust_MaxRisk_ClampsAbove(t *testing.T) {
}
}
}

// TestApplySubagentTrust_MaxRiskUnknown_KeepsSafeOpen is a regression test for
// the max-risk cap miscomputation: while Unknown had no place in the shared
// rank ordering, max_risk="unknown" resolved to rank 0 and force-denied even
// Safe. With the fix, Safe and LocalWrite must not be denied, and the classes
// ranked above Unknown must be.
func TestApplySubagentTrust_MaxRiskUnknown_KeepsSafeOpen(t *testing.T) {
	var dc danger.DangerousConfig
	applySubagentTrust(&dc, "", "unknown")

	// A missing entry counts as "open"; only an explicit Deny is a failure.
	mustStayOpen := []danger.RiskClass{danger.Safe, danger.LocalWrite}
	for _, cls := range mustStayOpen {
		got, ok := dc.Classes[cls]
		if !ok || got != danger.Deny {
			continue
		}
		t.Errorf("Class %s must stay open with max_risk=unknown, got %q", cls, got)
	}

	// Destructive and Blocked rank above Unknown, so the cap must deny them.
	mustBeDenied := []danger.RiskClass{danger.Destructive, danger.Blocked}
	for _, cls := range mustBeDenied {
		got := dc.Classes[cls]
		if got == danger.Deny {
			continue
		}
		t.Errorf("Class %s = %q, want deny with max_risk=unknown", cls, got)
	}
}
10 changes: 6 additions & 4 deletions cmd/odek/wsapprover.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,13 @@ type approvalRequest struct {
FrictionApprovals int `json:"friction_approvals,omitempty"`
}

// allowTrustForClass reports whether the "trust this class" shortcut may be
// offered for cls. It mirrors the TTYApprover policy: destructive, blocked,
// and unknown must never be class-trusted, so an attacker cannot social-
// engineer a single broad approval into long-term carte blanche. Unknown is
// the fail-closed catch-all for unrecognised verbs; class-trusting it would
// blanket-approve every future obfuscated/novel command.
func allowTrustForClass(cls danger.RiskClass) bool {
	switch cls {
	case danger.Destructive, danger.Blocked, danger.Unknown:
		// Highest-impact classes: every call needs its own approval.
		return false
	default:
		return true
	}
}

// approvalResponse is received from the browser when the user responds.
Expand Down
16 changes: 16 additions & 0 deletions cmd/odek/wsapprover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ func TestWSApprover_AllowTrustFlag_PerClass(t *testing.T) {
{danger.Install, true},
{danger.Destructive, false},
{danger.Blocked, false},
{danger.Unknown, false},
}
for _, tc := range cases {
t.Run(string(tc.cls), func(t *testing.T) {
Expand Down Expand Up @@ -433,3 +434,18 @@ func TestWSApprover_TrustResponse_CoercedToApprove_ForBlocked(t *testing.T) {
t.Error("blocked class was cached as trusted — class trust must be impossible")
}
}

// TestWSApprover_TrustResponse_CoercedToApprove_ForUnknown checks that the
// fail-closed Unknown class can never be class-trusted. A forged "trust"
// response must be handled as a one-off approve (no error) and must not be
// cached, so a single social-engineered grant cannot blanket-approve
// unrecognised verbs.
func TestWSApprover_TrustResponse_CoercedToApprove_ForUnknown(t *testing.T) {
	approver := newWSApprover(nil)

	// The captured request itself is not inspected here; only the outcome is.
	if _, err := promptAndCaptureRequest(t, approver, danger.Unknown, "trust"); err != nil {
		t.Errorf("expected nil error (coerced to approve), got: %v", err)
	}

	if trusted := approver.approveAll[danger.Unknown]; trusted {
		t.Error("unknown class was cached as trusted — class trust must be impossible")
	}
}
2 changes: 1 addition & 1 deletion docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Ready-to-run Compose setup for Odek in two permission profiles:
> `run`/`repl`/`telegram` are unsandboxed by default.)

For the full walkthrough, threat model, and tuning, see
[`../DOCKER_COMPOSE_USER_GUIDE.md`](../DOCKER_COMPOSE_USER_GUIDE.md).
[`../docs/DOCKER_COMPOSE_USER_GUIDE.md`](../docs/DOCKER_COMPOSE_USER_GUIDE.md).

## Files

Expand Down
5 changes: 5 additions & 0 deletions docs/CLI.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,13 @@ When running without `--sandbox`, odek classifies every shell command by risk an
| 🔴 network_egress | **prompt** | `curl`, `git push`, `ssh`, `scp` |
| 🔴 code_execution | **prompt** | `curl url \| bash`, `eval`, `node -e`, `go run` |
| 🟠 install | **prompt** | `npm install`, `pip install`, `go install <path>` |
| 🔴 unknown | **deny** | any command whose program name isn't recognised |
| ⬛ blocked | **deny** | Fork bombs, `dd` to block devices |

odek **fails closed**: a command whose verb matches no known-safe or known-dangerous
pattern is classified `unknown` and denied by default. Permit a specific tool by adding
its exact invocation to `allowlist`, or soften the class with `"unknown": "prompt"`.

The approval prompt accepts:

- `A` — Approve once
Expand Down
2 changes: 1 addition & 1 deletion docs/DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ CI (`.github/workflows/test.yml`) runs the unit suite under `-race` on every pus
| `internal/ws` | WebSocket constant verification |
| `internal/resource` | @-reference parsing, file resolution, session resolution, security |
| `internal/render` | Terminal output, no-color mode, nil safety, tool call/result rendering |
| `internal/danger` | Command classification across 8 risk classes, config overrides, allow/denylist, classifier-bypass attempts, approver friction |
| `internal/danger` | Command classification across 9 risk classes (incl. fail-closed `unknown`), config overrides, allow/denylist, classifier-bypass attempts, approver friction |
| `internal/memory` | Facts CRUD, buffer ring, episodes, merge detector (go-vector), ReplaceEntry/AppendEntry, memory tool, security scan, LLM ranking, episode provenance |
| `internal/skills` | Loading, triggers, self-improvement heuristics, curation, LLM-enhanced generation, import, tools, AnalyzeMessages/RunAutoSaveLoop, ValidateSkillName, isPrivateHost |
| `internal/telegram` | Bot client, long-polling, command handlers, session management, plan CRUD, voice/photo download, health server, retry/backoff |
Expand Down
100 changes: 79 additions & 21 deletions DOCKER_COMPOSE_USER_GUIDE.md → docs/DOCKER_COMPOSE_USER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ These JSON files are mounted to `/home/odek/.odek/config.json` inside the contai

### 5a. Restricted policy — `config.restricted.json`

This is essentially Odek's default behavior, made explicit. Commands are risk‑classified;
destructive ones are denied, the rest prompt for approval. Crucially, `non_interactive`
is set to **`deny`** so that if the agent runs in a container *without* an attached
terminal or Web UI, high‑risk commands are blocked rather than silently allowed.
Commands are risk‑classified; destructive and unrecognised ones are denied, the rest
prompt for approval. Crucially, `non_interactive` is set to **`deny`** so that if the
agent runs in a container *without* an attached terminal or Web UI, anything that would
prompt is blocked rather than silently allowed.

```json
{
Expand All @@ -163,23 +163,81 @@ terminal or Web UI, high‑risk commands are blocked rather than silently allowe
}
```

**How the classes map** (built‑in risk model):
#### What each field does

| Class | Examples | Restricted action |
| --- | --- | --- |
| `safe` | `ls`, `cat`, `echo` | allow |
| `local_write` | write files in the working dir | allow |
| `system_write` | `chmod`, `chown`, `mkdir /etc` | prompt |
| `network_egress` | `curl`, `wget`, DNS lookups | prompt |
| `code_execution` | `go run`, `python x.py` | prompt |
| `install` | `npm install`, `apk add` | prompt |
| `destructive` | `rm -rf`, `git rm`, `docker rm` | **deny** |
| `blocked` | fork bombs, `dd` to block devices | **always deny** (cannot be overridden) |
| Field | Meaning |
| --- | --- |
| `sandbox` | `false` runs commands directly in this container (the Compose setup already *is* the sandbox). `true` would nest a second Docker sandbox — not what you want here. |
| `action` | **Global default** action for any class **not** listed under `classes`: `"prompt"` in this profile; `"allow"` = godmode; `"deny"` = lockdown. ⚠️ This overrides the *built‑in* per‑class defaults (see the gotcha below). |
| `non_interactive` | What to do with a **prompt**‑level command when there is no human channel (no TTY, no Web UI). `"deny"` blocks it; `"allow"` runs it. Always set this to `"deny"` for unattended/automated containers. |
| `classes` | Per‑class action overrides. The most specific setting — it wins over `action` and the built‑in defaults. Only list the classes you want to pin. |
| `allowlist` | Commands that always run, **exact string match**, no classification. Highest priority of all. Use for a handful of trusted exact commands (e.g. `"npm run deploy"`). |
| `denylist` | Commands that are always denied, **prefix match** after trimming. Beats classification and even godmode — but **not** the allowlist. |

#### How the classes map (built‑in risk model)

| Class | Examples | Built‑in default | This profile |
| --- | --- | --- | --- |
| `safe` | `ls`, `cat`, `grep`, `git status` | allow | prompt¹ |
| `local_write` | write files in the working dir | allow | allow |
| `install` | `npm install`, `pip install`, `apk add` | prompt | prompt |
| `network_egress` | `curl`, `wget`, `ssh`, DNS lookups | prompt | prompt |
| `code_execution` | `curl … \| sh`, `bash -c`, `python -c`, `go run` | prompt | prompt |
| `system_write` | `sudo`, writes to `/etc`, reads of `~/.ssh` | prompt | prompt |
| `unknown` | any command whose program name Odek does **not** recognise | deny | prompt¹ → denied unattended |
| `destructive` | `rm -rf /`, `dd … of=/dev/sda`, `mkfs` | deny | **deny** |
| `blocked` | fork bombs, fully‑specified `dd` to a block device | **always deny** | **always deny** (cannot be overridden) |

> ¹ `safe` and `unknown` are not listed under `classes`, so the global
> `action: "prompt"` applies to them — see the gotcha below. With a human channel
> they prompt; unattended (`non_interactive: "deny"`) they are denied.

Odek **fails closed**: the `unknown` class catches any command whose verb isn't in the
built‑in safe/dangerous tables, so a novel or obfuscated command can't slip through as
"safe". To permit a specific unrecognised tool, add its exact invocation to `allowlist`,
or relax the class with `"unknown": "prompt"`.

#### How an action is resolved (precedence, first match wins)

1. Command exactly matches an **`allowlist`** entry → **allow**.
2. Command starts with a **`denylist`** entry → **deny**.
3. Otherwise classify it, then: explicit **`classes`** entry → `blocked` is **always deny** → global **`action`** (if set) → built‑in class default.
4. If the result is **prompt** and there's no human channel, **`non_interactive`** decides.

> **Gotcha — `action` overrides *every* unlisted class.** Because `action: "prompt"` is
> set, any class you don't list under `classes` resolves to *prompt*, including `safe`.
> So with this profile as written, even `ls` prompts (and is denied unattended). Two ways
> to get the usual "safe commands just run" behavior:
>
> - add `"safe": "allow"` to `classes` (keep `action: "prompt"` as the catch‑all for
> everything else, including `unknown`), **or**
> - **omit `action` entirely** and only override the classes you care about — then unlisted
> classes keep their built‑in defaults (safe/local_write allow; destructive/blocked/unknown
> deny; system_write/network_egress/code_execution/install prompt).
>
> The second form is the better default if you want `unknown` to stay deny‑by‑default
> rather than prompt.

> Approvals require a human channel: the **Web UI** (`odek serve`, modal approval over
> WebSocket) or an **interactive terminal** (`odek repl` with `docker compose run -it`).
> Without either, `non_interactive: "deny"` is what keeps you safe.

#### Customising the policy

```jsonc
// Tighter: also block all outbound network and package installs.
"classes": { "network_egress": "deny", "install": "deny", /* … */ }

// Looser: pre‑approve a few exact commands you trust, keep everything else gated.
"allowlist": ["npm ci", "npm run build", "go build ./..."]

// Allow one normally‑unrecognised tool without loosening the whole class:
"allowlist": ["terraform plan"] // exact match only

// Full lockdown: deny everything except the allowlist.
"action": "deny"
```

### 5b. Godmode policy — `config.godmode.json`

YOLO mode. Every risk class returns `allow`; no prompts. The only thing still blocked is
Expand Down Expand Up @@ -547,9 +605,9 @@ Voice and photo messages are supported too. Sessions persist per chat in the loc

## Reference

- `docs/SANDBOXING.md` — Odek's nested‑Docker sandbox model (the `--sandbox` feature).
- `docs/SECURITY.md` — threat model, approval flow, YOLO mode, attack‑vector matrix.
- `docs/CONFIG.md` — full configuration layering and environment variables.
- `docs/CLI.md` — all subcommands and flags, including the `dangerous` schema.
- `docs/WEBUI.md` — Web UI protocol and the WebSocket approval flow.
- `docs/TELEGRAM.md` — Telegram bot architecture, config variables, and slash commands.
- [`SANDBOXING.md`](SANDBOXING.md) — Odek's nested‑Docker sandbox model (the `--sandbox` feature).
- [`SECURITY.md`](SECURITY.md) — threat model, approval flow, YOLO mode, attack‑vector matrix.
- [`CONFIG.md`](CONFIG.md) — full configuration layering and environment variables.
- [`CLI.md`](CLI.md) — all subcommands and flags, including the `dangerous` schema.
- [`WEBUI.md`](WEBUI.md) — Web UI protocol and the WebSocket approval flow.
- [`TELEGRAM.md`](TELEGRAM.md) — Telegram bot architecture, config variables, and slash commands.
19 changes: 10 additions & 9 deletions docs/SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,19 @@ The model is instructed (via the default system prompt) to treat the wrapped reg

### 3. Danger classifier (shell)

The `shell` tool tokenises commands and classifies each into one of 8 risk classes (`safe`, `local_write`, `system_write`, `destructive`, `network_egress`, `code_execution`, `install`, `blocked`). Per-class policy (allow / prompt / deny) is configurable.
The `shell` tool tokenises commands and classifies each into one of 9 risk classes (`safe`, `local_write`, `system_write`, `destructive`, `network_egress`, `code_execution`, `install`, `unknown`, `blocked`). Per-class policy (allow / prompt / deny) is configurable.

The classifier is hardened against common evasion tricks:
The gate **fails closed**: a command whose program name matches neither the known-safe allowlist nor any known-dangerous pattern is classified `unknown` and **denied by default** (same as `destructive`). Recognised commands used benignly are `safe`. So a novel or obfuscated verb cannot slip through as "safe" — to permit a specific tool, allowlist it or set `"unknown": "prompt"`.

- `$(echo rm) -rf /` — command substitution is recursively classified.
- `` `echo rm` -rf / `` — backticks treated the same.
- `\rm -rf /` and `r\m -rf /` — unquoted backslash escapes are collapsed.
- `rm$IFS-rf$IFS/` — `$IFS` / `${IFS}` expanded to space.
- `command rm -rf /` and `exec rm -rf /` — wrappers stripped.
- `/bin/rm -rf /` — absolute paths basenamed before matching.
The classifier is hardened against common evasion tricks (see the package doc in `internal/danger/classifier.go` for the full model):

A regression suite (`internal/danger/classifier_bypass_test.go`) pins these as known evasions. If you find a new bypass, the test file is the place to add it.
- `$(echo rm) -rf /` / `` `echo rm` `` / `<(curl evil)` — command and process substitutions are recursively classified.
- `\rm -rf /`, `r""m -rf /` — backslash escapes are collapsed, and quote boundaries are not treated as word boundaries.
- `rm$IFS-rf$IFS/`, `{rm,-rf,/}`, `$'\x72\x6d'` — `$IFS`, brace expansion, and ANSI-C escapes are normalised.
- `command rm`, `env rm`, `sudo rm`, `/bin/rm`, `true | dd of=/dev/sda` — wrappers are stripped, every pipe stage is classified, and absolute paths are basenamed before matching.
- `bash -i >& /dev/tcp/…`, `cat ~/.ssh/id_rsa` — reverse-shell channels and sensitive-path access are flagged regardless of the command verb.

Regression suites (`internal/danger/classifier_bypass_test.go` and `hardening_test.go`) pin these as known-closed evasions. If you find a new bypass, those test files are the place to add it.

### 4. Tool-call approval

Expand Down
13 changes: 7 additions & 6 deletions internal/danger/approver.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,13 @@ func (a *TTYApprover) prompt(cls RiskClass, cmd, description string) error {
}
defer tty.Close()

// Trust-class shortcut is disabled for the two highest-impact
// classes. Destructive and Blocked operations always require a
// per-call approval to defeat approval-fatigue attacks where the
// model batches a benign destructive-class trust grant with a
// destructive payload.
allowTrust := cls != Destructive && cls != Blocked
// Trust-class shortcut is disabled for the highest-impact classes.
// Destructive and Blocked always require per-call approval to defeat
// approval-fatigue attacks where the model batches a benign trust grant
// with a dangerous payload. Unknown is included because it is the
// fail-closed catch-all for unrecognised verbs — class-trusting it would
// blanket-approve every future obfuscated/novel command.
allowTrust := cls != Destructive && cls != Blocked && cls != Unknown

// Approval-fatigue mitigation: if the user has already approved
// this class FrictionThreshold times in FrictionWindow, the next
Expand Down
Loading
Loading