From 6cb0838fd40139860c0ba39611cbad22a204cd1b Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Wed, 6 May 2026 08:23:06 -0400 Subject: [PATCH 1/3] cli-golden: fixture corpus + TS-CLI snapshots + diff runner (#248 part c) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 1 D5 of the Rust CLI port (#248). Captures the current TS CLI's output across a synthetic fixture ledger so the eight Wave 2 fan-out PRs have a deterministic target to assert against. - tests/fixtures/cli-golden/ — hand-built ledger covering claude / codex / opencode sources, plus the activity buckets compare and hotspots aggregate over (coding, testing, review, delegation) - tests/fixtures/cli-golden/scripts/{build-ledger,capture-snapshots}.mjs — node scripts that regenerate the ledger and re-run every TS-CLI invocation against it under a sealed env (RELAYBURN_HOME at the fixture, HOME at a tmp dir so ingestAll finds no real sessions, RELAYBURN_ARCHIVE=0 to force the streaming path) - tests/fixtures/cli-golden/snapshots/ — 16 captured stdout files covering summary / hotspots / overhead / overhead-trim / compare / state-status (TTY + --json) plus help text for ingest / run / mcp-server / top-level - tests/fixtures/cli-golden/invocations.json — args + sealed env per snapshot, with enabled: bool toggles Wave 2 PRs flip on per command as the Rust implementation lands - crates/relayburn-cli/tests/golden.rs — Rust integration test that iterates invocations.json, spawns the Rust burn binary against the fixture, and prints a unified diff on mismatch. Gated on BURN_GOLDEN=1 so cargo test --workspace stays green while the Rust CLI is a stub; per-invocation enabled: false keeps even BURN_GOLDEN=1 runs silent until Wave 2 wires up each command. - tests/fixtures/cli-golden/README.md documents what's snapshotted, how to refresh, the per-command Wave 2 ownership table, and how each Wave 2 PR should flip its enabled flags. 
Refresh: pnpm run golden:capture (or run the script directly). --- CHANGELOG.md | 1 + Cargo.lock | 2 + crates/relayburn-cli/Cargo.toml | 6 + crates/relayburn-cli/tests/golden.rs | 338 +++++++++++++++ package.json | 3 +- tests/fixtures/cli-golden/README.md | 159 +++++++ tests/fixtures/cli-golden/invocations.json | 98 +++++ tests/fixtures/cli-golden/ledger/.gitignore | 13 + .../cli-golden/ledger/ledger.content.idx | 7 + tests/fixtures/cli-golden/ledger/ledger.idx | 17 + tests/fixtures/cli-golden/ledger/ledger.jsonl | 18 + tests/fixtures/cli-golden/project/CLAUDE.md | 23 + .../cli-golden/scripts/build-ledger.mjs | 397 ++++++++++++++++++ .../cli-golden/scripts/capture-snapshots.mjs | 139 ++++++ .../snapshots/compare-json.stdout.txt | 184 ++++++++ .../cli-golden/snapshots/compare.stdout.txt | 21 + .../snapshots/hotspots-json.stdout.txt | 118 ++++++ .../cli-golden/snapshots/hotspots.stdout.txt | 30 ++ .../snapshots/ingest-help.stdout.txt | 11 + .../snapshots/mcp-server-help.stdout.txt | 11 + .../snapshots/overhead-json.stdout.txt | 126 ++++++ .../snapshots/overhead-trim-json.stdout.txt | 73 ++++ .../snapshots/overhead-trim.stdout.txt | 42 ++ .../cli-golden/snapshots/overhead.stdout.txt | 7 + .../cli-golden/snapshots/run-help.stdout.txt | 11 + .../snapshots/state-status-json.stdout.txt | 49 +++ .../snapshots/state-status.stdout.txt | 19 + .../snapshots/summary-json.stdout.txt | 194 +++++++++ .../cli-golden/snapshots/summary.stdout.txt | 14 + .../snapshots/top-level-help.stdout.txt | 51 +++ 30 files changed, 2181 insertions(+), 1 deletion(-) create mode 100644 crates/relayburn-cli/tests/golden.rs create mode 100644 tests/fixtures/cli-golden/README.md create mode 100644 tests/fixtures/cli-golden/invocations.json create mode 100644 tests/fixtures/cli-golden/ledger/.gitignore create mode 100644 tests/fixtures/cli-golden/ledger/ledger.content.idx create mode 100644 tests/fixtures/cli-golden/ledger/ledger.idx create mode 100644 tests/fixtures/cli-golden/ledger/ledger.jsonl 
create mode 100644 tests/fixtures/cli-golden/project/CLAUDE.md create mode 100644 tests/fixtures/cli-golden/scripts/build-ledger.mjs create mode 100644 tests/fixtures/cli-golden/scripts/capture-snapshots.mjs create mode 100644 tests/fixtures/cli-golden/snapshots/compare-json.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/compare.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/ingest-help.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/mcp-server-help.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/overhead-json.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/overhead-trim-json.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/overhead-trim.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/overhead.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/run-help.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/state-status.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/summary.stdout.txt create mode 100644 tests/fixtures/cli-golden/snapshots/top-level-help.stdout.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f180615..cd12a4bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Cross-package release notes for relayburn. 
Package changelogs contain package-le ## [Unreleased] +- `relayburn-cli` (Rust): add the CLI golden-output test rig — synthetic fixture ledger under `tests/fixtures/cli-golden/`, a node script that captures TS-CLI stdout/stderr across 16 invocations (summary / hotspots / overhead / overhead-trim / compare / state-status in TTY + `--json`, plus help text for ingest / run / mcp-server / top-level), and `crates/relayburn-cli/tests/golden.rs` — a `BURN_GOLDEN=1`-gated diff runner Wave 2 PRs flip on per-command via `enabled: true` in `invocations.json`. (#248) - `relayburn-ingest` (Rust): port the per-process gap-warning state machine (`gap` module — `record_session_gap`, `emit_gap_warning`, `count_tool_call_gaps`, `reset_ingest_gap_warnings`, `set_ingest_gap_writer`) and `reingest_missing_content` (`reingest` module). Suppression mirrors the TS surface: one warning per fresh affected session, silent on steady-state, re-fires after the affected set decays back to empty. `relayburn-ledger` adds `Ledger::list_user_turn_session_ids` to power the `reingest_missing_content` skip filter alongside `list_content_session_ids`. (#278) - `relayburn-analyze` (Rust): port the behavioral-pattern detectors (`patterns` module). `detect_patterns` runs retry-loop, failure-run, cancellation-run, compaction-loss, edit-revert, OpenCode skill-recall-dup, OpenCode skill-pruning-protection, OpenCode system-prompt-tax, and edit-heavy detectors against an ordered turn stream, with optional content-sidecar / tool-result-event / user-turn enrichment. Public surface: `detect_patterns`, `DetectPatternsOptions`; per-pattern result structs are re-exported from `findings` (`RetryLoop`, `FailureRun`, `CancellationRun`, `CompactionLoss`, `EditRevertCycle`, `SkillRecallDup`, `SkillPruningProtection`, `SystemPromptTax`, `EditHeavySession`, `SessionPatternSummary`, `PatternsResult`, `PatternEventSource`). 
(#275) - `relayburn-analyze` (Rust): port the tool-output-bloat detector — Signal A's `BASH_MAX_OUTPUT_LENGTH` static-config check (with `~/.claude/settings.json` + `/.claude/settings.json` loader) and Signal B's cross-harness observed-bloat aggregation, plus the `WasteFinding` adapter. Public surface mirrors `@relayburn/analyze`: `BASH_MAX_OUTPUT_ENV_KEY`, `DEFAULT_BLOAT_TOKEN_THRESHOLD`, `detect_observed_bloat`, `detect_static_config_bloat`, `detect_tool_output_bloat`, `load_claude_settings`, `project_claude_settings_path`, `user_claude_settings_path`, `tool_output_bloat_to_finding`. (#271) diff --git a/Cargo.lock b/Cargo.lock index a5b79ccf..65e4be5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -417,6 +417,8 @@ name = "relayburn-cli" version = "0.0.0" dependencies = [ "relayburn-sdk", + "serde", + "serde_json", ] [[package]] diff --git a/crates/relayburn-cli/Cargo.toml b/crates/relayburn-cli/Cargo.toml index aa00e6c1..913c375e 100644 --- a/crates/relayburn-cli/Cargo.toml +++ b/crates/relayburn-cli/Cargo.toml @@ -22,3 +22,9 @@ path = "src/main.rs" # (currently 0.0.1) without forcing a lockstep bump on every release. # Tighten this once the SDK ships a stable 0.x line. relayburn-sdk = { path = "../relayburn-sdk", version = "0.0" } + +[dev-dependencies] +# Used by `tests/golden.rs` to load the invocations contract that drives +# both the TS-CLI snapshot capture and the Rust diff runner. +serde = { workspace = true } +serde_json = { workspace = true } diff --git a/crates/relayburn-cli/tests/golden.rs b/crates/relayburn-cli/tests/golden.rs new file mode 100644 index 00000000..aac89a0b --- /dev/null +++ b/crates/relayburn-cli/tests/golden.rs @@ -0,0 +1,338 @@ +//! TS-CLI vs Rust-CLI golden-output diff runner. +//! +//! For each invocation listed in `tests/fixtures/cli-golden/invocations.json`, +//! this test: +//! 1. Spawns the Rust `burn` binary against the fixture ledger and project, +//! with the same sealed env the TS capture used (`HOME` pointed at an +//! 
empty tmp dir, `RELAYBURN_HOME` at the fixture, `RELAYBURN_ARCHIVE=0`, +//! `NO_COLOR=1`). +//! 2. Reads the captured TS stdout snapshot (and stderr if present). +//! 3. Normalizes the live Rust output the same way the capture script does +//! (absolute fixture paths → `${RELAYBURN_HOME}` / `${PROJECT}`, +//! wall-clock millisecond fields → `${MTIME}` / `${TS}`). +//! 4. Asserts the normalized Rust output matches the snapshot byte-for-byte +//! and prints a unified diff on mismatch. +//! +//! ## Why this is gated off on `main` +//! +//! Today the Rust CLI is an `eprintln!("not yet implemented") + exit(1)` stub +//! — every snapshot will fail. That's deliberate: this PR (#248-c) ships the +//! *target* the Wave 2 fan-out PRs (#248 D1–D8 in `RUST_PORT_WAVE_PLAN.md`) +//! get to assert against. As each command lands its Rust implementation, +//! the matching invocation in `invocations.json` flips its `enabled` flag +//! to `true` and the test starts enforcing parity. +//! +//! Run the full enforced suite locally with: +//! BURN_GOLDEN=1 cargo test --test golden -- --nocapture +//! +//! Refresh the TS snapshots after a CLI behavior change with: +//! pnpm run build && \ +//! node tests/fixtures/cli-golden/scripts/capture-snapshots.mjs +//! +//! See `tests/fixtures/cli-golden/README.md` for the full Wave 2 contract. + +use std::collections::BTreeMap; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; + +use serde::Deserialize; + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct Invocation { + name: String, + args: Vec<String>, + #[serde(default)] + expect_status: Option<i32>, + /// Set to true once the Rust CLI implements the command surface this + /// snapshot covers. Wave 2 PRs flip this per-command. Until then the + /// test for that invocation is skipped *unconditionally* (the diff + /// runner logs "skip <name> (enabled=false)" rather than failing).
+ #[serde(default)] + enabled: bool, + /// Optional extra env to set for this specific invocation. Mirrors + /// `inv.env` in the JSON contract so capture-snapshots.mjs and + /// golden.rs stay aligned. + #[serde(default)] + env: BTreeMap<String, String>, +} + +#[test] +fn golden_diff_against_ts_cli_snapshots() { + let golden_gate = std::env::var("BURN_GOLDEN").ok(); + if golden_gate.as_deref() != Some("1") { + // CI runs `cargo test --workspace` without BURN_GOLDEN set, so the + // diff runner is silent there. Local devs run `BURN_GOLDEN=1 + // cargo test --test golden -- --nocapture` to enforce the gate; + // once Wave 2 finishes, the gate flips on by default in CI. + eprintln!( + "[golden] BURN_GOLDEN!=1 — skipping (set BURN_GOLDEN=1 to enforce). \ + Even when enforced, individual invocations stay skipped until their \ + `enabled: true` flag is set in invocations.json." + ); + } + + let fixture_dir = repo_root().join("tests").join("fixtures").join("cli-golden"); + assert!( + fixture_dir.is_dir(), + "fixture corpus missing at {}", + fixture_dir.display() + ); + + let invocations_path = fixture_dir.join("invocations.json"); + let raw = fs::read_to_string(&invocations_path).unwrap_or_else(|err| { + panic!( + "failed to read invocations from {}: {err}", + invocations_path.display() + ) + }); + let invocations: Vec<Invocation> = serde_json::from_str(&raw) + .unwrap_or_else(|err| panic!("invocations.json is malformed: {err}")); + + let snapshots_dir = fixture_dir.join("snapshots"); + let ledger_home = fixture_dir.join("ledger"); + let project_dir = fixture_dir.join("project"); + + // Sealed HOME so the Rust binary's eventual ingest sweep doesn't + // discover the developer's real session stores.
+ let sealed_home = tempdir_under(&fixture_dir); + + let burn = burn_binary_path(); + + let mut failures = Vec::new(); + for inv in &invocations { + if !inv.enabled { + eprintln!("[golden] skip {} (enabled=false)", inv.name); + continue; + } + if golden_gate.as_deref() != Some("1") { + eprintln!("[golden] skip {} (BURN_GOLDEN!=1)", inv.name); + continue; + } + + let snapshot_stdout = snapshots_dir.join(format!("{}.stdout.txt", inv.name)); + let expected_stdout = fs::read_to_string(&snapshot_stdout).unwrap_or_else(|err| { + panic!( + "snapshot missing for {} ({}): {err}", + inv.name, + snapshot_stdout.display() + ) + }); + let snapshot_stderr = snapshots_dir.join(format!("{}.stderr.txt", inv.name)); + let expected_stderr = if snapshot_stderr.is_file() { + fs::read_to_string(&snapshot_stderr).unwrap_or_default() + } else { + String::new() + }; + + let mut cmd = Command::new(&burn); + cmd.args(&inv.args) + .current_dir(repo_root()) + .env_clear() + // Keep PATH so the binary can find shared libraries; everything + // else gets a sealed value. 
+ .env("PATH", std::env::var_os("PATH").unwrap_or_default()) + .env("HOME", &sealed_home) + .env("RELAYBURN_HOME", &ledger_home) + .env("RELAYBURN_CONTENT_STORE", "off") + .env("RELAYBURN_ARCHIVE", "0") + .env("NO_COLOR", "1") + .env("FORCE_COLOR", "0"); + for (k, v) in &inv.env { + cmd.env(k, v); + } + + let output = match cmd.output() { + Ok(o) => o, + Err(err) => { + failures.push(format!("{}: spawn failed: {err}", inv.name)); + continue; + } + }; + + let expected_status = inv.expect_status.unwrap_or(0); + let actual_status = output.status.code().unwrap_or(-1); + let stdout = normalize( + std::str::from_utf8(&output.stdout).unwrap_or(""), + &ledger_home, + &project_dir, + ); + let stderr = normalize( + std::str::from_utf8(&output.stderr).unwrap_or(""), + &ledger_home, + &project_dir, + ); + + let mut diffs = Vec::new(); + if actual_status != expected_status { + diffs.push(format!( + " exit status: expected {expected_status}, got {actual_status}" + )); + } + if stdout != expected_stdout { + diffs.push(format!( + " stdout mismatch:\n{}", + indent(&unified_diff(&expected_stdout, &stdout), " "), + )); + } + if stderr != expected_stderr { + diffs.push(format!( + " stderr mismatch:\n{}", + indent(&unified_diff(&expected_stderr, &stderr), " "), + )); + } + if !diffs.is_empty() { + failures.push(format!("{}:\n{}", inv.name, diffs.join("\n"))); + } else { + eprintln!("[golden] ok {}", inv.name); + } + } + + let _ = fs::remove_dir_all(&sealed_home); + + if !failures.is_empty() { + panic!( + "{} golden diff failure(s):\n\n{}", + failures.len(), + failures.join("\n\n") + ); + } +} + +fn repo_root() -> PathBuf { + // CARGO_MANIFEST_DIR is `.../crates/relayburn-cli`. Walk up two levels + // to land at the workspace root regardless of which worktree we're in. 
+ let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest + .parent() + .and_then(|p| p.parent()) + .map(PathBuf::from) + .expect("CARGO_MANIFEST_DIR has no two-levels-up parent") +} + +fn burn_binary_path() -> PathBuf { + // CARGO_BIN_EXE_<name> is set by cargo for integration tests on the + // crate that owns the [[bin]]. Falls back to a workspace-relative path + // if a developer runs the test outside cargo (rare but possible). + if let Some(p) = option_env!("CARGO_BIN_EXE_burn") { + return PathBuf::from(p); + } + repo_root() + .join("target") + .join("debug") + .join(if cfg!(windows) { "burn.exe" } else { "burn" }) +} + +/// Apply the same path / mtime placeholders the capture script uses so the +/// snapshot stays portable across machines. Keep this in sync with +/// `tests/fixtures/cli-golden/scripts/capture-snapshots.mjs::normalize`. +fn normalize(text: &str, ledger_home: &Path, project_dir: &Path) -> String { + let mut out = text.replace( + ledger_home.to_str().expect("ledger home is utf8"), + "${RELAYBURN_HOME}", + ); + out = out.replace( + project_dir.to_str().expect("project dir is utf8"), + "${PROJECT}", + ); + out = squash_numeric_field(&out, "ledgerMtimeMsCurrent", "${MTIME}"); + out = squash_numeric_field(&out, "lastBuiltAt", "${TS}"); + out = squash_numeric_field(&out, "lastRebuildAt", "${TS}"); + out +} + +/// Replace `"<key>": <digits>` (with any spaces or tabs after the colon) with +/// `"<key>": "<placeholder>"`. Mirrors the JS regex in normalize() in the +/// capture script.
+fn squash_numeric_field(text: &str, key: &str, placeholder: &str) -> String { + let needle = format!("\"{key}\":"); + let mut out = String::with_capacity(text.len()); + let mut rest = text; + while let Some(idx) = rest.find(&needle) { + out.push_str(&rest[..idx]); + out.push_str(&needle); + let after_key = &rest[idx + needle.len()..]; + let trimmed_start = after_key.trim_start_matches(|c: char| c == ' ' || c == '\t'); + let ws_consumed = after_key.len() - trimmed_start.len(); + // If the value isn't a bare integer (e.g. `null`), bail and emit + // the original bytes untouched. + let digits_end = trimmed_start + .find(|c: char| !c.is_ascii_digit()) + .unwrap_or(trimmed_start.len()); + if digits_end == 0 { + out.push_str(&after_key[..ws_consumed]); + rest = &after_key[ws_consumed..]; + continue; + } + out.push(' '); + out.push('"'); + out.push_str(placeholder); + out.push('"'); + rest = &trimmed_start[digits_end..]; + } + out.push_str(rest); + out +} + +fn unified_diff(expected: &str, actual: &str) -> String { + // Hand-rolled minimal LCS-free diff: walk both side-by-side and emit + // `-`/`+` markers wherever a line differs. This is intentionally not a + // full Myers diff — it's enough to make a per-line drift obvious in + // the panic message without dragging in a `similar` dependency for + // a stub test. 
+ let exp_lines: Vec<&str> = expected.lines().collect(); + let act_lines: Vec<&str> = actual.lines().collect(); + let max = exp_lines.len().max(act_lines.len()); + let mut out = String::new(); + for i in 0..max { + let e = exp_lines.get(i).copied(); + let a = act_lines.get(i).copied(); + match (e, a) { + (Some(e), Some(a)) if e == a => { + out.push_str(" "); + out.push_str(e); + out.push('\n'); + } + (Some(e), Some(a)) => { + out.push_str("- "); + out.push_str(e); + out.push('\n'); + out.push_str("+ "); + out.push_str(a); + out.push('\n'); + } + (Some(e), None) => { + out.push_str("- "); + out.push_str(e); + out.push('\n'); + } + (None, Some(a)) => { + out.push_str("+ "); + out.push_str(a); + out.push('\n'); + } + (None, None) => break, + } + } + out +} + +fn indent(text: &str, prefix: &str) -> String { + text.lines() + .map(|l| format!("{prefix}{l}")) + .collect::<Vec<_>>() + .join("\n") +} + +fn tempdir_under(parent: &Path) -> PathBuf { + use std::time::{SystemTime, UNIX_EPOCH}; + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let pid = std::process::id(); + let dir = parent.join(format!(".golden-home-{pid}-{nanos}")); + fs::create_dir_all(&dir).expect("create sealed HOME"); + dir +} diff --git a/package.json b/package.json index 68692d95..25f15393 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ "test:ts": "tsc --build && pnpm run test", "clean": "tsc --build --clean", "tokenizer:measure": "pnpm run build && node scripts/measure-user-turn-tokenizer.mjs", - "pricing:update": "node scripts/update-pricing.mjs" + "pricing:update": "node scripts/update-pricing.mjs", + "golden:capture": "pnpm run build && node tests/fixtures/cli-golden/scripts/capture-snapshots.mjs" }, "devDependencies": { "@types/node": "^22.10.0", diff --git a/tests/fixtures/cli-golden/README.md b/tests/fixtures/cli-golden/README.md new file mode 100644 index 00000000..92125a9f --- /dev/null +++ b/tests/fixtures/cli-golden/README.md @@
-0,0 +1,159 @@ +# CLI golden snapshots + +This corpus captures the **TS CLI** output across a fixture ledger so the +**Rust CLI** port (#248, Wave 2 in `RUST_PORT_WAVE_PLAN.md`) can golden-diff +against it. It exists so eight Wave 2 fan-out PRs have a stable target to +assert against; today the Rust binary is a stub and the diff runner is a +no-op until Wave 2 flips invocations on one at a time. + +## Layout + +``` +tests/fixtures/cli-golden/ +├── README.md — you are here +├── invocations.json — args + sealed env per snapshot; the contract +│ shared between capture-snapshots.mjs and the +│ Rust diff runner +├── ledger/ — generated synthetic ledger +│ ├── ledger.jsonl — turns + user_turns + tool_result_events + +│ ├── ledger.idx — relationships, hand-built for stable output +│ └── ledger.content.idx +├── project/ — fake project directory +│ └── CLAUDE.md — overhead-eligible file so `burn overhead` +│ returns non-empty +├── scripts/ +│ ├── build-ledger.mjs — repopulates `ledger/` from scratch +│ └── capture-snapshots.mjs — runs every TS-CLI invocation and writes +│ normalized stdout/stderr to `snapshots/` +└── snapshots/ — one `<name>.stdout.txt` per invocation; an + optional `<name>.stderr.txt` if the command + wrote anything to stderr +``` + +## What's snapshotted + +`invocations.json` lists every TS-CLI surface the diff runner knows about. +The current set covers every read-path command (`summary`, `hotspots`, +`overhead`, `overhead trim`, `compare`, `state status`) in both TTY and +`--json` flavors, plus the help text for the action-path commands +(`burn ingest --help`, `burn run --help`, `burn mcp-server --help`) and the +top-level `burn --help`. + +Action-path commands themselves are deliberately *not* snapshotted: their +output depends on a real spawn lifecycle (running an agent harness or a +watch loop), which can't be reproduced from a static ledger. Help text is +the proxy.
+ +`burn overhead trim` is captured non-interactively via the regular +`overhead trim` invocation; the TS implementation prints a unified-diff +recommendation to stdout and never enters an interactive flow, so no +special handling is needed. + +## Provenance + +- **TS commit at capture:** see `git log -1 --format=%H` on the branch this + PR landed from. The CHANGELOG entry under `[Unreleased]` will name the + PR (`#248-c`) so future captures can be cross-referenced. +- **Pricing snapshot:** vendored `packages/analyze/pricing/models.dev.json` + on the same commit. Cost columns in `summary`, `hotspots`, and `compare` + snapshots only stay stable if pricing doesn't drift. +- **Activity classifier rules:** the fixture ledger sets `activity` on + every `TurnRecord` directly so the snapshots don't depend on the rule + tables in `packages/reader/src/classifier.ts`. A classifier-rule change + *will not* drift these snapshots; re-run capture only if you want the + fresh classification to flow through. + +## Refresh procedure + +```bash +# from the repo root, on a clean workspace +pnpm run golden:capture +git diff tests/fixtures/cli-golden/snapshots +``` + +Equivalently, as two explicit steps: + +```bash +pnpm run build +node tests/fixtures/cli-golden/scripts/capture-snapshots.mjs +``` + +The capture script: + +1. Wipes `tests/fixtures/cli-golden/ledger/` and rebuilds it via + `build-ledger.mjs`. +2. For each entry in `invocations.json`, spawns + `packages/cli/dist/cli.js` with a sealed env: + - `HOME=<tmpdir>` so `ingestAll` finds no agent sessions + - `RELAYBURN_HOME=tests/fixtures/cli-golden/ledger` + - `RELAYBURN_CONTENT_STORE=off` so no content sidecars are materialized + - `RELAYBURN_ARCHIVE=0` to force the streaming-ledger fallback (the + SQLite archive path is a perf optimization the Rust port may not + have on day one; the streaming path produces identical aggregates) + - `NO_COLOR=1`, `FORCE_COLOR=0` for stable, ANSI-free output. +3.
Writes captured stdout to `snapshots/<name>.stdout.txt` and (if + non-empty) stderr to `snapshots/<name>.stderr.txt`. +4. Normalizes three kinds of machine-specific noise before writing: + - the absolute fixture ledger path (`RELAYBURN_HOME`) → `${RELAYBURN_HOME}` + - the absolute fixture project path → `${PROJECT}` + - wall-clock millisecond fields in `state status --json` + (`ledgerMtimeMsCurrent`, `lastBuiltAt`, `lastRebuildAt`) → `${MTIME}` + / `${TS}` + The Rust diff runner applies the same substitutions before comparing. + +## How Wave 2 PRs use this + +The diff runner lives at `crates/relayburn-cli/tests/golden.rs`. It is +gated on the env var `BURN_GOLDEN=1` so plain `cargo test --workspace` +in CI stays green while the Rust CLI is being filled in. Per-invocation +gating happens via the `enabled: bool` flag on each entry in +`invocations.json`. + +The default state on `main` today is **all `enabled: false`**: the test +runs to completion, prints a "skip (enabled=false)" line for each +invocation, and reports success. As each Wave 2 PR lands its slice of +the Rust CLI, the matching invocations flip to `enabled: true` and the +diff runner starts enforcing parity for them.
The mapping is: + +| Wave 2 dev | PR scope | Flip these enabled flags | +|------------|-----------------------------------------|--------------------------------------------------------------------------| +| D1 | `burn summary` + `burn hotspots` | `summary`, `summary-json`, `hotspots`, `hotspots-json` | +| D2 | `burn overhead` + `burn overhead trim` | `overhead`, `overhead-json`, `overhead-trim`, `overhead-trim-json` | +| D3 | `burn compare` | `compare`, `compare-json` | +| D4 | `burn state` (status / rebuild / prune) | `state-status`, `state-status-json` | +| D5 | `burn run` + Claude adapter | `run-help`, `top-level-help` | +| D6 | Codex adapter | (no new help-only snapshot — covered by `top-level-help`) | +| D7 | OpenCode adapter | (no new help-only snapshot — covered by `top-level-help`) | +| D8 | `burn ingest` + `burn mcp-server` | `ingest-help`, `mcp-server-help`, `top-level-help` | + +The expected PR sequence: a Wave 2 dev implements their command, flips the +matching entries to `enabled: true` in this fixture's `invocations.json`, +runs `BURN_GOLDEN=1 cargo test --test golden -- --nocapture` locally until +the diff runner passes, and commits the flag flip in the same PR. + +The very last Wave 2 PR (whichever lands last) should also remove the +`BURN_GOLDEN=1` env-var guard from `crates/relayburn-cli/tests/golden.rs` +so the diff runner runs by default in CI from then on. + +## Running the diff runner manually + +```bash +# Build the Rust binary first; the integration test references it via +# CARGO_BIN_EXE_burn so cargo handles wiring as long as we go through it. +cargo build --workspace + +# Pre-Wave-2: every invocation is enabled=false so this is a fast no-op. +BURN_GOLDEN=1 cargo test --test golden -- --nocapture + +# To prove the runner actually fails against a stub: temporarily flip one +# invocation to enabled=true and re-run; you'll get a unified diff between +# the snapshot and the stub binary's "not yet implemented" output.
Revert +# the flag before committing. +``` + +## Adding a new snapshot + +1. Add an entry to `invocations.json` with `enabled: false`. +2. Run `pnpm run golden:capture` to regenerate snapshots. +3. Commit the new snapshot file plus the invocations.json change. +4. The Wave 2 PR that owns the matching command flips `enabled: true`. diff --git a/tests/fixtures/cli-golden/invocations.json b/tests/fixtures/cli-golden/invocations.json new file mode 100644 index 00000000..9baaa964 --- /dev/null +++ b/tests/fixtures/cli-golden/invocations.json @@ -0,0 +1,98 @@ +[ + { + "name": "summary", + "args": ["summary"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "summary-json", + "args": ["summary", "--json"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "hotspots", + "args": ["hotspots"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "hotspots-json", + "args": ["hotspots", "--json"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "overhead", + "args": ["overhead", "--project", "tests/fixtures/cli-golden/project"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "overhead-json", + "args": ["overhead", "--project", "tests/fixtures/cli-golden/project", "--json"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "overhead-trim", + "args": ["overhead", "trim", "--project", "tests/fixtures/cli-golden/project"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "overhead-trim-json", + "args": ["overhead", "trim", "--project", "tests/fixtures/cli-golden/project", "--json"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "compare", + "args": ["compare", "claude-sonnet-4-6,claude-haiku-4-5", "--include-partial"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "compare-json", + "args": ["compare", "claude-sonnet-4-6,claude-haiku-4-5", "--include-partial", "--json"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "state-status", + "args": ["state", "status"], + 
"expectStatus": 0, + "enabled": false + }, + { + "name": "state-status-json", + "args": ["state", "status", "--json"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "ingest-help", + "args": ["ingest", "--help"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "run-help", + "args": ["run", "--help"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "mcp-server-help", + "args": ["mcp-server", "--help"], + "expectStatus": 0, + "enabled": false + }, + { + "name": "top-level-help", + "args": ["--help"], + "expectStatus": 0, + "enabled": false + } +] diff --git a/tests/fixtures/cli-golden/ledger/.gitignore b/tests/fixtures/cli-golden/ledger/.gitignore new file mode 100644 index 00000000..c858ae17 --- /dev/null +++ b/tests/fixtures/cli-golden/ledger/.gitignore @@ -0,0 +1,13 @@ +# archive.sqlite is rematerialized by `state status` / `summary` and is not +# the source of truth — it's a perf cache that the streaming-ledger fallback +# (RELAYBURN_ARCHIVE=0) can rebuild on demand. Keeping the binary out of git +# means snapshot regeneration doesn't churn a multi-KB blob on every run. 
+archive.sqlite +archive.sqlite-shm +archive.sqlite-wal +burn.sqlite +burn.sqlite-shm +burn.sqlite-wal +cursors.json +hwm.json +config.json diff --git a/tests/fixtures/cli-golden/ledger/ledger.content.idx b/tests/fixtures/cli-golden/ledger/ledger.content.idx new file mode 100644 index 00000000..a6722234 --- /dev/null +++ b/tests/fixtures/cli-golden/ledger/ledger.content.idx @@ -0,0 +1,7 @@ +27a09143e3948a0b +9f9d62037fdeeade +2052b66a199d64c6 +fe03a3443dbedb5d +0e6d32f4e4f21273 +9243aeee461b79bc +81b10a089446ffdb diff --git a/tests/fixtures/cli-golden/ledger/ledger.idx b/tests/fixtures/cli-golden/ledger/ledger.idx new file mode 100644 index 00000000..106674fd --- /dev/null +++ b/tests/fixtures/cli-golden/ledger/ledger.idx @@ -0,0 +1,17 @@ +c6b461a27b40e6f3 +1aaa0edf31ed2f84 +36b75860e5fbe6de +abf24545cb754ba3 +46af69d0d4bfed42 +c59b0c5b383b5f4a +57dbdd94eff6b250 +59b271cc21c6cace +39941f4e06ab451f +6eb61682f8617655 +75dc1224d5f114e8 +5af68a6e28a936dd +38b9f97690568ba7 +ceffb1401a35554f +453297eede8f7297 +3b44a9a0329eacda +8169a17f016785e4 diff --git a/tests/fixtures/cli-golden/ledger/ledger.jsonl b/tests/fixtures/cli-golden/ledger/ledger.jsonl new file mode 100644 index 00000000..bbdbb1f4 --- /dev/null +++ b/tests/fixtures/cli-golden/ledger/ledger.jsonl @@ -0,0 +1,18 @@ 
+{"v":1,"kind":"turn","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","messageId":"msg-c1-1","turnIndex":0,"ts":"2026-04-20T00:00:00.000Z","model":"claude-sonnet-4-6","project":"/tmp/golden-project","projectKey":"golden-project","usage":{"input":1500,"output":220,"reasoning":0,"cacheRead":5000,"cacheCreate5m":0,"cacheCreate1h":0},"toolCalls":[{"id":"tu-c1-r1","name":"Read","target":"/tmp/golden-project/src/foo.ts","argsHash":"r1"}],"fidelity":{"class":"full","granularity":"per-turn","coverage":{"hasInputTokens":true,"hasOutputTokens":true,"hasReasoningTokens":true,"hasCacheReadTokens":true,"hasCacheCreateTokens":true,"hasToolCalls":true,"hasToolResultEvents":true,"hasSessionRelationships":true,"hasRawContent":true}},"activity":"coding","hasEdits":false}} +{"v":1,"kind":"turn","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","messageId":"msg-c1-2","turnIndex":1,"ts":"2026-04-20T00:01:00.000Z","model":"claude-sonnet-4-6","project":"/tmp/golden-project","projectKey":"golden-project","usage":{"input":1800,"output":350,"reasoning":0,"cacheRead":6000,"cacheCreate5m":200,"cacheCreate1h":0},"toolCalls":[{"id":"tu-c1-e1","name":"Edit","target":"/tmp/golden-project/src/foo.ts","argsHash":"e1","editPreHash":"pre1","editPostHash":"post1"}],"fidelity":{"class":"full","granularity":"per-turn","coverage":{"hasInputTokens":true,"hasOutputTokens":true,"hasReasoningTokens":true,"hasCacheReadTokens":true,"hasCacheCreateTokens":true,"hasToolCalls":true,"hasToolResultEvents":true,"hasSessionRelationships":true,"hasRawContent":true}},"activity":"coding","hasEdits":true}} 
+{"v":1,"kind":"turn","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","messageId":"msg-c1-3","turnIndex":2,"ts":"2026-04-20T00:02:00.000Z","model":"claude-sonnet-4-6","project":"/tmp/golden-project","projectKey":"golden-project","usage":{"input":1200,"output":180,"reasoning":0,"cacheRead":7000,"cacheCreate5m":0,"cacheCreate1h":0},"toolCalls":[{"id":"tu-c1-b1","name":"Bash","target":"npm test","argsHash":"b1"}],"fidelity":{"class":"full","granularity":"per-turn","coverage":{"hasInputTokens":true,"hasOutputTokens":true,"hasReasoningTokens":true,"hasCacheReadTokens":true,"hasCacheCreateTokens":true,"hasToolCalls":true,"hasToolResultEvents":true,"hasSessionRelationships":true,"hasRawContent":true}},"activity":"testing","hasEdits":false}} +{"v":1,"kind":"turn","record":{"v":1,"source":"claude-code","sessionId":"22222222-2222-2222-2222-222222222222","messageId":"msg-c2-1","turnIndex":0,"ts":"2026-04-21T00:00:00.000Z","model":"claude-haiku-4-5","project":"/tmp/golden-project","projectKey":"golden-project","usage":{"input":900,"output":120,"reasoning":0,"cacheRead":2000,"cacheCreate5m":0,"cacheCreate1h":0},"toolCalls":[{"id":"tu-c2-e1","name":"Edit","target":"/tmp/golden-project/src/bar.ts","argsHash":"e2","editPreHash":"pre2","editPostHash":"post2"}],"fidelity":{"class":"full","granularity":"per-turn","coverage":{"hasInputTokens":true,"hasOutputTokens":true,"hasReasoningTokens":true,"hasCacheReadTokens":true,"hasCacheCreateTokens":true,"hasToolCalls":true,"hasToolResultEvents":true,"hasSessionRelationships":true,"hasRawContent":true}},"activity":"coding","hasEdits":true}} 
+{"v":1,"kind":"turn","record":{"v":1,"source":"claude-code","sessionId":"22222222-2222-2222-2222-222222222222","messageId":"msg-c2-2","turnIndex":1,"ts":"2026-04-21T00:01:00.000Z","model":"claude-haiku-4-5","project":"/tmp/golden-project","projectKey":"golden-project","usage":{"input":800,"output":100,"reasoning":0,"cacheRead":2500,"cacheCreate5m":0,"cacheCreate1h":0},"toolCalls":[{"id":"tu-c2-r1","name":"Read","target":"/tmp/golden-project/src/bar.ts","argsHash":"r2"}],"fidelity":{"class":"full","granularity":"per-turn","coverage":{"hasInputTokens":true,"hasOutputTokens":true,"hasReasoningTokens":true,"hasCacheReadTokens":true,"hasCacheCreateTokens":true,"hasToolCalls":true,"hasToolResultEvents":true,"hasSessionRelationships":true,"hasRawContent":true}},"activity":"review","hasEdits":false}} +{"v":1,"kind":"turn","record":{"v":1,"source":"codex","sessionId":"sess_30000000000000000000000000000003","messageId":"msg-cdx-1","turnIndex":0,"ts":"2026-04-22T00:00:00.000Z","model":"gpt-5-codex","project":"/tmp/golden-project","projectKey":"golden-project","activity":"coding","hasEdits":false,"toolCalls":[{"id":"tu-cdx-1","name":"shell","target":"cargo build","argsHash":"sh1"}],"usage":{"input":2000,"output":400,"reasoning":350,"cacheRead":0,"cacheCreate5m":0,"cacheCreate1h":0},"fidelity":{"class":"partial","granularity":"per-turn","coverage":{"hasInputTokens":true,"hasOutputTokens":true,"hasReasoningTokens":true,"hasCacheReadTokens":false,"hasCacheCreateTokens":false,"hasToolCalls":true,"hasToolResultEvents":false,"hasSessionRelationships":false,"hasRawContent":false}}}} +{"v":1,"kind":"turn","record":{"v":1,"source":"opencode","sessionId":"ses_40000000000000000000000000000004","messageId":"msg-opn-1","turnIndex":0,"ts":"2026-04-23T00:00:00.000Z","model":"claude-sonnet-4-6","project":"/tmp/golden-project","projectKey":"golden-project","activity":"delegation","hasEdits":false,"toolCalls":[{"id":"tu-opn-task","name":"Task","target":"review the foo 
module","argsHash":"tk1"}],"usage":{"input":600,"output":80,"reasoning":0,"cacheRead":1500,"cacheCreate5m":0,"cacheCreate1h":0},"fidelity":{"class":"full","granularity":"per-turn","coverage":{"hasInputTokens":true,"hasOutputTokens":true,"hasReasoningTokens":true,"hasCacheReadTokens":true,"hasCacheCreateTokens":true,"hasToolCalls":true,"hasToolResultEvents":true,"hasSessionRelationships":true,"hasRawContent":true}}}} +{"v":1,"kind":"stamp","ts":"2026-04-23T12:00:00.000Z","selector":{"sessionId":"11111111-1111-1111-1111-111111111111"},"enrichment":{"workflowId":"wf-golden"}} +{"v":1,"kind":"user_turn","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","userUuid":"u-c1-pre-msg-1","ts":"2026-04-20T00:00:00.000Z","followingMessageId":"msg-c1-1","blocks":[{"kind":"text","byteLen":32,"approxTokens":8}]}} +{"v":1,"kind":"user_turn","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","userUuid":"u-c1-pre-msg-2","ts":"2026-04-20T00:00:30.000Z","precedingMessageId":"msg-c1-1","followingMessageId":"msg-c1-2","blocks":[{"kind":"tool_result","toolUseId":"tu-c1-r1","byteLen":4000,"approxTokens":1000}]}} +{"v":1,"kind":"user_turn","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","userUuid":"u-c1-pre-msg-3","ts":"2026-04-20T00:01:30.000Z","precedingMessageId":"msg-c1-2","followingMessageId":"msg-c1-3","blocks":[{"kind":"tool_result","toolUseId":"tu-c1-e1","byteLen":800,"approxTokens":200}]}} +{"v":1,"kind":"tool_result_event","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","toolUseId":"tu-c1-r1","ts":"2026-04-20T00:00:30.000Z","eventSource":"transcript","status":"completed","contentLength":4000}} 
+{"v":1,"kind":"tool_result_event","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","toolUseId":"tu-c1-e1","ts":"2026-04-20T00:01:30.000Z","eventSource":"transcript","status":"completed","contentLength":800}} +{"v":1,"kind":"tool_result_event","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","toolUseId":"tu-c1-b1","ts":"2026-04-20T00:02:30.000Z","eventSource":"transcript","status":"completed","contentLength":1200}} +{"v":1,"kind":"relationship","record":{"v":1,"source":"claude-code","sessionId":"11111111-1111-1111-1111-111111111111","relationshipType":"root","ts":"2026-04-20T00:00:00.000Z"}} +{"v":1,"kind":"relationship","record":{"v":1,"source":"claude-code","sessionId":"22222222-2222-2222-2222-222222222222","relationshipType":"root","ts":"2026-04-21T00:00:00.000Z"}} +{"v":1,"kind":"relationship","record":{"v":1,"source":"codex","sessionId":"sess_30000000000000000000000000000003","relationshipType":"root","ts":"2026-04-22T00:00:00.000Z"}} +{"v":1,"kind":"relationship","record":{"v":1,"source":"opencode","sessionId":"ses_40000000000000000000000000000004","relationshipType":"root","ts":"2026-04-23T00:00:00.000Z"}} diff --git a/tests/fixtures/cli-golden/project/CLAUDE.md b/tests/fixtures/cli-golden/project/CLAUDE.md new file mode 100644 index 00000000..53cb420d --- /dev/null +++ b/tests/fixtures/cli-golden/project/CLAUDE.md @@ -0,0 +1,23 @@ +# Golden Fixture CLAUDE.md + +This file exists so `burn overhead` has at least one overhead-eligible file +to surface in the snapshot. Lines below are stable, headed sections so +`burn overhead trim` has visible structure to recommend pruning over. + +## Conventions + +- Use `node --test` for tests. +- Prefer the SDK over hand-rolling ledger reads. + +## Frequently asked + +- Q: where does the ledger live? A: under `${RELAYBURN_HOME}` or `~/.relayburn`. +- Q: how do I add a harness? A: drop an adapter in `packages/cli/src/harnesses/`. 
+ +## Long-tail trivia + +A deliberately wordy section so the trim recommender has size to bite into: +the goal here is just to give the per-section token estimate something larger +than the surrounding sections, not to communicate any real content. +The phrase "lorem ipsum" appears here for tokenization volume only. +Nothing in this section is load-bearing for the rest of the project. diff --git a/tests/fixtures/cli-golden/scripts/build-ledger.mjs b/tests/fixtures/cli-golden/scripts/build-ledger.mjs new file mode 100644 index 00000000..8f90af33 --- /dev/null +++ b/tests/fixtures/cli-golden/scripts/build-ledger.mjs @@ -0,0 +1,397 @@ +// Build the deterministic CLI-golden fixture ledger. +// +// Writes a synthetic ledger to ${RELAYBURN_HOME} that exercises: +// - all three readers/sources: claude-code, codex, opencode +// - multi-turn Claude sessions plus single-turn codex / opencode sessions +// - tool-call shapes the activity classifier + hotspots care about +// (Read, Edit, Bash, Task) so `compare` produces non-empty buckets +// - a stamp so workflow-id filtering has something to bind to +// +// All token counts, timestamps, message ids, session ids, and project paths +// are hard-coded so re-running the script always produces a byte-identical +// ledger. The Wave 2 PRs that un-ignore the golden test must avoid drifting +// these values without also refreshing the snapshots. +// +// Usage: +// RELAYBURN_HOME=tests/fixtures/cli-golden/ledger \ +// node tests/fixtures/cli-golden/scripts/build-ledger.mjs + +import { readFile, rm, writeFile } from 'node:fs/promises'; +import * as path from 'node:path'; + +import { + appendTurns, + appendUserTurns, + appendToolResultEvents, + appendRelationships, + ledgerHome, + ledgerPath, + stamp, +} from '@relayburn/ledger'; + +// stamp() writes ts: new Date().toISOString() — non-deterministic.
We +// stamp first, then post-process the ledger to substitute the stamp's +// drifting `ts` with a fixed timestamp so re-running the script produces +// a byte-identical ledger. Keeps the rest of the pipeline (reader, +// archive, indexes) on the supported public API. +const STAMP_FIXED_TS = '2026-04-23T12:00:00.000Z'; + +const HOME = ledgerHome(); + +// Wipe any prior generation so re-runs are reproducible. We only delete +// known-burn files inside HOME to avoid clobbering an unrelated dir if a +// caller pointed RELAYBURN_HOME somewhere wrong. +const FILES = [ + 'ledger.jsonl', + 'ledger.idx', + 'ledger.content.idx', + 'cursors.json', + 'hwm.json', + 'config.json', + 'archive.sqlite', + 'archive.sqlite-shm', + 'archive.sqlite-wal', + 'burn.sqlite', + 'burn.sqlite-shm', + 'burn.sqlite-wal', +]; +for (const name of FILES) { + await rm(`${HOME}/${name}`, { force: true }); +} +await rm(`${HOME}/content`, { recursive: true, force: true }); + +console.error(`[fixture] writing to ${HOME}`); + +// Two coverage shapes we reuse: full per-turn coverage (used by Claude turns +// so `hotspots` attribution doesn't refuse) and a partial Codex coverage +// (no per-turn cache breakdown, no tool-result events). The shapes mirror +// what the real readers emit today; if the readers' output drifts, refresh +// these to match. 
+const FULL_COVERAGE = { + hasInputTokens: true, + hasOutputTokens: true, + hasReasoningTokens: true, + hasCacheReadTokens: true, + hasCacheCreateTokens: true, + hasToolCalls: true, + hasToolResultEvents: true, + hasSessionRelationships: true, + hasRawContent: true, +}; +const CODEX_COVERAGE = { + hasInputTokens: true, + hasOutputTokens: true, + hasReasoningTokens: true, + hasCacheReadTokens: false, + hasCacheCreateTokens: false, + hasToolCalls: true, + hasToolResultEvents: false, + hasSessionRelationships: false, + hasRawContent: false, +}; + +const CLAUDE_SESSION_A = '11111111-1111-1111-1111-111111111111'; +const CLAUDE_SESSION_B = '22222222-2222-2222-2222-222222222222'; +const CODEX_SESSION = 'sess_30000000000000000000000000000003'; +const OPENCODE_SESSION = 'ses_40000000000000000000000000000004'; + +/** + * @param {Partial} overrides + */ +function turn(overrides) { + return { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_A, + messageId: 'msg-1', + turnIndex: 0, + ts: '2026-04-20T00:00:00.000Z', + model: 'claude-sonnet-4-6', + project: '/tmp/golden-project', + projectKey: 'golden-project', + usage: { + input: 1000, + output: 200, + reasoning: 0, + cacheRead: 5000, + cacheCreate5m: 0, + cacheCreate1h: 0, + }, + toolCalls: [], + fidelity: { class: 'full', granularity: 'per-turn', coverage: FULL_COVERAGE }, + ...overrides, + }; +} + +// --- Claude session A — coding workflow with edits + reads ----------------- +await appendTurns([ + turn({ + sessionId: CLAUDE_SESSION_A, + messageId: 'msg-c1-1', + turnIndex: 0, + ts: '2026-04-20T00:00:00.000Z', + model: 'claude-sonnet-4-6', + activity: 'coding', + hasEdits: false, + toolCalls: [ + { id: 'tu-c1-r1', name: 'Read', target: '/tmp/golden-project/src/foo.ts', argsHash: 'r1' }, + ], + usage: { + input: 1500, output: 220, reasoning: 0, + cacheRead: 5000, cacheCreate5m: 0, cacheCreate1h: 0, + }, + }), + turn({ + sessionId: CLAUDE_SESSION_A, + messageId: 'msg-c1-2', + turnIndex: 1, + ts: 
'2026-04-20T00:01:00.000Z', + model: 'claude-sonnet-4-6', + activity: 'coding', + hasEdits: true, + toolCalls: [ + { + id: 'tu-c1-e1', + name: 'Edit', + target: '/tmp/golden-project/src/foo.ts', + argsHash: 'e1', + editPreHash: 'pre1', + editPostHash: 'post1', + }, + ], + usage: { + input: 1800, output: 350, reasoning: 0, + cacheRead: 6000, cacheCreate5m: 200, cacheCreate1h: 0, + }, + }), + turn({ + sessionId: CLAUDE_SESSION_A, + messageId: 'msg-c1-3', + turnIndex: 2, + ts: '2026-04-20T00:02:00.000Z', + model: 'claude-sonnet-4-6', + activity: 'testing', + hasEdits: false, + toolCalls: [ + { id: 'tu-c1-b1', name: 'Bash', target: 'npm test', argsHash: 'b1' }, + ], + usage: { + input: 1200, output: 180, reasoning: 0, + cacheRead: 7000, cacheCreate5m: 0, cacheCreate1h: 0, + }, + }), +]); + +// --- Claude session B — two haiku turns (a second model for `compare`) ---- +await appendTurns([ + turn({ + sessionId: CLAUDE_SESSION_B, + messageId: 'msg-c2-1', + turnIndex: 0, + ts: '2026-04-21T00:00:00.000Z', + model: 'claude-haiku-4-5', + activity: 'coding', + hasEdits: true, + toolCalls: [ + { + id: 'tu-c2-e1', + name: 'Edit', + target: '/tmp/golden-project/src/bar.ts', + argsHash: 'e2', + editPreHash: 'pre2', + editPostHash: 'post2', + }, + ], + usage: { + input: 900, output: 120, reasoning: 0, + cacheRead: 2000, cacheCreate5m: 0, cacheCreate1h: 0, + }, + }), + turn({ + sessionId: CLAUDE_SESSION_B, + messageId: 'msg-c2-2', + turnIndex: 1, + ts: '2026-04-21T00:01:00.000Z', + model: 'claude-haiku-4-5', + activity: 'review', + hasEdits: false, + toolCalls: [ + { id: 'tu-c2-r1', name: 'Read', target: '/tmp/golden-project/src/bar.ts', argsHash: 'r2' }, + ], + usage: { + input: 800, output: 100, reasoning: 0, + cacheRead: 2500, cacheCreate5m: 0, cacheCreate1h: 0, + }, + }), +]); + +// --- Codex session — codex source + reasoning tokens, partial coverage ---- +await appendTurns([ + { + v: 1, + source: 'codex', + sessionId: CODEX_SESSION, + messageId: 'msg-cdx-1', + turnIndex: 0,
+ ts: '2026-04-22T00:00:00.000Z', + model: 'gpt-5-codex', + project: '/tmp/golden-project', + projectKey: 'golden-project', + activity: 'coding', + hasEdits: false, + toolCalls: [ + { id: 'tu-cdx-1', name: 'shell', target: 'cargo build', argsHash: 'sh1' }, + ], + usage: { + input: 2000, output: 400, reasoning: 350, + cacheRead: 0, cacheCreate5m: 0, cacheCreate1h: 0, + }, + fidelity: { class: 'partial', granularity: 'per-turn', coverage: CODEX_COVERAGE }, + }, +]); + +// --- OpenCode session — opencode source + a Task spawn (subagent stub) ---- +await appendTurns([ + { + v: 1, + source: 'opencode', + sessionId: OPENCODE_SESSION, + messageId: 'msg-opn-1', + turnIndex: 0, + ts: '2026-04-23T00:00:00.000Z', + model: 'claude-sonnet-4-6', + project: '/tmp/golden-project', + projectKey: 'golden-project', + activity: 'delegation', + hasEdits: false, + toolCalls: [ + { id: 'tu-opn-task', name: 'Task', target: 'review the foo module', argsHash: 'tk1' }, + ], + usage: { + input: 600, output: 80, reasoning: 0, + cacheRead: 1500, cacheCreate5m: 0, cacheCreate1h: 0, + }, + fidelity: { class: 'full', granularity: 'per-turn', coverage: FULL_COVERAGE }, + }, +]); + +// --- Stamp — workflow attribution for `--workflow` filtering -------------- +await stamp({ sessionId: CLAUDE_SESSION_A }, { workflowId: 'wf-golden' }); + +// --- User turns — let hotspots attribute Read/Edit on session A by size --- +await appendUserTurns([ + { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_A, + userUuid: 'u-c1-pre-msg-1', + ts: '2026-04-20T00:00:00.000Z', + followingMessageId: 'msg-c1-1', + blocks: [{ kind: 'text', byteLen: 32, approxTokens: 8 }], + }, + { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_A, + userUuid: 'u-c1-pre-msg-2', + ts: '2026-04-20T00:00:30.000Z', + precedingMessageId: 'msg-c1-1', + followingMessageId: 'msg-c1-2', + blocks: [ + { kind: 'tool_result', toolUseId: 'tu-c1-r1', byteLen: 4000, approxTokens: 1000 }, + ], + }, + { + v: 1, + source: 
'claude-code', + sessionId: CLAUDE_SESSION_A, + userUuid: 'u-c1-pre-msg-3', + ts: '2026-04-20T00:01:30.000Z', + precedingMessageId: 'msg-c1-2', + followingMessageId: 'msg-c1-3', + blocks: [ + { kind: 'tool_result', toolUseId: 'tu-c1-e1', byteLen: 800, approxTokens: 200 }, + ], + }, +]); + +// --- Tool-result events — keeps hotspots attribution out of refusal mode -- +await appendToolResultEvents([ + { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_A, + toolUseId: 'tu-c1-r1', + ts: '2026-04-20T00:00:30.000Z', + eventSource: 'transcript', + status: 'completed', + contentLength: 4000, + }, + { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_A, + toolUseId: 'tu-c1-e1', + ts: '2026-04-20T00:01:30.000Z', + eventSource: 'transcript', + status: 'completed', + contentLength: 800, + }, + { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_A, + toolUseId: 'tu-c1-b1', + ts: '2026-04-20T00:02:30.000Z', + eventSource: 'transcript', + status: 'completed', + contentLength: 1200, + }, +]); + +// --- Relationships — one root record per session (no subagent edges) ------ +await appendRelationships([ + { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_A, + relationshipType: 'root', + ts: '2026-04-20T00:00:00.000Z', + }, + { + v: 1, + source: 'claude-code', + sessionId: CLAUDE_SESSION_B, + relationshipType: 'root', + ts: '2026-04-21T00:00:00.000Z', + }, + { + v: 1, + source: 'codex', + sessionId: CODEX_SESSION, + relationshipType: 'root', + ts: '2026-04-22T00:00:00.000Z', + }, + { + v: 1, + source: 'opencode', + sessionId: OPENCODE_SESSION, + relationshipType: 'root', + ts: '2026-04-23T00:00:00.000Z', + }, +]); + +// Replace the stamp's wall-clock ts with the fixed value so the ledger +// hashes the same on every run. Other ledger lines have hand-pinned ts +// values already; only stamp() inserts a live timestamp.
+const ledgerFile = ledgerPath(); +const raw = await readFile(ledgerFile, 'utf8'); +const rewritten = raw.replace( + /("kind":"stamp","ts":")[^"]+(")/g, + `$1${STAMP_FIXED_TS}$2`, +); +if (rewritten !== raw) { + await writeFile(ledgerFile, rewritten); +} + +console.error('[fixture] done'); diff --git a/tests/fixtures/cli-golden/scripts/capture-snapshots.mjs b/tests/fixtures/cli-golden/scripts/capture-snapshots.mjs new file mode 100644 index 00000000..7ebb5e8e --- /dev/null +++ b/tests/fixtures/cli-golden/scripts/capture-snapshots.mjs @@ -0,0 +1,139 @@ +#!/usr/bin/env node +// Re-run every TS-CLI invocation in invocations.json against the fixture +// ledger and write the captured stdout/stderr to snapshots/. +// +// The ledger is rebuilt fresh on every run (build-ledger.mjs), then the CLI +// is shelled out from packages/cli/dist/cli.js with a sealed env: +// - RELAYBURN_HOME points at tests/fixtures/cli-golden/ledger +// - HOME points at a tmp dir with no .claude / .codex / .local trees, so +// ingestAll's session-store sweep finds zero work +// - RELAYBURN_CONTENT_STORE=off so the content sidecar isn't materialized +// - RELAYBURN_ARCHIVE=0 so summary / compare take the streaming-ledger path +// +// Snapshots are stdout with wall-clock fields squashed and two path rewrites: +// 1. the absolute fixture HOME path becomes ${RELAYBURN_HOME} +// 2. the absolute fixture project path becomes ${PROJECT} +// Wave 2 PRs comparing Rust output do the same substitution before diffing +// so snapshots stay portable across machines / CI runners.
+ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import * as path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.resolve(__dirname, '..', '..', '..', '..'); +const FIXTURE_DIR = path.resolve(__dirname, '..'); +const LEDGER_HOME = path.join(FIXTURE_DIR, 'ledger'); +const PROJECT_DIR = path.join(FIXTURE_DIR, 'project'); +const SNAPSHOT_DIR = path.join(FIXTURE_DIR, 'snapshots'); +const INVOCATIONS = path.join(FIXTURE_DIR, 'invocations.json'); +const CLI_PATH = path.join(ROOT, 'packages', 'cli', 'dist', 'cli.js'); + +await mkdir(LEDGER_HOME, { recursive: true }); +await mkdir(SNAPSHOT_DIR, { recursive: true }); + +// Step 1 — wipe + rebuild the fixture ledger. +console.error(`[capture] (re)building fixture ledger at ${LEDGER_HOME}`); +const buildResult = spawnSync( + process.execPath, + [path.join(__dirname, 'build-ledger.mjs')], + { + encoding: 'utf8', + env: { ...process.env, RELAYBURN_HOME: LEDGER_HOME, RELAYBURN_CONTENT_STORE: 'off' }, + stdio: ['ignore', 'inherit', 'inherit'], + }, +); +if (buildResult.status !== 0) { + process.stderr.write(`[capture] build-ledger failed (status=${buildResult.status})\n`); + process.exit(1); +} + +// Step 2 — sealed HOME with no agent session stores. ingestAll's listDirs +// returns [] for missing dirs so this keeps every read-path command's "ingest" +// preamble at "ingested 0 new sessions" without needing to mock it out. +const SEALED_HOME = await mkdtemp(path.join(tmpdir(), 'burn-golden-home-')); + +// Step 3 — load the invocations contract and run each. 
+const invocations = JSON.parse(await readFile(INVOCATIONS, 'utf8')); + +let failures = 0; +for (const inv of invocations) { + const args = inv.args; + const env = { + ...process.env, + HOME: SEALED_HOME, + RELAYBURN_HOME: LEDGER_HOME, + RELAYBURN_CONTENT_STORE: 'off', + // Force the streaming-ledger fallback in `burn summary` / `burn compare`. + // The archive is a perf optimization that materializes a SQLite mirror; + // its build path can hit binding errors on hand-rolled fixtures, and + // either way the output is meant to be identical to the streaming path. + // Wave 2 PRs porting the Rust commands likewise won't have an archive + // implementation on day one. + RELAYBURN_ARCHIVE: '0', + NO_COLOR: '1', + FORCE_COLOR: '0', + ...(inv.env ?? {}), + }; + console.error(`[capture] ${inv.name}: burn ${args.join(' ')}`); + const result = spawnSync(process.execPath, [CLI_PATH, ...args], { + encoding: 'utf8', + env, + cwd: ROOT, + timeout: 30_000, + }); + if (result.error) { + process.stderr.write(`[capture] ${inv.name}: spawn error ${result.error.message}\n`); + failures++; + continue; + } + const expectedStatus = typeof inv.expectStatus === 'number' ? 
inv.expectStatus : 0; + if (result.status !== expectedStatus) { + process.stderr.write( + `[capture] ${inv.name}: expected status ${expectedStatus}, got ${result.status}\n` + + ` stderr:\n${result.stderr}\n`, + ); + failures++; + continue; + } + const stdout = normalize(result.stdout, LEDGER_HOME, PROJECT_DIR); + const stderr = normalize(result.stderr, LEDGER_HOME, PROJECT_DIR); + + await writeFile(path.join(SNAPSHOT_DIR, `${inv.name}.stdout.txt`), stdout); + if (stderr.length > 0) { + await writeFile(path.join(SNAPSHOT_DIR, `${inv.name}.stderr.txt`), stderr); + } else { + await rm(path.join(SNAPSHOT_DIR, `${inv.name}.stderr.txt`), { force: true }); + } +} + +await rm(SEALED_HOME, { recursive: true, force: true }); + +if (failures > 0) { + process.stderr.write(`[capture] ${failures} invocation(s) failed\n`); + process.exit(1); +} +console.error('[capture] done'); + +/** + * Replace the absolute LEDGER_HOME path with the placeholder ${RELAYBURN_HOME} + * and the absolute project path with ${PROJECT}, so snapshots are portable + * across machines / CI runners. The diff runner applies the same substitution + * before comparing. Wall-clock millisecond fields in the `state status --json` + * shape (`ledgerMtimeMsCurrent`, `lastBuiltAt`, `lastRebuildAt`) are squashed + * to a stable placeholder for the same reason. + */ +function normalize(text, ledgerHome, projectDir) { + let out = text.replaceAll(ledgerHome, '${RELAYBURN_HOME}').replaceAll(projectDir, '${PROJECT}'); + // Squash wall-clock millisecond fields — they're load-bearing for cache + // invalidation but have no business in a golden snapshot. 
+ out = out.replaceAll( + /"ledgerMtimeMsCurrent":\s*\d+/g, + '"ledgerMtimeMsCurrent": "${MTIME}"', + ); + out = out.replaceAll(/"lastBuiltAt":\s*\d+/g, '"lastBuiltAt": "${TS}"'); + out = out.replaceAll(/"lastRebuildAt":\s*\d+/g, '"lastRebuildAt": "${TS}"'); + return out; +} diff --git a/tests/fixtures/cli-golden/snapshots/compare-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/compare-json.stdout.txt new file mode 100644 index 00000000..62479361 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/compare-json.stdout.txt @@ -0,0 +1,184 @@ +{ + "analyzedTurns": 7, + "minSample": 5, + "models": [ + "claude-sonnet-4-6", + "claude-haiku-4-5" + ], + "categories": [ + "coding", + "delegation", + "review", + "testing" + ], + "totals": { + "claude-sonnet-4-6": { + "turns": 4, + "totalCost": 0.03435 + }, + "claude-haiku-4-5": { + "turns": 2, + "totalCost": 0.0032500000000000003 + } + }, + "cells": [ + { + "model": "claude-sonnet-4-6", + "category": "coding", + "turns": 2, + "editTurns": 1, + "oneShotTurns": 1, + "pricedTurns": 2, + "totalCost": 0.0225, + "costPerTurn": 0.01125, + "oneShotRate": 1, + "cacheHitRate": 0.7586, + "medianRetries": 0, + "noData": false, + "insufficientSample": true + }, + { + "model": "claude-sonnet-4-6", + "category": "delegation", + "turns": 1, + "editTurns": 0, + "oneShotTurns": 0, + "pricedTurns": 1, + "totalCost": 0.00345, + "costPerTurn": 0.00345, + "oneShotRate": null, + "cacheHitRate": 0.7143, + "medianRetries": null, + "noData": false, + "insufficientSample": true + }, + { + "model": "claude-sonnet-4-6", + "category": "review", + "turns": 0, + "editTurns": 0, + "oneShotTurns": 0, + "pricedTurns": 0, + "totalCost": 0, + "costPerTurn": null, + "oneShotRate": null, + "cacheHitRate": null, + "medianRetries": null, + "noData": true, + "insufficientSample": false + }, + { + "model": "claude-sonnet-4-6", + "category": "testing", + "turns": 1, + "editTurns": 0, + "oneShotTurns": 0, + "pricedTurns": 1, + "totalCost": 0.0084, + 
"costPerTurn": 0.0084, + "oneShotRate": null, + "cacheHitRate": 0.8537, + "medianRetries": null, + "noData": false, + "insufficientSample": true + }, + { + "model": "claude-haiku-4-5", + "category": "coding", + "turns": 1, + "editTurns": 1, + "oneShotTurns": 1, + "pricedTurns": 1, + "totalCost": 0.0017, + "costPerTurn": 0.0017, + "oneShotRate": 1, + "cacheHitRate": 0.6897, + "medianRetries": 0, + "noData": false, + "insufficientSample": true + }, + { + "model": "claude-haiku-4-5", + "category": "delegation", + "turns": 0, + "editTurns": 0, + "oneShotTurns": 0, + "pricedTurns": 0, + "totalCost": 0, + "costPerTurn": null, + "oneShotRate": null, + "cacheHitRate": null, + "medianRetries": null, + "noData": true, + "insufficientSample": false + }, + { + "model": "claude-haiku-4-5", + "category": "review", + "turns": 1, + "editTurns": 0, + "oneShotTurns": 0, + "pricedTurns": 1, + "totalCost": 0.00155, + "costPerTurn": 0.00155, + "oneShotRate": null, + "cacheHitRate": 0.7576, + "medianRetries": null, + "noData": false, + "insufficientSample": true + }, + { + "model": "claude-haiku-4-5", + "category": "testing", + "turns": 0, + "editTurns": 0, + "oneShotTurns": 0, + "pricedTurns": 0, + "totalCost": 0, + "costPerTurn": null, + "oneShotRate": null, + "cacheHitRate": null, + "medianRetries": null, + "noData": true, + "insufficientSample": false + } + ], + "fidelity": { + "minimum": "partial", + "excluded": { + "total": 0, + "aggregateOnly": 0, + "costOnly": 0, + "partial": 0, + "usageOnly": 0 + }, + "summary": { + "total": 7, + "byClass": { + "full": 6, + "usage-only": 0, + "aggregate-only": 0, + "cost-only": 0, + "partial": 1 + }, + "byGranularity": { + "per-turn": 7, + "per-message": 0, + "per-session-aggregate": 0, + "cost-only": 0 + }, + "missingCoverage": { + "hasInputTokens": 0, + "hasOutputTokens": 0, + "hasReasoningTokens": 0, + "hasCacheReadTokens": 1, + "hasCacheCreateTokens": 1, + "hasToolCalls": 0, + "hasToolResultEvents": 1, + "hasSessionRelationships": 1, + 
"hasRawContent": 1 + }, + "unknown": 0 + } + } +} diff --git a/tests/fixtures/cli-golden/snapshots/compare.stdout.txt b/tests/fixtures/cli-golden/snapshots/compare.stdout.txt new file mode 100644 index 00000000..cd373930 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/compare.stdout.txt @@ -0,0 +1,21 @@ + +turns analyzed: 7 + + claude-sonnet-4-6 claude-haiku-4-5 +Activity Turns Cost/turn 1-shot Turns Cost/turn 1-shot +coding 2 $0.011 100% 1 $0.0017 100% +delegation 1 $0.0034 — — — — +review — — — 1 $0.0015 — +testing 1 $0.0084 — — — — + + low claude-sonnet-4-6 sample in 'coding' (2 turns < 5) — treat as indicative. + low claude-haiku-4-5 sample in 'coding' (1 turns < 5) — treat as indicative. + low claude-sonnet-4-6 sample in 'delegation' (1 turns < 5) — treat as indicative. + no claude-haiku-4-5 data in 'delegation' — no comparison available. + no claude-sonnet-4-6 data in 'review' — no comparison available. + low claude-haiku-4-5 sample in 'review' (1 turns < 5) — treat as indicative. + low claude-sonnet-4-6 sample in 'testing' (1 turns < 5) — treat as indicative. + no claude-haiku-4-5 data in 'testing' — no comparison available. 
+ +claude-sonnet-4-6: 4 turns, $0.034 total +claude-haiku-4-5: 2 turns, $0.0033 total diff --git a/tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt new file mode 100644 index 00000000..14a24cc1 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt @@ -0,0 +1,118 @@ +{ + "turnsAnalyzed": 6, + "grandTotal": 0.0376, + "attributedTotal": 0.004775000000000001, + "unattributedTotal": 0.032825, + "attributionDegraded": true, + "sessions": [ + { + "sessionId": "11111111-1111-1111-1111-111111111111", + "grandCost": 0.030899999999999997, + "attributedCost": 0.003975, + "unattributedCost": 0.026924999999999998, + "attributionMethod": "sized" + }, + { + "sessionId": "22222222-2222-2222-2222-222222222222", + "grandCost": 0.0032500000000000003, + "attributedCost": 0.0008, + "unattributedCost": 0.0024500000000000004, + "attributionMethod": "even-split" + }, + { + "sessionId": "ses_40000000000000000000000000000004", + "grandCost": 0.00345, + "attributedCost": 0, + "unattributedCost": 0.00345, + "attributionMethod": "even-split" + } + ], + "files": [ + { + "path": "/tmp/golden-project/src/foo.ts", + "toolCallCount": 2, + "initialTokens": 1200, + "persistenceTokens": 1000, + "ridingTurns": 1, + "totalCost": 0.003975, + "firstEmitTs": "2026-04-20T00:00:00.000Z", + "firstEmitTurnIndex": 0 + }, + { + "path": "/tmp/golden-project/src/bar.ts", + "toolCallCount": 2, + "initialTokens": 800, + "persistenceTokens": 0, + "ridingTurns": 0, + "totalCost": 0.0008, + "firstEmitTs": "2026-04-21T00:00:00.000Z", + "firstEmitTurnIndex": 0 + } + ], + "bashVerbs": [ + { + "verb": "npm test", + "callCount": 1, + "distinctCommands": 1, + "totalCost": 0, + "initialTokens": 0, + "persistenceTokens": 0, + "avgPersistenceTurns": 0, + "topExamples": [ + "npm test" + ] + } + ], + "bash": [ + { + "argsHash": "b1", + "command": "npm test", + "callCount": 1, + "totalCost": 0, + "initialTokens": 0, + 
"persistenceTokens": 0 + } + ], + "subagents": [ + { + "subagentType": "review the foo module", + "callCount": 1, + "totalCost": 0, + "initialTokens": 0, + "persistenceTokens": 0 + } + ], + "fidelity": { + "analyzed": 6, + "excluded": 1, + "summary": { + "total": 7, + "byClass": { + "full": 6, + "usage-only": 0, + "aggregate-only": 0, + "cost-only": 0, + "partial": 1 + }, + "byGranularity": { + "per-turn": 7, + "per-message": 0, + "per-session-aggregate": 0, + "cost-only": 0 + }, + "missingCoverage": { + "hasInputTokens": 0, + "hasOutputTokens": 0, + "hasReasoningTokens": 0, + "hasCacheReadTokens": 1, + "hasCacheCreateTokens": 1, + "hasToolCalls": 0, + "hasToolResultEvents": 1, + "hasSessionRelationships": 1, + "hasRawContent": 1 + }, + "unknown": 0 + }, + "refused": false + } +} diff --git a/tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt b/tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt new file mode 100644 index 00000000..8dfa7c80 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt @@ -0,0 +1,30 @@ + +turns analyzed: 6 +analyzed 6 of 7 turns; 1 excluded for missing tool-result events, per-turn granularity (codex) +session grand total: $0.038 + +⚠ attribution is degraded: 2 of 3 sessions (66.7%) have no sized + tool-result data, so file / bash / subagent costs for those sessions are approximate + (even-split over turn N+1 input/cacheCreate). Run 'burn state rebuild content' + to backfill source-derived sizes, or see 'burn state' for + why capture is disabled. 
+ +attributed ≈ $0.0048 (approximate — see above) +unattributed $0.033 (output, system overhead, untracked) + +Top files by cumulative cost (approximate) +path firstTurn initial(tok) persist(tok) rideTurns cost %attr +/tmp/golden-project/src/foo.ts 0 1,200 1,000 1 $0.0040 83.2% +/tmp/golden-project/src/bar.ts 0 800 0 0 $0.0008 16.8% + +Top Bash verbs by cost (approximate) +verb calls commands initial(tok) persist(tok) avgRide cost examples +npm test 1 1 0 0 0.0 $0.00 npm test + +Top exact Bash commands by cost (approximate) +command calls initial(tok) persist(tok) cost +npm test 1 0 0 $0.00 + +Top subagent calls by cost (approximate) +subagent calls initial(tok) persist(tok) cost +review the foo module 1 0 0 $0.00 diff --git a/tests/fixtures/cli-golden/snapshots/ingest-help.stdout.txt b/tests/fixtures/cli-golden/snapshots/ingest-help.stdout.txt new file mode 100644 index 00000000..ad62ec1c --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/ingest-help.stdout.txt @@ -0,0 +1,11 @@ +burn ingest — incremental ingest from agent session stores + +Usage: + burn ingest [--quiet] + burn ingest --watch [--interval ] [--quiet] [--opencode-stream] [--opencode-url ] + burn ingest --hook claude [--quiet] + +Default mode scans Claude Code, Codex, and OpenCode session stores once. +--watch keeps that scan loop running in the foreground. +--hook claude reads a Claude Code hook payload JSON from stdin and ingests the +transcript it references. Safe to call from every Claude Code hook. 
diff --git a/tests/fixtures/cli-golden/snapshots/mcp-server-help.stdout.txt b/tests/fixtures/cli-golden/snapshots/mcp-server-help.stdout.txt new file mode 100644 index 00000000..6017fcae --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/mcp-server-help.stdout.txt @@ -0,0 +1,11 @@ +burn mcp-server — stdio MCP server exposing read-only ledger queries + +Usage: + burn mcp-server [--session-id ] + +Registers tools for in-session self-query by an agent that was spawned with +this server attached via Claude Code's --mcp-config (see buildMcpConfig in +@relayburn/mcp). Tools default to the session id baked into the command line. + +Tools: + burn__sessionCost { sessionId? } → total USD / tokens / turns / models diff --git a/tests/fixtures/cli-golden/snapshots/overhead-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/overhead-json.stdout.txt new file mode 100644 index 00000000..b26db002 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/overhead-json.stdout.txt @@ -0,0 +1,126 @@ +{ + "project": "${PROJECT}", + "files": [ + { + "kind": "claude-md", + "path": "${PROJECT}/CLAUDE.md", + "appliesTo": [ + "claude-code" + ], + "totalLines": 23, + "bytes": 908, + "tokens": 227, + "sections": [ + { + "heading": "(preamble)", + "level": 0, + "startLine": 1, + "endLine": 6, + "bytes": 246, + "tokens": 62 + }, + { + "heading": "## Conventions", + "level": 2, + "startLine": 7, + "endLine": 11, + "bytes": 97, + "tokens": 25 + }, + { + "heading": "## Frequently asked", + "level": 2, + "startLine": 12, + "endLine": 16, + "bytes": 185, + "tokens": 47 + }, + { + "heading": "## Long-tail trivia", + "level": 2, + "startLine": 17, + "endLine": 23, + "bytes": 380, + "tokens": 95 + } + ], + "groupingLevel": 2 + } + ], + "perFile": [ + { + "path": "${PROJECT}/CLAUDE.md", + "kind": "claude-md", + "appliesTo": [ + "claude-code" + ], + "attribution": { + "totalTokens": 227, + "totalCost": 0, + "sessionCosts": [], + "sectionCosts": [ + { + "filePath": "${PROJECT}/CLAUDE.md", + 
"section": { + "heading": "(preamble)", + "level": 0, + "startLine": 1, + "endLine": 6, + "bytes": 246, + "tokens": 62 + }, + "tokenShare": 0.2709251101321586, + "costPerSession": 0, + "totalCost": 0 + }, + { + "filePath": "${PROJECT}/CLAUDE.md", + "section": { + "heading": "## Conventions", + "level": 2, + "startLine": 7, + "endLine": 11, + "bytes": 97, + "tokens": 25 + }, + "tokenShare": 0.10682819383259912, + "costPerSession": 0, + "totalCost": 0 + }, + { + "filePath": "${PROJECT}/CLAUDE.md", + "section": { + "heading": "## Frequently asked", + "level": 2, + "startLine": 12, + "endLine": 16, + "bytes": 185, + "tokens": 47 + }, + "tokenShare": 0.20374449339207049, + "costPerSession": 0, + "totalCost": 0 + }, + { + "filePath": "${PROJECT}/CLAUDE.md", + "section": { + "heading": "## Long-tail trivia", + "level": 2, + "startLine": 17, + "endLine": 23, + "bytes": 380, + "tokens": 95 + }, + "tokenShare": 0.4185022026431718, + "costPerSession": 0, + "totalCost": 0 + } + ], + "perSessionAvg": 0, + "perSessionP95": 0, + "sessionCount": 0 + } + } + ], + "grandTotal": 0 +} diff --git a/tests/fixtures/cli-golden/snapshots/overhead-trim-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/overhead-trim-json.stdout.txt new file mode 100644 index 00000000..ac3a0cd5 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/overhead-trim-json.stdout.txt @@ -0,0 +1,73 @@ +{ + "project": "${PROJECT}", + "since": "all time", + "recommendations": [ + { + "file": "CLAUDE.md", + "kind": "claude-md", + "appliesTo": [ + "claude-code" + ], + "section": { + "heading": "## Conventions", + "startLine": 7, + "endLine": 11, + "tokens": 25 + }, + "projectedSavings": { + "perSessionUsd": 0, + "acrossWindowUsd": 0, + "tokens": 25, + "tokenShare": 0.10682819383259912 + }, + "diff": "# TRIM: ## Conventions\n# projected savings per session: $0.0000\n# projected savings across window: $0.0000\n--- a/CLAUDE.md\n+++ b/CLAUDE.md\n@@ -7,5 +7,0 @@\n-## Conventions\n-\n-- Use `node --test` for tests.\n-- 
Prefer the SDK over hand-rolling ledger reads.\n-" + }, + { + "file": "CLAUDE.md", + "kind": "claude-md", + "appliesTo": [ + "claude-code" + ], + "section": { + "heading": "## Frequently asked", + "startLine": 12, + "endLine": 16, + "tokens": 47 + }, + "projectedSavings": { + "perSessionUsd": 0, + "acrossWindowUsd": 0, + "tokens": 47, + "tokenShare": 0.20374449339207049 + }, + "diff": "# TRIM: ## Frequently asked\n# projected savings per session: $0.0000\n# projected savings across window: $0.0000\n--- a/CLAUDE.md\n+++ b/CLAUDE.md\n@@ -12,5 +12,0 @@\n-## Frequently asked\n-\n-- Q: where does the ledger live? A: under `${RELAYBURN_HOME}` or `~/.relayburn`.\n-- Q: how do I add a harness? A: drop an adapter in `packages/cli/src/harnesses/`.\n-" + }, + { + "file": "CLAUDE.md", + "kind": "claude-md", + "appliesTo": [ + "claude-code" + ], + "section": { + "heading": "## Long-tail trivia", + "startLine": 17, + "endLine": 23, + "tokens": 95 + }, + "projectedSavings": { + "perSessionUsd": 0, + "acrossWindowUsd": 0, + "tokens": 95, + "tokenShare": 0.4185022026431718 + }, + "diff": "# TRIM: ## Long-tail trivia\n# projected savings per session: $0.0000\n# projected savings across window: $0.0000\n--- a/CLAUDE.md\n+++ b/CLAUDE.md\n@@ -17,7 +17,0 @@\n-## Long-tail trivia\n-\n-A deliberately wordy section so the trim recommender has size to bite into:\n-the goal here is just to give the per-section token estimate something larger\n-than the surrounding sections, not to communicate any real content.\n-The phrase \"lorem ipsum\" appears here for tokenization volume only.\n-Nothing in this section is load-bearing for the rest of the project." 
+ } + ], + "summary": { + "filesAnalyzed": 1, + "filesWithRecommendations": 1, + "totalRecommendations": 3, + "totalProjectedSavingsPerSession": 0, + "totalProjectedSavingsAcrossWindow": 0 + } +} diff --git a/tests/fixtures/cli-golden/snapshots/overhead-trim.stdout.txt b/tests/fixtures/cli-golden/snapshots/overhead-trim.stdout.txt new file mode 100644 index 00000000..dfcba5ca --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/overhead-trim.stdout.txt @@ -0,0 +1,42 @@ +# burn overhead trim — projected savings if trimmed +# (recommendations only; burn never modifies your overhead files) + +# === CLAUDE.md (applies to: claude-code) === + +# TRIM: ## Conventions +# projected savings per session: $0.0000 +# projected savings across window: $0.0000 +--- a/CLAUDE.md ++++ b/CLAUDE.md +@@ -7,5 +7,0 @@ +-## Conventions +- +-- Use `node --test` for tests. +-- Prefer the SDK over hand-rolling ledger reads. +- + +# TRIM: ## Frequently asked +# projected savings per session: $0.0000 +# projected savings across window: $0.0000 +--- a/CLAUDE.md ++++ b/CLAUDE.md +@@ -12,5 +12,0 @@ +-## Frequently asked +- +-- Q: where does the ledger live? A: under `${RELAYBURN_HOME}` or `~/.relayburn`. +-- Q: how do I add a harness? A: drop an adapter in `packages/cli/src/harnesses/`. +- + +# TRIM: ## Long-tail trivia +# projected savings per session: $0.0000 +# projected savings across window: $0.0000 +--- a/CLAUDE.md ++++ b/CLAUDE.md +@@ -17,7 +17,0 @@ +-## Long-tail trivia +- +-A deliberately wordy section so the trim recommender has size to bite into: +-the goal here is just to give the per-section token estimate something larger +-than the surrounding sections, not to communicate any real content. +-The phrase "lorem ipsum" appears here for tokenization volume only. +-Nothing in this section is load-bearing for the rest of the project. 
diff --git a/tests/fixtures/cli-golden/snapshots/overhead.stdout.txt b/tests/fixtures/cli-golden/snapshots/overhead.stdout.txt new file mode 100644 index 00000000..0fe25fa6 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/overhead.stdout.txt @@ -0,0 +1,7 @@ + +Overhead files in ${PROJECT}: + +CLAUDE.md (tests/fixtures/cli-golden/project/CLAUDE.md) — 23 lines, ~227 tokens — applies to: claude-code + no matching sessions in window. + +Grand total (all overhead files, all time): $0.00 diff --git a/tests/fixtures/cli-golden/snapshots/run-help.stdout.txt b/tests/fixtures/cli-golden/snapshots/run-help.stdout.txt new file mode 100644 index 00000000..1fef452d --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/run-help.stdout.txt @@ -0,0 +1,11 @@ +burn run — spawn an agent harness with attribution + +Usage: + burn run [--tag k=v ...] [-- ] + +Known harnesses: claude, codex, opencode + +Examples: + burn run claude --tag workflow=refactor -- --resume + burn run codex --tag workflow=refactor + burn run opencode --tag workflow=refactor diff --git a/tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt new file mode 100644 index 00000000..025bac08 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt @@ -0,0 +1,49 @@ +{ + "index": { + "ids": { + "path": "${RELAYBURN_HOME}/ledger.idx", + "exists": true, + "bytes": 289, + "entries": 17 + }, + "content": { + "path": "${RELAYBURN_HOME}/ledger.content.idx", + "exists": true, + "bytes": 119, + "entries": 7 + } + }, + "content": { + "path": "${RELAYBURN_HOME}/content", + "exists": false, + "files": 0, + "sessions": 0, + "bytes": 0, + "userTurns": 3 + }, + "classifier": { + "turns": 7, + "classified": 7, + "missing": 0 + }, + "archive": { + "archivePath": "${RELAYBURN_HOME}/archive.sqlite", + "exists": true, + "archiveVersion": 3, + "ledgerOffsetBytes": 0, + "ledgerMtimeMs": 0, + "ledgerSizeBytes": 8229, + 
"ledgerMtimeMsCurrent": "${MTIME}", + "upToDate": false, + "lastBuiltAt": null, + "lastRebuildAt": null, + "rowCounts": { + "sessions": 0, + "turns": 0, + "toolCalls": 0, + "toolResultEvents": 0, + "compactions": 0 + }, + "fidelityHistogram": {} + } +} diff --git a/tests/fixtures/cli-golden/snapshots/state-status.stdout.txt b/tests/fixtures/cli-golden/snapshots/state-status.stdout.txt new file mode 100644 index 00000000..4d71374d --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/state-status.stdout.txt @@ -0,0 +1,19 @@ +derived state: +index: + id index: 17 hashes, 289 bytes at ${RELAYBURN_HOME}/ledger.idx + content index: 7 fingerprints, 119 bytes at ${RELAYBURN_HOME}/ledger.content.idx +content: + status: not built yet at ${RELAYBURN_HOME}/content + sidecars: 0 files, 0 non-empty sessions, 0 bytes + user turns: 3 ledger rows +classifier: + turns: 7 classified / 7 total (complete) +archive: ${RELAYBURN_HOME}/archive.sqlite + schema version: 3 + ledger cursor: 0 / 8,229 bytes (tail pending) + rows: + sessions: 0 + turns: 0 + tool_calls: 0 + tool_result_events: 0 + compactions: 0 diff --git a/tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt new file mode 100644 index 00000000..b124e200 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt @@ -0,0 +1,194 @@ +{ + "ingest": { + "ingestedSessions": 0, + "appendedTurns": 0 + }, + "turns": 7, + "totalCost": { + "model": "aggregate", + "total": 0.0441, + "input": 0.0195, + "output": 0.01755, + "reasoning": 0, + "cacheRead": 0.0063, + "cacheCreate": 0.00075 + }, + "byModel": [ + { + "model": "claude-sonnet-4-6", + "turns": 4, + "usage": { + "input": 5100, + "output": 830, + "reasoning": 0, + "cacheRead": 19500, + "cacheCreate5m": 200, + "cacheCreate1h": 0 + }, + "cost": { + "model": "claude-sonnet-4-6", + "total": 0.03435, + "input": 0.015300000000000001, + "output": 0.01245, + "reasoning": 0, + "cacheRead": 0.00585, + 
"cacheCreate": 0.00075 + } + }, + { + "model": "gpt-5-codex", + "turns": 1, + "usage": { + "input": 2000, + "output": 400, + "reasoning": 350, + "cacheRead": 0, + "cacheCreate5m": 0, + "cacheCreate1h": 0 + }, + "cost": { + "model": "gpt-5-codex", + "total": 0.006500000000000001, + "input": 0.0025, + "output": 0.004, + "reasoning": 0, + "cacheRead": 0, + "cacheCreate": 0 + } + }, + { + "model": "claude-haiku-4-5", + "turns": 2, + "usage": { + "input": 1700, + "output": 220, + "reasoning": 0, + "cacheRead": 4500, + "cacheCreate5m": 0, + "cacheCreate1h": 0 + }, + "cost": { + "model": "claude-haiku-4-5", + "total": 0.0032500000000000003, + "input": 0.0017000000000000001, + "output": 0.0011, + "reasoning": 0, + "cacheRead": 0.00045, + "cacheCreate": 0 + } + } + ], + "fidelity": { + "summary": { + "total": 7, + "byClass": { + "full": 6, + "usage-only": 0, + "aggregate-only": 0, + "cost-only": 0, + "partial": 1 + }, + "byGranularity": { + "per-turn": 7, + "per-message": 0, + "per-session-aggregate": 0, + "cost-only": 0 + }, + "missingCoverage": { + "hasInputTokens": 0, + "hasOutputTokens": 0, + "hasReasoningTokens": 0, + "hasCacheReadTokens": 1, + "hasCacheCreateTokens": 1, + "hasToolCalls": 0, + "hasToolResultEvents": 1, + "hasSessionRelationships": 1, + "hasRawContent": 1 + }, + "unknown": 0 + }, + "perCell": { + "groupBy": "model", + "cells": [ + { + "label": "claude-sonnet-4-6", + "partial": false, + "fields": { + "input": { + "known": 4, + "missing": 0 + }, + "output": { + "known": 4, + "missing": 0 + }, + "reasoning": { + "known": 4, + "missing": 0 + }, + "cacheRead": { + "known": 4, + "missing": 0 + }, + "cacheCreate": { + "known": 4, + "missing": 0 + } + } + }, + { + "label": "gpt-5-codex", + "partial": true, + "fields": { + "input": { + "known": 1, + "missing": 0 + }, + "output": { + "known": 1, + "missing": 0 + }, + "reasoning": { + "known": 1, + "missing": 0 + }, + "cacheRead": { + "known": 0, + "missing": 1 + }, + "cacheCreate": { + "known": 0, + "missing": 1 
+ } + } + }, + { + "label": "claude-haiku-4-5", + "partial": false, + "fields": { + "input": { + "known": 2, + "missing": 0 + }, + "output": { + "known": 2, + "missing": 0 + }, + "reasoning": { + "known": 2, + "missing": 0 + }, + "cacheRead": { + "known": 2, + "missing": 0 + }, + "cacheCreate": { + "known": 2, + "missing": 0 + } + } + } + ] + } + } +} diff --git a/tests/fixtures/cli-golden/snapshots/summary.stdout.txt b/tests/fixtures/cli-golden/snapshots/summary.stdout.txt new file mode 100644 index 00000000..1a015f57 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/summary.stdout.txt @@ -0,0 +1,14 @@ + +ingested 0 new sessions (+0 turns) + +turns analyzed: 7 + +model turns input output reasoning cacheRead cacheCreate cost +claude-sonnet-4-6 4 5,100 830 0 19,500 200 $0.034 +gpt-5-codex 1 2,000 400 350 — — $0.0065 +claude-haiku-4-5 2 1,700 220 0 4,500 0 $0.0033 + +total cost: $0.044 + input $0.019 / output $0.018 / reasoning $0.00 / cacheRead $0.0063 / cacheCreate $0.0008 + +fidelity: 6 full / 1 partial (use --json for per-field coverage) diff --git a/tests/fixtures/cli-golden/snapshots/top-level-help.stdout.txt b/tests/fixtures/cli-golden/snapshots/top-level-help.stdout.txt new file mode 100644 index 00000000..965d84b5 --- /dev/null +++ b/tests/fixtures/cli-golden/snapshots/top-level-help.stdout.txt @@ -0,0 +1,51 @@ +burn — token usage & cost attribution for agent CLIs + +Usage: + burn summary [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--provider

] [--quality] + [--by-provider | --by-tool | --by-subagent-type | --by-relationship[=subagent] | --subagent-tree ] [--no-archive] + (mode flags are mutually exclusive; --by-tool emits tool | calls | attributedCost) + burn hotspots [--since 7d] [--project ] [--workflow ] [--provider

] [--all] [--json] + [--session [id]] [--explain-drift] + [--patterns[=retries,failures,compaction,reverts]] [--findings] + burn overhead [trim] [--project ] [--since 7d] [--kind ] [--top ] [--json] + burn compare [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--json|--csv] + burn run [--tag k=v ...] [-- ] + burn ingest [--watch|--hook ] [--interval ] [--quiet] + burn mcp-server [--session-id ] (stdio MCP server for in-session self-query) + burn state [status] [--json] + burn state rebuild index | classify | content | archive [--full|--vacuum] | all + burn state prune [--days ] [--force] + burn state reset [--force] [--reingest] [--json] + +Examples: + burn summary --since 24h + burn summary --by-provider --provider synthetic + burn summary --subagent-tree + burn summary --by-subagent-type --since 7d + burn summary --by-relationship --since 7d + burn summary --by-tool --since 7d + burn hotspots --since 7d + burn hotspots --patterns --since 7d + burn hotspots --session --explain-drift + burn hotspots --session + burn overhead --since 30d + burn overhead --kind claude-md + burn overhead trim --top 3 + burn overhead trim --json + burn compare claude-sonnet-4-6,claude-haiku-4-5 --since 30d + burn run claude --tag workflow=refactor -- --resume + burn run codex --tag workflow=refactor + burn run opencode --tag workflow=refactor + burn ingest + burn ingest --watch + burn ingest --watch --opencode-stream + burn state + burn state prune --days 30 + burn state rebuild archive + burn state rebuild archive --full + burn state rebuild archive vacuum + burn state rebuild classify + +Provider filters are query-time only. Synthetic-routed models are recognized +from hf:*, accounts/fireworks/models/*, and synthetic/* model IDs and are +reported as provider "synthetic" without rewriting ledger rows. 
From 1ef101c8a85a2d547102c789ea912190397b9133 Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Wed, 6 May 2026 08:34:16 -0400 Subject: [PATCH 2/3] cli-golden: squash_numeric_field consumes all ASCII whitespace (review fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The byte-level skip loop only ate ' ' and '\t', so a newline or other ASCII whitespace before a numeric value would have stopped normalization and produced a false golden mismatch. Match the JS capture path's `\s*\d+` semantics by consuming the full ASCII whitespace set (space, tab, LF, CR, VT, FF) — `char::is_ascii_whitespace` is *not* equivalent (it excludes U+000B vertical tab), so list the bytes explicitly. Adds unit tests covering tab, newline+indent, CR/VT/FF, and the non-numeric bail. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/relayburn-cli/tests/golden.rs | 51 +++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/crates/relayburn-cli/tests/golden.rs b/crates/relayburn-cli/tests/golden.rs index aac89a0b..db33847a 100644 --- a/crates/relayburn-cli/tests/golden.rs +++ b/crates/relayburn-cli/tests/golden.rs @@ -253,7 +253,15 @@ fn squash_numeric_field(text: &str, key: &str, placeholder: &str) -> String { out.push_str(&rest[..idx]); out.push_str(&needle); let after_key = &rest[idx + needle.len()..]; - let trimmed_start = after_key.trim_start_matches(|c: char| c == ' ' || c == '\t'); + // Mirror the JS capture path's `\s*\d+` semantics. JS's `\s` matches + // the full ASCII whitespace set (space, tab, LF, CR, VT, FF) plus + // some Unicode spaces; JSON is ASCII at this layer so the byte set + // below is the right scope. NB: `char::is_ascii_whitespace` is *not* + // equivalent — it excludes U+000B (vertical tab), which JS `\s` does + // match, so we list the bytes explicitly. 
+ let trimmed_start = after_key.trim_start_matches(|c: char| { + matches!(c, ' ' | '\t' | '\n' | '\r' | '\x0b' | '\x0c') + }); let ws_consumed = after_key.len() - trimmed_start.len(); // If the value isn't a bare integer (e.g. `null`), bail and emit // the original bytes untouched. @@ -336,3 +344,44 @@ fn tempdir_under(parent: &Path) -> PathBuf { fs::create_dir_all(&dir).expect("create sealed HOME"); dir } + +#[cfg(test)] +mod tests { + use super::squash_numeric_field; + + #[test] + fn squash_numeric_field_matches_space_and_tab() { + let input = "{\"lastBuiltAt\": 12345,\"lastRebuildAt\":\t67890}"; + let out = squash_numeric_field(input, "lastBuiltAt", "${TS}"); + let out = squash_numeric_field(&out, "lastRebuildAt", "${TS}"); + assert_eq!( + out, + "{\"lastBuiltAt\": \"${TS}\",\"lastRebuildAt\": \"${TS}\"}" + ); + } + + #[test] + fn squash_numeric_field_matches_newline_and_indent() { + // Matches the JS regex `\s*\d+` semantics — if a formatter ever + // pretty-prints a numeric field across a line break, the runner + // still has to normalize it. + let input = "{\"lastBuiltAt\":\n 12345}"; + let out = squash_numeric_field(input, "lastBuiltAt", "${TS}"); + assert_eq!(out, "{\"lastBuiltAt\": \"${TS}\"}"); + } + + #[test] + fn squash_numeric_field_matches_carriage_return_and_other_ws() { + // CR, vertical tab, form feed — all in `\s` and all ASCII whitespace. 
+ let input = "{\"lastBuiltAt\":\r\n\x0b\x0c 12345}"; + let out = squash_numeric_field(input, "lastBuiltAt", "${TS}"); + assert_eq!(out, "{\"lastBuiltAt\": \"${TS}\"}"); + } + + #[test] + fn squash_numeric_field_leaves_non_numeric_value_untouched() { + let input = r#"{"lastBuiltAt": null}"#; + let out = squash_numeric_field(input, "lastBuiltAt", "${TS}"); + assert_eq!(out, input); + } +} From 9d5d1737c325922bd97393dec1105c5c79f3a754 Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Wed, 6 May 2026 08:47:26 -0400 Subject: [PATCH 3/3] cli-golden: HOME safety guard + fail-loud normalization + portable snapshot paths (review fixes round 2) - build-ledger.mjs: refuse to run unless the resolved ledger home is inside an allowlisted prefix (the in-repo fixture dir, $RUNNER_TEMP, or $TMPDIR). Aborts before any rm() so a missing RELAYBURN_HOME can no longer wipe a developer's real ledger. - build-ledger.mjs: hard-fail if the stamp ts normalization regex misses, instead of silently shipping a non-deterministic ledger. - golden.rs: BURN_GOLDEN!=1 now returns immediately so the skip path is truly fixture-free. Per-iteration gate check is dead code and gone. - capture-snapshots.mjs + golden.rs: normalize the synthetic /tmp/golden-project path to \${FIXTURE_PROJECT} on both sides so absolute-looking paths stay out of golden snapshots. - README.md: tag the layout fence as text for MD040. 
--- crates/relayburn-cli/tests/golden.rs | 18 ++++-- tests/fixtures/cli-golden/README.md | 2 +- .../cli-golden/scripts/build-ledger.mjs | 56 +++++++++++++++++-- .../cli-golden/scripts/capture-snapshots.mjs | 18 +++++- .../snapshots/hotspots-json.stdout.txt | 4 +- .../cli-golden/snapshots/hotspots.stdout.txt | 4 +- 6 files changed, 84 insertions(+), 18 deletions(-) diff --git a/crates/relayburn-cli/tests/golden.rs b/crates/relayburn-cli/tests/golden.rs index db33847a..15bd1152 100644 --- a/crates/relayburn-cli/tests/golden.rs +++ b/crates/relayburn-cli/tests/golden.rs @@ -60,17 +60,19 @@ struct Invocation { #[test] fn golden_diff_against_ts_cli_snapshots() { - let golden_gate = std::env::var("BURN_GOLDEN").ok(); - if golden_gate.as_deref() != Some("1") { + if std::env::var("BURN_GOLDEN").ok().as_deref() != Some("1") { // CI runs `cargo test --workspace` without BURN_GOLDEN set, so the // diff runner is silent there. Local devs run `BURN_GOLDEN=1 // cargo test --test golden -- --nocapture` to enforce the gate; // once Wave 2 finishes, the gate flips on by default in CI. + // Return early so an unset BURN_GOLDEN truly skips — no fixture + // discovery, no snapshot reads, no env-prep work. eprintln!( "[golden] BURN_GOLDEN!=1 — skipping (set BURN_GOLDEN=1 to enforce). \ Even when enforced, individual invocations stay skipped until their \ `enabled: true` flag is set in invocations.json." ); + return; } let fixture_dir = repo_root().join("tests").join("fixtures").join("cli-golden"); @@ -106,10 +108,8 @@ fn golden_diff_against_ts_cli_snapshots() { eprintln!("[golden] skip {} (enabled=false)", inv.name); continue; } - if golden_gate.as_deref() != Some("1") { - eprintln!("[golden] skip {} (BURN_GOLDEN!=1)", inv.name); - continue; - } + // The whole-test BURN_GOLDEN!=1 short-circuit at the top returned + // before this loop, so by the time we get here the gate is set. 
let snapshot_stdout = snapshots_dir.join(format!("{}.stdout.txt", inv.name)); let expected_stdout = fs::read_to_string(&snapshot_stdout).unwrap_or_else(|err| { @@ -227,6 +227,11 @@ fn burn_binary_path() -> PathBuf { /// Apply the same path / mtime placeholders the capture script uses so the /// snapshot stays portable across machines. Keep this in sync with /// `tests/fixtures/cli-golden/scripts/capture-snapshots.mjs::normalize`. +/// +/// The synthetic ledger embeds `/tmp/golden-project` as a fake project / +/// tool-target path; we substitute it here too so the Rust binary's output +/// matches the snapshot byte-for-byte regardless of how the path appears +/// on the host (it's a literal in the JSON, not a real filesystem path). fn normalize(text: &str, ledger_home: &Path, project_dir: &Path) -> String { let mut out = text.replace( ledger_home.to_str().expect("ledger home is utf8"), @@ -236,6 +241,7 @@ fn normalize(text: &str, ledger_home: &Path, project_dir: &Path) -> String { project_dir.to_str().expect("project dir is utf8"), "${PROJECT}", ); + out = out.replace("/tmp/golden-project", "${FIXTURE_PROJECT}"); out = squash_numeric_field(&out, "ledgerMtimeMsCurrent", "${MTIME}"); out = squash_numeric_field(&out, "lastBuiltAt", "${TS}"); out = squash_numeric_field(&out, "lastRebuildAt", "${TS}"); diff --git a/tests/fixtures/cli-golden/README.md b/tests/fixtures/cli-golden/README.md index 92125a9f..ee67d33a 100644 --- a/tests/fixtures/cli-golden/README.md +++ b/tests/fixtures/cli-golden/README.md @@ -8,7 +8,7 @@ no-op until Wave 2 flips invocations on one at a time. 
## Layout -``` +```text tests/fixtures/cli-golden/ ├── README.md — you are here ├── invocations.json — args + sealed env per snapshot; the contract diff --git a/tests/fixtures/cli-golden/scripts/build-ledger.mjs b/tests/fixtures/cli-golden/scripts/build-ledger.mjs index 8f90af33..72411723 100644 --- a/tests/fixtures/cli-golden/scripts/build-ledger.mjs +++ b/tests/fixtures/cli-golden/scripts/build-ledger.mjs @@ -17,7 +17,9 @@ // node tests/fixtures/cli-golden/scripts/build-ledger.mjs import { readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; import * as path from 'node:path'; +import { fileURLToPath } from 'node:url'; import { appendTurns, @@ -38,6 +40,37 @@ const STAMP_FIXED_TS = '2026-04-23T12:00:00.000Z'; const HOME = ledgerHome(); +// Hard precondition: refuse to run unless the resolved ledger home is +// inside one of the known-safe prefixes. Without this guard, a missing +// RELAYBURN_HOME (which falls back to ~/.relayburn) plus the rm() loop +// below would happily wipe a developer's real ledger. 
The allowlist: +// - the in-repo fixture dir (tests/fixtures/cli-golden/) +// - the CI runner temp dir ($RUNNER_TEMP) +// - the OS temp dir (used by capture-snapshots' mkdtemp paths) +const FIXTURE_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); +const SAFE_PREFIXES = [ + path.resolve(FIXTURE_DIR), + path.resolve(tmpdir()), +]; +if (typeof process.env['RUNNER_TEMP'] === 'string' && process.env['RUNNER_TEMP'].length > 0) { + SAFE_PREFIXES.push(path.resolve(process.env['RUNNER_TEMP'])); +} +const RESOLVED_HOME = path.resolve(HOME); +const insideSafePrefix = SAFE_PREFIXES.some((prefix) => { + const rel = path.relative(prefix, RESOLVED_HOME); + return rel === '' || (!rel.startsWith('..') && !path.isAbsolute(rel)); +}); +if (!insideSafePrefix) { + process.stderr.write( + `\n[fixture] REFUSING TO RUN: ledger home resolved to ${RESOLVED_HOME}\n` + + `[fixture] which is NOT inside any known-safe prefix:\n` + + SAFE_PREFIXES.map((p) => `[fixture] - ${p}\n`).join('') + + `[fixture] Set RELAYBURN_HOME to tests/fixtures/cli-golden/ledger (or a tmpdir)\n` + + `[fixture] before running this script. Aborting before any filesystem mutation.\n\n`, + ); + process.exit(2); +} + // Wipe any prior generation so re-runs are reproducible. We only delete // known-burn files inside HOME to avoid clobbering an unrelated dir if a // caller pointed RELAYBURN_HOME somewhere wrong. @@ -384,14 +417,25 @@ await appendRelationships([ // Substitute the stamp's wall-clock ts for the fixed value so the ledger // hashes the same on every run. Other ledger lines have hand-pinned ts // values already; only stamp() inserts a live timestamp. +// +// Hard-fail if the regex didn't match — a silent miss (e.g. JSON key +// reordering changed the serialization) would leave a non-deterministic +// stamp `ts` in the ledger, and downstream snapshots would drift on every +// run without an obvious cause. We stamped above, so the regex MUST hit. 
const ledgerFile = ledgerPath(); const raw = await readFile(ledgerFile, 'utf8'); -const rewritten = raw.replace( - /("kind":"stamp","ts":")[^"]+(")/g, - `$1${STAMP_FIXED_TS}$2`, -); -if (rewritten !== raw) { - await writeFile(ledgerFile, rewritten); +const stampPattern = /("kind":"stamp","ts":")[^"]+(")/g; +const rewritten = raw.replace(stampPattern, `$1${STAMP_FIXED_TS}$2`); +if (rewritten === raw) { + throw new Error( + `[fixture] stamp ts normalization regex did not match. The stamp() call ` + + `above should have written a "kind":"stamp" line with a "ts": field, ` + + `but the regex /${stampPattern.source}/ found nothing in ${ledgerFile}. ` + + `Has the stamp serialization shape changed (e.g. JSON key reordering)? ` + + `Update the regex to match the new shape — the alternative is a ` + + `non-deterministic ledger that drifts on every run.`, + ); } +await writeFile(ledgerFile, rewritten); console.error('[fixture] done'); diff --git a/tests/fixtures/cli-golden/scripts/capture-snapshots.mjs b/tests/fixtures/cli-golden/scripts/capture-snapshots.mjs index 7ebb5e8e..acd64d36 100644 --- a/tests/fixtures/cli-golden/scripts/capture-snapshots.mjs +++ b/tests/fixtures/cli-golden/scripts/capture-snapshots.mjs @@ -30,6 +30,11 @@ const PROJECT_DIR = path.join(FIXTURE_DIR, 'project'); const SNAPSHOT_DIR = path.join(FIXTURE_DIR, 'snapshots'); const INVOCATIONS = path.join(FIXTURE_DIR, 'invocations.json'); const CLI_PATH = path.join(ROOT, 'packages', 'cli', 'dist', 'cli.js'); +// Synthetic "project" path hard-coded into the fixture ledger by +// build-ledger.mjs. We replace it with ${FIXTURE_PROJECT} at capture time +// so absolute-looking paths stay out of golden snapshots; the Rust diff +// runner mirrors this substitution. Keep in sync with build-ledger.mjs. +const FIXTURE_PROJECT = '/tmp/golden-project'; await mkdir(LEDGER_HOME, { recursive: true }); await mkdir(SNAPSHOT_DIR, { recursive: true }); @@ -124,9 +129,20 @@ console.error('[capture] done'); * before comparing. 
Wall-clock millisecond fields in the `state status --json` * shape (`ledgerMtimeMsCurrent`, `lastBuiltAt`, `lastRebuildAt`) are squashed * to a stable placeholder for the same reason. + * + * The synthetic ledger built by build-ledger.mjs hard-codes + * `/tmp/golden-project` as the fake project / tool-target path. Even though + * that string is deterministic (it's never read off disk), absolute-looking + * `/tmp/...` paths in a golden snapshot are confusing and would block + * cross-platform reuse if the fixture ever moved off Unix-style roots. We + * normalize it to ${FIXTURE_PROJECT} here and keep the Rust diff runner in + * sync (see crates/relayburn-cli/tests/golden.rs::normalize). */ function normalize(text, ledgerHome, projectDir) { - let out = text.replaceAll(ledgerHome, '${RELAYBURN_HOME}').replaceAll(projectDir, '${PROJECT}'); + let out = text + .replaceAll(ledgerHome, '${RELAYBURN_HOME}') + .replaceAll(projectDir, '${PROJECT}') + .replaceAll(FIXTURE_PROJECT, '${FIXTURE_PROJECT}'); // Squash wall-clock millisecond fields — they're load-bearing for cache // invalidation but have no business in a golden snapshot. 
out = out.replaceAll( diff --git a/tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt index 14a24cc1..18ff9539 100644 --- a/tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt +++ b/tests/fixtures/cli-golden/snapshots/hotspots-json.stdout.txt @@ -29,7 +29,7 @@ ], "files": [ { - "path": "/tmp/golden-project/src/foo.ts", + "path": "${FIXTURE_PROJECT}/src/foo.ts", "toolCallCount": 2, "initialTokens": 1200, "persistenceTokens": 1000, @@ -39,7 +39,7 @@ "firstEmitTurnIndex": 0 }, { - "path": "/tmp/golden-project/src/bar.ts", + "path": "${FIXTURE_PROJECT}/src/bar.ts", "toolCallCount": 2, "initialTokens": 800, "persistenceTokens": 0, diff --git a/tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt b/tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt index 8dfa7c80..563e6b18 100644 --- a/tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt +++ b/tests/fixtures/cli-golden/snapshots/hotspots.stdout.txt @@ -14,8 +14,8 @@ unattributed $0.033 (output, system overhead, untracked) Top files by cumulative cost (approximate) path firstTurn initial(tok) persist(tok) rideTurns cost %attr -/tmp/golden-project/src/foo.ts 0 1,200 1,000 1 $0.0040 83.2% -/tmp/golden-project/src/bar.ts 0 800 0 0 $0.0008 16.8% +${FIXTURE_PROJECT}/src/foo.ts 0 1,200 1,000 1 $0.0040 83.2% +${FIXTURE_PROJECT}/src/bar.ts 0 800 0 0 $0.0008 16.8% Top Bash verbs by cost (approximate) verb calls commands initial(tok) persist(tok) avgRide cost examples