From 459aec276e29a4361ec91b32843f45d0400fcafd Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Wed, 6 May 2026 10:41:48 -0400 Subject: [PATCH 1/2] relayburn-cli: burn compare presenter (#248 D3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire `burn compare <models>` as a presenter over relayburn_sdk::analyze::compare's building blocks (build_compare_table + the per-turn fidelity gate); the SDK keeps the heavy lifting so a future burn__compare MCP tool wraps the same call. The CLI flag set mirrors the TS path 1:1 — positional comma-separated model list, --include-partial / --fidelity / --since / --project / --session / --workflow / --agent / --provider / --min-sample / --csv / --no-archive / --json — and the wire shape (cells ordering, rounding rules, fidelity-summary key order) goes byte-for-byte with the TS-captured cli-golden snapshots. --workflow / --agent / --provider short-circuit with a typed "not yet wired" error since the Rust ledger query layer doesn't expose the enrichment / provider filter today; the rest of the surface is fully wired. The two compare entries in tests/fixtures/cli-golden/invocations.json flip to enabled: true so BURN_GOLDEN=1 cargo test --test golden enforces parity. Because the on-disk fixture is a TS-style JSONL ledger and the Rust SDK reads SQLite, the golden runner now materializes a sibling SQLite home from the JSONL once per test run and points the binary at it; the import is loose-typed on tool_result_event / relationship / stamp so a fixture extension surfaces a stderr breadcrumb instead of panicking the whole runner. Smoke test loses `compare` from its "stub exits 1" loop and gains an explicit "rejects missing models" assertion against the wired path. 
--- CHANGELOG.md | 1 + crates/relayburn-cli/src/cli.rs | 73 +- crates/relayburn-cli/src/commands/compare.rs | 1058 +++++++++++++++++- crates/relayburn-cli/src/main.rs | 2 +- crates/relayburn-cli/tests/golden.rs | 147 ++- crates/relayburn-cli/tests/smoke.rs | 30 +- crates/relayburn-sdk/src/lib.rs | 20 +- tests/fixtures/cli-golden/invocations.json | 4 +- 8 files changed, 1311 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 599e68c1..f62c2f06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Cross-package release notes for relayburn. Package changelogs contain package-le ## [Unreleased] +- `relayburn-cli` (Rust): wire `burn compare` as a presenter over `relayburn_sdk::analyze::compare` building blocks (`build_compare_table` + the per-turn fidelity gate), matching the TS CLI flag set (positional comma-separated model list, `--include-partial` / `--fidelity` / `--since` / `--project` / `--session` / `--min-sample` / `--csv` / `--no-archive`) and producing byte-equivalent stdout for the cli-golden `compare` / `compare-json` invocations. (#248 D3) - `relayburn-sdk-node` (Rust): napi-rs bindings skeleton — `#[napi]` shims for every public verb in `relayburn-sdk` (`summary`, `sessionCost`, `overhead`, `overheadTrim`, `hotspots`, `search`, `exportLedger`, `exportStamps`, async `ingest`, plus `ledgerOpen`), with u64 token counts surfaced as JS `BigInt`, ISO-8601 timestamps as `String`, async verbs returning `Promise`, and a typed `BurnError` mapping for SDK failures. (#247) - `relayburn-cli` (Rust): introduce the harness substrate — `HarnessAdapter` trait, lazy compile-time `phf` registry (`lookup` / `list_harness_names`), and the shared `pending_stamp::adapter` factory codex + opencode will reuse. Adapter slots in the registry are reserved but empty pending the Wave 2 PRs (#248-d/e/f). 
`relayburn-sdk` re-exports `start_watch_loop`, `WatchController`, `write_pending_stamp`, `PendingStampHarness`, and friends so the CLI doesn't have to reach into private SDK modules. (#248) - `relayburn-cli` (Rust): scaffold the clap v4 derive root with global `--json` / `--ledger-path` / `--no-color` flags, eight stub subcommands (`summary`, `hotspots`, `overhead`, `compare`, `run`, `state`, `ingest`, `mcp-server`), and shared `render::{table,json,error}` helpers. Stubs exit `1` with a `not yet implemented` message (or a `{"error": …}` envelope under `--json`); Wave 2 fan-out PRs replace each stub with a thin presenter over `relayburn-sdk`. (#248 part a) diff --git a/crates/relayburn-cli/src/cli.rs b/crates/relayburn-cli/src/cli.rs index cf2c522e..f01549a6 100644 --- a/crates/relayburn-cli/src/cli.rs +++ b/crates/relayburn-cli/src/cli.rs @@ -17,7 +17,7 @@ use std::path::PathBuf; -use clap::{Parser, Subcommand}; +use clap::{Args as ClapArgs, Parser, Subcommand}; /// Parsed top-level argv — what every command handler receives via /// [`Args::globals`]. @@ -105,7 +105,7 @@ pub enum Command { Overhead, /// Compare cost across two or more models on the same workload. - Compare, + Compare(CompareArgs), /// Run an agent CLI under a harness wrapper that ingests its /// session log on exit. @@ -122,3 +122,72 @@ pub enum Command { #[command(name = "mcp-server")] McpServer, } + +/// Per-command flag set for `burn compare`. Mirrors +/// `packages/cli/src/commands/compare.ts` so the CLI surfaces match +/// byte-for-byte; see that file for the canonical help text. +/// +/// The first positional argument is a comma-separated model list +/// (`claude-sonnet-4-6,claude-haiku-4-5`). The presenter rejects fewer +/// than two distinct models with exit code 2 and a stderr message; this +/// is enforced at runtime rather than by clap so we get the same error +/// message shape as the TS CLI (`burn compare: needs at least 2 +/// models...`). 
+#[derive(Debug, Clone, ClapArgs)] +pub struct CompareArgs { + /// Comma-separated model list (e.g. `claude-sonnet-4-6,claude-haiku-4-5`). + /// Required at runtime — see the struct doc comment for the + /// minimum-models contract. + #[arg(value_name = "MODELS")] + pub models: Option<String>, + + /// Comma-separated list of effective providers to include + /// (e.g. `synthetic,anthropic,openai`). + #[arg(long, value_name = "LIST")] + pub provider: Option<String>, + + /// Relative range (e.g. `24h`, `7d`, `4w`) or ISO timestamp. + /// Defaults to all time. + #[arg(long, value_name = "WHEN")] + pub since: Option<String>, + + /// Filter by project path or git-canonical projectKey. + #[arg(long, value_name = "PATH")] + pub project: Option<String>, + + /// Filter by sessionId. + #[arg(long, value_name = "ID")] + pub session: Option<String>, + + /// Filter by stamped workflowId. + #[arg(long, value_name = "ID")] + pub workflow: Option<String>, + + /// Filter by stamped agentId. + #[arg(long, value_name = "ID")] + pub agent: Option<String>, + + /// Insufficient-sample threshold; cells below this get flagged in + /// the coverage-notes block. Default 5. + #[arg(long = "min-sample", value_name = "N")] + pub min_sample: Option<u64>, + + /// Minimum fidelity class to include + /// (`full | usage-only | aggregate-only | cost-only | partial`). + /// Default `usage-only`. + #[arg(long, value_name = "CLASS")] + pub fidelity: Option<String>, + + /// Shorthand for `--fidelity partial`. + #[arg(long = "include-partial")] + pub include_partial: bool, + + /// Emit a stable CSV with one row per (model, category) pair. + #[arg(long)] + pub csv: bool, + + /// Bypass the SQLite archive and stream the ledger directly. + /// Honored when env `RELAYBURN_ARCHIVE=0`. 
+ #[arg(long = "no-archive")] + pub no_archive: bool, +} diff --git a/crates/relayburn-cli/src/commands/compare.rs b/crates/relayburn-cli/src/commands/compare.rs index 1ab513d6..bdc57344 100644 --- a/crates/relayburn-cli/src/commands/compare.rs +++ b/crates/relayburn-cli/src/commands/compare.rs @@ -1,12 +1,1054 @@ -//! `burn compare <models>` — compare cost across two or -//! more models on the same workload. +//! `burn compare <models>` — per-(model, activity) cost +//! comparison table. Thin presenter over the +//! `relayburn_sdk::analyze::compare` building blocks (`build_compare_table` +//! plus `compare_from_archive`); the heavy lifting lives in the SDK so the +//! MCP server can reuse it. //! -//! Stub. Wave 2 D3 wires this up as a thin presenter over -//! `relayburn_sdk::compare`. TS source of truth: -//! `packages/cli/src/commands/compare.ts`. +//! TS source of truth: `packages/cli/src/commands/compare.ts`. The wire +//! shape (cells ordering, rounding rules, fidelity-summary key order) +//! mirrors that file byte-for-byte against the cli-golden snapshot. 
-use crate::cli::GlobalArgs; +use std::collections::BTreeSet; -pub fn run(globals: &GlobalArgs) -> i32 { - super::not_yet_implemented("compare", globals) +use anyhow::{anyhow, Result}; +use relayburn_sdk::{ + build_compare_table, has_minimum_fidelity, load_pricing, summarize_fidelity, CompareCell, + CompareOptions, CompareTable, EnrichedTurn, FidelityClass, FidelitySummary, Ledger, + LedgerOpenOptions, Query, UsageGranularity, DEFAULT_MIN_SAMPLE, +}; +use serde_json::{json, Value}; + +use crate::cli::{CompareArgs, GlobalArgs}; +use crate::render::error::{report_error, EXIT_GENERIC_ERROR}; +use crate::render::json::render_json; + +const FIDELITY_CHOICES: &[&str] = &[ + "full", + "usage-only", + "aggregate-only", + "cost-only", + "partial", +]; + +const FIDELITY_ORDER: &[&str] = &[ + "cost-only", + "aggregate-only", + "partial", + "usage-only", + "full", +]; + +const NEEDS_MODELS_MSG: &str = + "burn compare: needs at least 2 models. Run `burn summary --by-provider` (or `burn summary --by-tool`) to see which models have data."; + +const NOTE_LIMIT: usize = 8; +const DASH: &str = "—"; + +pub fn run(globals: &GlobalArgs, args: CompareArgs) -> i32 { + match run_inner(globals, args) { + Ok(code) => code, + Err(e) => report_error(&e, globals), + } +} + +fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { + // 1. Parse positional models list (comma-separated, dedup, preserve order). + let raw = match args.models.as_deref() { + Some(s) => s, + None => { + eprintln!("{NEEDS_MODELS_MSG}"); + return Ok(EXIT_GENERIC_ERROR); + } + }; + let mut seen: BTreeSet = BTreeSet::new(); + let mut models: Vec = Vec::new(); + for part in raw.split(',') { + let m = part.trim(); + if m.is_empty() { + continue; + } + if seen.insert(m.to_string()) { + models.push(m.to_string()); + } + } + if models.len() < 2 { + eprintln!("{NEEDS_MODELS_MSG}"); + return Ok(EXIT_GENERIC_ERROR); + } + + // 2. Resolve --fidelity / --include-partial. 
+ let mut min_fidelity: FidelityClass = FidelityClass::UsageOnly; + if let Some(raw) = args.fidelity.as_deref() { + if !FIDELITY_CHOICES.contains(&raw) { + eprintln!( + "burn: invalid --fidelity: {raw} (expected one of {})", + FIDELITY_CHOICES.join(", ") + ); + return Ok(EXIT_GENERIC_ERROR); + } + min_fidelity = parse_fidelity(raw)?; + } + if args.include_partial { + if let Some(raw) = args.fidelity.as_deref() { + if raw != "partial" { + eprintln!("burn: --include-partial conflicts with --fidelity {raw}"); + return Ok(EXIT_GENERIC_ERROR); + } + } + min_fidelity = FidelityClass::Partial; + } + + // 3. JSON / CSV mutual exclusion. `--json` is a global flag; `--csv` is + // per-command so the global JSON take-precedence rule in the TS CLI + // becomes "explicit conflict" here — same exit code, same message. + if globals.json && args.csv { + eprintln!("burn: --json and --csv are mutually exclusive; pick one."); + return Ok(EXIT_GENERIC_ERROR); + } + + // 4. Provider filter. Surfaced as an explicit "not yet wired" error + // rather than a silent no-op — the SDK's provider filter is private + // to the analyze module today, and exposing it through a typed + // top-level surface is part of the broader provider-classifier + // follow-up. The cli-golden corpus exercises compare without a + // provider filter, so this is unblocked for parity. + if args.provider.is_some() { + return Err(anyhow!( + "burn compare: --provider filter is not yet wired through the Rust SDK (#246 follow-up)" + )); + } + + // 5. min-sample. + let min_sample = args.min_sample.unwrap_or(DEFAULT_MIN_SAMPLE); + if min_sample < 1 { + eprintln!("burn: invalid --min-sample: {min_sample}"); + return Ok(EXIT_GENERIC_ERROR); + } + + // 6. Honor --no-archive by exporting RELAYBURN_ARCHIVE=0 for the + // duration of this call. The Rust SDK doesn't read RELAYBURN_ARCHIVE + // today (it's SQLite-only), but we set the env so any future archive + // layer behaves identically to the TS CLI's `--no-archive`. 
+ let _archive_guard = ArchiveOverride::activate(args.no_archive); + + // 7. Build the Query. + let mut q = Query::default(); + if let Some(s) = normalize_since(args.since.as_deref())? { + q.since = Some(s); + } + if let Some(p) = args.project.as_deref() { + q.project = Some(p.to_string()); + } + if let Some(s) = args.session.as_deref() { + q.session_id = Some(s.to_string()); + } + // `workflow` / `agent` flow through the stamp-based enrichment filter + // which the Rust ledger query layer doesn't yet expose. Surface the + // gap explicitly rather than silently dropping the flag — when the + // ledger gains enrichment-filter support, this branch comes out. + if args.workflow.is_some() || args.agent.is_some() { + return Err(anyhow!( + "burn compare: --workflow / --agent filters are not yet wired through the Rust ledger query (#246 follow-up)" + )); + } + + // 8. Open ledger and walk turns. + let ledger_opts = match globals.ledger_path.as_deref() { + Some(p) => LedgerOpenOptions::with_home(p), + None => LedgerOpenOptions::default(), + }; + let handle = Ledger::open(ledger_opts)?; + let queried_turns: Vec = handle.raw().query_turns(&q)?; + + // 9. Provider filter is rejected up-front (see step 4). Pipeline + // treats every queried turn as eligible. + let filtered_by_provider: Vec = queried_turns; + + // 10. Fidelity summary is computed BEFORE the fidelity gate so the + // `summary` block in the JSON envelope reflects the queried slice. + let fidelity_summary = summarize_fidelity( + &filtered_by_provider + .iter() + .map(|et| et.turn.clone()) + .collect::>(), + ); + let filtered_turns: Vec = if matches!(min_fidelity, FidelityClass::Partial) { + filtered_by_provider + } else { + filtered_by_provider + .into_iter() + .filter(|et| has_minimum_fidelity(et.turn.fidelity.as_ref(), min_fidelity)) + .collect() + }; + let analyzed_turns = filtered_turns.len(); + + // 11. Build the compare table. 
+ let pricing = load_pricing(None); + let opts = CompareOptions { + pricing: &pricing, + models: Some(models.clone()), + min_sample: Some(min_sample), + }; + let table = build_compare_table(&filtered_turns, &opts); + + // 12. Render. + if globals.json { + let v = build_json(&table, analyzed_turns, min_fidelity, &fidelity_summary); + render_json(&v)?; + return Ok(0); + } + if args.csv { + let csv = render_csv(&table); + print!("{csv}"); + return Ok(0); + } + let tty = render_tty( + &table, + analyzed_turns, + min_fidelity, + &fidelity_summary, + ); + print!("{tty}"); + Ok(0) +} + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +fn parse_fidelity(s: &str) -> Result<FidelityClass> { + match s { + "full" => Ok(FidelityClass::Full), + "usage-only" => Ok(FidelityClass::UsageOnly), + "aggregate-only" => Ok(FidelityClass::AggregateOnly), + "cost-only" => Ok(FidelityClass::CostOnly), + "partial" => Ok(FidelityClass::Partial), + other => Err(anyhow!("invalid fidelity class: {other}")), + } +} + +fn fidelity_class_str(cls: FidelityClass) -> &'static str { + match cls { + FidelityClass::Full => "full", + FidelityClass::UsageOnly => "usage-only", + FidelityClass::AggregateOnly => "aggregate-only", + FidelityClass::CostOnly => "cost-only", + FidelityClass::Partial => "partial", + } +} + +/// Normalize `--since` exactly like the SDK's free-fn would (relative +/// `7d` → ISO Z, ISO pass-through, garbage → error). Inlined here rather +/// than imported because the SDK helper isn't on the public surface. 
+fn normalize_since(since: Option<&str>) -> Result> { + let Some(raw) = since else { + return Ok(None); + }; + if raw.is_empty() { + return Ok(None); + } + if let Some((n, unit)) = parse_relative(raw) { + let secs_back = match unit { + 'h' => n * 3_600, + 'd' => n * 86_400, + 'w' => n * 7 * 86_400, + 'm' => n * 30 * 86_400, + _ => unreachable!(), + }; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let when = now.saturating_sub(secs_back); + return Ok(Some(format_iso_z(when))); + } + if !looks_like_iso(raw) { + return Err(anyhow!( + "invalid since: {raw} (expected ISO timestamp or relative range like 7d)" + )); + } + Ok(Some(raw.to_string())) +} + +fn parse_relative(s: &str) -> Option<(u64, char)> { + let bytes = s.as_bytes(); + if bytes.len() < 2 { + return None; + } + let unit = bytes[bytes.len() - 1] as char; + if !matches!(unit, 'h' | 'd' | 'w' | 'm') { + return None; + } + let num = &s[..s.len() - 1]; + if num.is_empty() || !num.bytes().all(|b| b.is_ascii_digit()) { + return None; + } + let n: u64 = num.parse().ok()?; + Some((n, unit)) +} + +fn looks_like_iso(s: &str) -> bool { + let b = s.as_bytes(); + b.len() >= 10 + && b[0..4].iter().all(|c| c.is_ascii_digit()) + && b[4] == b'-' + && b[5..7].iter().all(|c| c.is_ascii_digit()) + && b[7] == b'-' + && b[8..10].iter().all(|c| c.is_ascii_digit()) +} + +fn format_iso_z(secs: u64) -> String { + let total_days = (secs / 86_400) as i64; + let secs_in_day = (secs % 86_400) as u32; + let hour = secs_in_day / 3_600; + let minute = (secs_in_day / 60) % 60; + let second = secs_in_day % 60; + let (year, month, day) = days_to_ymd(total_days); + format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z") +} + +fn days_to_ymd(days_from_epoch: i64) -> (i64, u32, u32) { + let z = days_from_epoch + 719_468; + let era = if z >= 0 { z } else { z - 146_096 } / 146_097; + let doe = (z - era * 146_097) as u64; + let yoe = (doe - doe / 1_460 
+ doe / 36_524 - doe / 146_096) / 365; + let y = yoe as i64 + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let year = if m <= 2 { y + 1 } else { y }; + (year, m as u32, d as u32) +} + +/// Drop-in for `RELAYBURN_ARCHIVE=0`. Restores the previous value on +/// drop so a panic part-way through doesn't leak the override. +struct ArchiveOverride { + previous: Option, + activated: bool, +} + +impl ArchiveOverride { + fn activate(no_archive: bool) -> Self { + if !no_archive { + return Self { + previous: None, + activated: false, + }; + } + let previous = std::env::var("RELAYBURN_ARCHIVE").ok(); + std::env::set_var("RELAYBURN_ARCHIVE", "0"); + Self { + previous, + activated: true, + } + } +} + +impl Drop for ArchiveOverride { + fn drop(&mut self) { + if !self.activated { + return; + } + match self.previous.take() { + Some(v) => std::env::set_var("RELAYBURN_ARCHIVE", v), + None => std::env::remove_var("RELAYBURN_ARCHIVE"), + } + } +} + +// --------------------------------------------------------------------------- +// number formatting (matches packages/cli/src/format.ts) +// --------------------------------------------------------------------------- + +fn format_usd(n: f64) -> String { + if n == 0.0 { + return "$0.00".to_string(); + } + if n < 0.01 { + return format!("${}", to_fixed_raw(n, 4)); + } + if n < 1.0 { + return format!("${}", to_fixed_raw(n, 3)); + } + format!("${}", to_fixed_raw(n, 2)) +} + +/// Mirror JS `n.toFixed(d)` — keeps trailing zeros (so 1.5 with digits=2 +/// becomes "1.50"). Use this for human-readable output where the +/// fixed-width column matters; use [`to_fixed`] for JSON-bound values +/// that go through `Number(...).toString()` semantics. 
+fn to_fixed_raw(n: f64, digits: usize) -> String { + format!("{n:.*}", digits) +} + +fn format_int(n: u64) -> String { + // `toLocaleString('en-US')` thousands grouping. JS uses `,`. The + // golden corpus values are small (≤ 7) so the comma path isn't hit + // by the snapshot, but we implement it anyway for parity. + let s = n.to_string(); + let bytes = s.as_bytes(); + let mut out = String::with_capacity(s.len() + s.len() / 3); + let len = bytes.len(); + for (i, b) in bytes.iter().enumerate() { + if i > 0 && (len - i).is_multiple_of(3) { + out.push(','); + } + out.push(*b as char); + } + out +} + +fn format_pct(rate: f64) -> String { + // `Math.round(p * 100)` — round half to even on f64; matches JS for + // the corpus we compare against (the `Math.round` half-to-even + // exception is below the 1e-9 precision we care about here). + let pct = (rate * 100.0).round() as i64; + format!("{pct}%") +} + +/// `Number(n.toFixed(d))` — produce the shortest decimal string for the +/// rounded value. Drops trailing zeros, mirroring JS `Number(...).toString()`. +fn to_fixed(n: f64, digits: usize) -> String { + let s = format!("{n:.*}", digits); + // For "0.00" / "1.00" → strip the trailing zeros, but keep at least + // the integer part. Mirrors JS: `Number("1.00").toString() === "1"`. + trim_trailing_zeros(&s) +} + +fn trim_trailing_zeros(s: &str) -> String { + if !s.contains('.') { + return s.to_string(); + } + let trimmed = s.trim_end_matches('0').trim_end_matches('.'); + if trimmed.is_empty() || trimmed == "-" { + "0".to_string() + } else { + trimmed.to_string() + } +} + +// --------------------------------------------------------------------------- +// rounding for JSON output (Number(n.toFixed(d))) +// --------------------------------------------------------------------------- + +/// JSON-friendly rounded number. Returns a `serde_json::Value::Number` +/// that prints without trailing zeros — matches `JSON.stringify(Number(n.toFixed(d)))`. 
+/// Whole-number results render as integers (`1`, not `1.0`); fractional +/// results render as the shortest decimal needed. +fn round_json(n: f64, digits: usize) -> Value { + let s = format!("{n:.*}", digits); + let parsed: f64 = s.parse().unwrap_or(0.0); + f64_to_json(parsed) +} + +/// Serialize an f64 with JS `JSON.stringify` semantics: integral values +/// render as integers, fractional values render via Ryu. +fn f64_to_json(n: f64) -> Value { + if n.is_nan() || n.is_infinite() { + // Match JS: NaN / Infinity become `null` in JSON. + return Value::Null; + } + if n == 0.0 { + // Both +0.0 and -0.0 become 0. + return Value::from(0u64); + } + if n.fract() == 0.0 && n.abs() < (i64::MAX as f64) { + return Value::from(n as i64); + } + // `serde_json::Number::from_f64` always emits a JSON number; the + // pretty-printer uses Ryu's shortest representation for finite f64. + Value::from(n) +} + +/// Like `f64_to_json` but for `Option<f64>` — `None` → `null`. +fn opt_f64_to_json(n: Option<f64>) -> Value { + match n { + Some(v) => f64_to_json(v), + None => Value::Null, + } +} + +/// Like `round_json` but for `Option<f64>`. 
+fn round_opt(n: Option<f64>, digits: usize) -> Value { + match n { + Some(v) => round_json(v, digits), + None => Value::Null, + } +} + +// --------------------------------------------------------------------------- +// CompareExcludedBreakdown +// --------------------------------------------------------------------------- + +#[derive(Default)] +struct ExcludedBreakdown { + total: u64, + aggregate_only: u64, + cost_only: u64, + partial: u64, + usage_only: u64, +} + +fn compute_excluded(summary: &FidelitySummary, minimum: FidelityClass) -> ExcludedBreakdown { + let mut out = ExcludedBreakdown::default(); + if matches!(minimum, FidelityClass::Partial) { + return out; + } + let need = FIDELITY_ORDER + .iter() + .position(|c| *c == fidelity_class_str(minimum)) + .unwrap_or(0); + for (i, key) in FIDELITY_ORDER.iter().enumerate() { + if i >= need { + continue; + } + let cls = parse_fidelity(key).unwrap(); + let n = summary.by_class.get(&cls).copied().unwrap_or(0); + if n == 0 { + continue; + } + out.total += n; + match *key { + "aggregate-only" => out.aggregate_only += n, + "cost-only" => out.cost_only += n, + "partial" => out.partial += n, + "usage-only" => out.usage_only += n, + _ => {} + } + } + out +} + +// --------------------------------------------------------------------------- +// JSON envelope +// --------------------------------------------------------------------------- + +fn build_json( + table: &CompareTable, + analyzed_turns: usize, + minimum: FidelityClass, + summary: &FidelitySummary, +) -> Value { + let excluded = compute_excluded(summary, minimum); + // Cells in (model × category) iteration order; matches the TS + // `for m of models / for cat of categories` walk. 
+ let mut cells: Vec = Vec::with_capacity(table.models.len() * table.categories.len()); + for m in &table.models { + for cat in &table.categories { + let c = table + .cells + .get(m) + .and_then(|by_cat| by_cat.get(cat)) + .cloned() + .unwrap_or_else(empty_cell); + cells.push(json!({ + "model": m, + "category": cat, + "turns": c.turns, + "editTurns": c.edit_turns, + "oneShotTurns": c.one_shot_turns, + "pricedTurns": c.priced_turns, + "totalCost": round_json(c.total_cost, 6), + "costPerTurn": round_opt(c.cost_per_turn, 6), + "oneShotRate": round_opt(c.one_shot_rate, 4), + "cacheHitRate": round_opt(c.cache_hit_rate, 4), + "medianRetries": opt_f64_to_json(c.median_retries), + "noData": c.no_data, + "insufficientSample": c.insufficient_sample, + })); + } + } + + // `totals` keys must come out in `models` order (the TS `Object` + // preserves insertion order). Build with a serde_json::Map so the + // `preserve_order` feature on serde_json keeps insertion order. + let mut totals = serde_json::Map::new(); + for m in &table.models { + let totals_for = table.totals.get(m).cloned().unwrap_or_default(); + totals.insert( + m.clone(), + json!({ + "turns": totals_for.turns, + "totalCost": f64_to_json(totals_for.total_cost), + }), + ); + } + + json!({ + "analyzedTurns": analyzed_turns, + "minSample": table.min_sample, + "models": &table.models, + "categories": &table.categories, + "totals": Value::Object(totals), + "cells": cells, + "fidelity": { + "minimum": fidelity_class_str(minimum), + "excluded": { + "total": excluded.total, + "aggregateOnly": excluded.aggregate_only, + "costOnly": excluded.cost_only, + "partial": excluded.partial, + "usageOnly": excluded.usage_only, + }, + "summary": fidelity_summary_to_value(summary), + } + }) +} + +/// Build the fidelity-summary JSON sub-object with the same key order +/// the TS path emits (literal `{ full, usage-only, aggregate-only, +/// cost-only, partial }` order, preserved via serde_json's +/// `preserve_order` feature). 
+fn fidelity_summary_to_value(s: &FidelitySummary) -> Value { + let mut by_class = serde_json::Map::new(); + for key in &["full", "usage-only", "aggregate-only", "cost-only", "partial"] { + let cls = parse_fidelity(key).unwrap(); + let n = s.by_class.get(&cls).copied().unwrap_or(0); + by_class.insert((*key).to_string(), Value::from(n)); + } + let mut by_granularity = serde_json::Map::new(); + for key in &["per-turn", "per-message", "per-session-aggregate", "cost-only"] { + let g = match *key { + "per-turn" => UsageGranularity::PerTurn, + "per-message" => UsageGranularity::PerMessage, + "per-session-aggregate" => UsageGranularity::PerSessionAggregate, + "cost-only" => UsageGranularity::CostOnly, + _ => unreachable!(), + }; + let n = s.by_granularity.get(&g).copied().unwrap_or(0); + by_granularity.insert((*key).to_string(), Value::from(n)); + } + // missingCoverage: keys are camelCase; iterate in the same fixed order + // the TS `emptyFidelitySummary()` literal uses so JSON shape is stable. 
+ let coverage_keys = &[ + "hasInputTokens", + "hasOutputTokens", + "hasReasoningTokens", + "hasCacheReadTokens", + "hasCacheCreateTokens", + "hasToolCalls", + "hasToolResultEvents", + "hasSessionRelationships", + "hasRawContent", + ]; + let mut missing = serde_json::Map::new(); + for k in coverage_keys { + let n = s.missing_coverage.get(*k).copied().unwrap_or(0); + missing.insert((*k).to_string(), Value::from(n)); + } + + let mut out = serde_json::Map::new(); + out.insert("total".to_string(), Value::from(s.total)); + out.insert("byClass".to_string(), Value::Object(by_class)); + out.insert("byGranularity".to_string(), Value::Object(by_granularity)); + out.insert("missingCoverage".to_string(), Value::Object(missing)); + out.insert("unknown".to_string(), Value::from(s.unknown)); + Value::Object(out) +} + +fn empty_cell() -> CompareCell { + CompareCell { + turns: 0, + edit_turns: 0, + one_shot_turns: 0, + priced_turns: 0, + total_cost: 0.0, + cost_per_turn: None, + one_shot_rate: None, + cache_hit_rate: None, + median_retries: None, + no_data: true, + insufficient_sample: false, + } +} + +// --------------------------------------------------------------------------- +// CSV +// --------------------------------------------------------------------------- + +fn render_csv(table: &CompareTable) -> String { + let header = [ + "model", + "category", + "turns", + "editTurns", + "oneShotTurns", + "pricedTurns", + "totalCost", + "costPerTurn", + "oneShotRate", + "cacheHitRate", + "medianRetries", + "noData", + "insufficientSample", + ]; + let mut rows: Vec = Vec::new(); + rows.push(header.join(",")); + for m in &table.models { + for cat in &table.categories { + let c = table + .cells + .get(m) + .and_then(|by_cat| by_cat.get(cat)) + .cloned() + .unwrap_or_else(empty_cell); + let row = vec![ + csv_cell(m), + csv_cell(cat), + c.turns.to_string(), + c.edit_turns.to_string(), + c.one_shot_turns.to_string(), + c.priced_turns.to_string(), + num_csv(c.total_cost, 6), + 
c.cost_per_turn + .map(|v| num_csv(v, 6)) + .unwrap_or_default(), + c.one_shot_rate + .map(|v| num_csv(v, 4)) + .unwrap_or_default(), + c.cache_hit_rate + .map(|v| num_csv(v, 4)) + .unwrap_or_default(), + c.median_retries + .map(|v| { + // `String(n)` for numbers; JS prints integers as-is. + if v.fract() == 0.0 { + (v as i64).to_string() + } else { + v.to_string() + } + }) + .unwrap_or_default(), + if c.no_data { "true" } else { "false" }.to_string(), + if c.insufficient_sample { + "true" + } else { + "false" + } + .to_string(), + ]; + rows.push(row.join(",")); + } + } + format!("{}\n", rows.join("\n")) +} + +fn csv_cell(s: &str) -> String { + if s.contains(',') || s.contains('"') || s.contains('\n') { + format!("\"{}\"", s.replace('"', "\"\"")) + } else { + s.to_string() + } +} + +fn num_csv(n: f64, digits: usize) -> String { + to_fixed(n, digits) +} + +// --------------------------------------------------------------------------- +// TTY +// --------------------------------------------------------------------------- + +fn cell_fields(c: &CompareCell) -> [String; 3] { + if c.no_data { + return [DASH.to_string(), DASH.to_string(), DASH.to_string()]; + } + let turns = format_int(c.turns); + let cost = c + .cost_per_turn + .map(format_usd) + .unwrap_or_else(|| DASH.to_string()); + let one_shot = c + .one_shot_rate + .map(format_pct) + .unwrap_or_else(|| DASH.to_string()); + [turns, cost, one_shot] +} + +fn render_tty( + table: &CompareTable, + analyzed_turns: usize, + minimum: FidelityClass, + summary: &FidelitySummary, +) -> String { + let mut lines: Vec = Vec::new(); + lines.push(String::new()); + lines.push(format!("turns analyzed: {}", format_int(analyzed_turns as u64))); + + let excluded = compute_excluded(summary, minimum); + if excluded.total > 0 { + lines.push(format_excluded_note(&excluded, minimum)); + } + lines.push(String::new()); + + if table.models.is_empty() || table.categories.is_empty() { + lines.push( + "no data to compare (need turns spanning ≥1 
model and ≥1 activity).".to_string(), + ); + lines.push(String::new()); + return lines.join("\n"); + } + + let sub_header = build_sub_header(&table.models); + + let owned_empty = empty_cell(); + let cell_for = |m: &str, cat: &str| -> CompareCell { + table + .cells + .get(m) + .and_then(|by| by.get(cat)) + .cloned() + .unwrap_or_else(empty_cell) + }; + // Suppress the unused-variable warning on `owned_empty`; it's only + // referenced when we run a corner case where neither cells.get nor + // by_cat.get is hit, which the table builder doesn't produce today. + let _ = &owned_empty; + + let mut data_rows: Vec> = Vec::new(); + for cat in &table.categories { + let mut row: Vec = vec![cat.clone()]; + for m in &table.models { + let cell = cell_for(m, cat); + let [a, b, c] = cell_fields(&cell); + row.push(a); + row.push(b); + row.push(c); + } + data_rows.push(row); + } + + let mut widths = vec![0usize; sub_header.len()]; + for row in std::iter::once(&sub_header).chain(data_rows.iter()) { + for (i, cell) in row.iter().enumerate() { + widths[i] = widths[i].max(display_width(cell)); + } + } + + const SEP: &str = " "; + + // Widen the last column of each model's group to fit the (possibly + // longer) display name. Mirrors the TS path's group-line padding. + for mi in 0..table.models.len() { + let start = 1 + mi * 3; + let group_width = + widths[start] + SEP.len() + widths[start + 1] + SEP.len() + widths[start + 2]; + let name = display_model_name(&table.models[mi]); + let name_w = display_width(name); + if name_w > group_width { + widths[start + 2] += name_w - group_width; + } + } + + // Group-name line. 
+ let mut group_line: Vec = vec![pad_end("", widths[0])]; + for mi in 0..table.models.len() { + let start = 1 + mi * 3; + let group_width = + widths[start] + SEP.len() + widths[start + 1] + SEP.len() + widths[start + 2]; + let name = display_model_name(&table.models[mi]); + group_line.push(pad_end(name, group_width)); + } + lines.push(rstrip(&group_line.join(SEP))); + + // Sub-header. + lines.push(render_row(&sub_header, &widths, SEP)); + + // Data rows. + for row in &data_rows { + lines.push(render_row(row, &widths, SEP)); + } + + // Coverage notes. + let mut notes: Vec = Vec::new(); + for cat in &table.categories { + let any_has_data = table + .models + .iter() + .any(|m| !cell_for(m, cat).no_data); + if !any_has_data { + continue; + } + for m in &table.models { + let cell = cell_for(m, cat); + if cell.no_data { + notes.push(format!( + "no {} data in '{cat}' — no comparison available.", + display_model_name(m) + )); + } else if cell.insufficient_sample { + notes.push(format!( + "low {} sample in '{cat}' ({} turns < {}) — treat as indicative.", + display_model_name(m), + cell.turns, + table.min_sample + )); + } + } + } + if !notes.is_empty() { + lines.push(String::new()); + let shown = notes.iter().take(NOTE_LIMIT); + for n in shown { + lines.push(format!(" {n}")); + } + if notes.len() > NOTE_LIMIT { + lines.push(format!( + " … and {} more coverage gaps.", + notes.len() - NOTE_LIMIT + )); + } + } + + // Per-model totals. 
+ lines.push(String::new()); + for m in &table.models { + let tot = table.totals.get(m).cloned().unwrap_or_default(); + let total_cost = if tot.turns > 0 { + format_usd(tot.total_cost) + } else { + DASH.to_string() + }; + lines.push(format!( + "{}: {} turns, {} total", + display_model_name(m), + format_int(tot.turns), + total_cost + )); + } + lines.push(String::new()); + lines.join("\n") +} + +fn build_sub_header(models: &[String]) -> Vec { + let mut row: Vec = vec!["Activity".to_string()]; + for _ in models { + row.push("Turns".to_string()); + row.push("Cost/turn".to_string()); + row.push("1-shot".to_string()); + } + row +} + +fn render_row(row: &[String], widths: &[usize], sep: &str) -> String { + let mut parts: Vec = Vec::with_capacity(row.len()); + for (i, cell) in row.iter().enumerate() { + parts.push(pad_end(cell, widths[i])); + } + rstrip(&parts.join(sep)) +} + +fn pad_end(s: &str, width: usize) -> String { + let w = display_width(s); + if w >= width { + return s.to_string(); + } + let pad = " ".repeat(width - w); + format!("{s}{pad}") +} + +fn rstrip(s: &str) -> String { + s.trim_end_matches(' ').to_string() +} + +/// `String.length` in JS counts UTF-16 code units, but for the corpus +/// this CLI ships against (ASCII model names, ASCII activity labels), +/// `chars().count()` is byte-equivalent. We use it instead of byte length +/// to keep the dash sentinel (`—`, U+2014, 3 bytes UTF-8 / 1 UTF-16 +/// unit) aligning the way the TS path expects. 
+fn display_width(s: &str) -> usize { + s.chars().count() +} + +fn display_model_name(m: &str) -> &str { + match m.find('/') { + Some(i) => &m[i + 1..], + None => m, + } +} + +fn format_excluded_note(excluded: &ExcludedBreakdown, minimum: FidelityClass) -> String { + let mut parts: Vec = Vec::new(); + if excluded.aggregate_only > 0 { + parts.push(format!("{} aggregate-only", excluded.aggregate_only)); + } + if excluded.cost_only > 0 { + parts.push(format!("{} cost-only", excluded.cost_only)); + } + if excluded.partial > 0 { + parts.push(format!("{} partial", excluded.partial)); + } + if excluded.usage_only > 0 { + parts.push(format!("{} usage-only", excluded.usage_only)); + } + let breakdown = if parts.is_empty() { + String::new() + } else { + format!(" ({})", parts.join(", ")) + }; + let noun = if excluded.total == 1 { "turn" } else { "turns" }; + format!( + "excluded {} {noun} below {} fidelity{breakdown}", + format_int(excluded.total), + fidelity_class_str(minimum) + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn format_usd_buckets() { + assert_eq!(format_usd(0.0), "$0.00"); + assert_eq!(format_usd(0.0017), "$0.0017"); + assert_eq!(format_usd(0.011), "$0.011"); + assert_eq!(format_usd(0.034), "$0.034"); + assert_eq!(format_usd(1.5), "$1.50"); + } + + #[test] + fn format_int_groups_thousands() { + assert_eq!(format_int(7), "7"); + assert_eq!(format_int(1_500), "1,500"); + assert_eq!(format_int(1_000_000), "1,000,000"); + } + + #[test] + fn format_pct_rounds_to_int() { + assert_eq!(format_pct(0.0), "0%"); + assert_eq!(format_pct(0.5), "50%"); + assert_eq!(format_pct(1.0), "100%"); + assert_eq!(format_pct(2.0 / 3.0), "67%"); + } + + #[test] + fn round_json_matches_js_to_fixed() { + // Whole numbers come out as integers (no `.0` suffix). + let v = round_json(1.0, 4); + assert_eq!(v.to_string(), "1"); + // Non-whole shorter than digit cap drops trailing zeros. 
+ let v = round_json(0.5, 4); + assert_eq!(v.to_string(), "0.5"); + // Rounds to 6 digits. + let v = round_json(0.0112499999, 6); + assert_eq!(v.to_string(), "0.01125"); + } + + #[test] + fn parse_fidelity_known_classes() { + assert!(matches!(parse_fidelity("full").unwrap(), FidelityClass::Full)); + assert!(matches!( + parse_fidelity("usage-only").unwrap(), + FidelityClass::UsageOnly + )); + assert!(parse_fidelity("nope").is_err()); + } + + #[test] + fn display_model_name_strips_provider_prefix() { + assert_eq!(display_model_name("anthropic/claude-sonnet-4-6"), "claude-sonnet-4-6"); + assert_eq!(display_model_name("claude-haiku-4-5"), "claude-haiku-4-5"); + } } diff --git a/crates/relayburn-cli/src/main.rs b/crates/relayburn-cli/src/main.rs index ca20ed37..cdb8712f 100644 --- a/crates/relayburn-cli/src/main.rs +++ b/crates/relayburn-cli/src/main.rs @@ -34,7 +34,7 @@ fn dispatch(args: Args) -> i32 { Command::Summary => commands::summary::run(&globals), Command::Hotspots => commands::hotspots::run(&globals), Command::Overhead => commands::overhead::run(&globals), - Command::Compare => commands::compare::run(&globals), + Command::Compare(args) => commands::compare::run(&globals, args), Command::Run => commands::run::run(&globals), Command::State => commands::state::run(&globals), Command::Ingest => commands::ingest::run(&globals), diff --git a/crates/relayburn-cli/tests/golden.rs b/crates/relayburn-cli/tests/golden.rs index 15bd1152..4573821a 100644 --- a/crates/relayburn-cli/tests/golden.rs +++ b/crates/relayburn-cli/tests/golden.rs @@ -36,7 +36,12 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; +use relayburn_sdk::{ + Ledger, LedgerOpenOptions, SessionRelationshipRecord, Stamp, ToolResultEventRecord, + TurnRecord, UserTurnRecord, +}; use serde::Deserialize; +use serde_json::Value; #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -93,9 +98,26 @@ fn golden_diff_against_ts_cli_snapshots() { .unwrap_or_else(|err| 
panic!("invocations.json is malformed: {err}")); let snapshots_dir = fixture_dir.join("snapshots"); - let ledger_home = fixture_dir.join("ledger"); + let ts_ledger_home = fixture_dir.join("ledger"); let project_dir = fixture_dir.join("project"); + // The on-disk fixture is a TS-style JSONL ledger. The Rust SDK reads + // SQLite, so we materialize a sibling SQLite home from the JSONL once + // and point the binary at it for every invocation. Done up front so + // the import cost is paid once per test run, not per invocation. + let rust_ledger_home = tempdir_under(&fixture_dir); + if let Err(err) = import_jsonl_into_sqlite( + &ts_ledger_home.join("ledger.jsonl"), + &rust_ledger_home, + ) { + let _ = fs::remove_dir_all(&rust_ledger_home); + panic!( + "[golden] failed to import {} into a Rust-SDK SQLite ledger: {err:?}", + ts_ledger_home.display(), + ); + } + let ledger_home = rust_ledger_home.clone(); + // Sealed HOME so the Rust binary's eventual ingest sweep doesn't // discover the developer's real session stores. let sealed_home = tempdir_under(&fixture_dir); @@ -190,6 +212,7 @@ fn golden_diff_against_ts_cli_snapshots() { } let _ = fs::remove_dir_all(&sealed_home); + let _ = fs::remove_dir_all(&rust_ledger_home); if !failures.is_empty() { panic!( @@ -339,6 +362,128 @@ fn indent(text: &str, prefix: &str) -> String { .join("\n") } +/// Read a TS-style JSONL ledger (the on-disk shape `@relayburn/ledger` +/// emits today) and materialize an equivalent SQLite-backed home that +/// `relayburn_sdk::Ledger::open` can read. The cli-golden corpus ships +/// only the JSONL form; this bridge keeps the corpus source-of-truth +/// while letting the Rust binary read the same data. +/// +/// The supported line kinds mirror what `tests/fixtures/cli-golden/scripts/ +/// build-ledger.mjs` writes: `turn`, `user_turn`, `tool_result_event`, +/// `relationship`, `stamp`. 
Anything else is skipped with a stderr breadcrumb +/// (the legacy `text` / `tool_result` line kinds are the inner block +/// shapes nested under `user_turn.blocks` and never appear at the top +/// level — they're caught by the catch-all path so a future fixture +/// extension doesn't drop data on the floor without warning). +fn import_jsonl_into_sqlite(jsonl_path: &Path, sqlite_home: &Path) -> anyhow::Result<()> { + use anyhow::{anyhow, Context}; + + fs::create_dir_all(sqlite_home) + .with_context(|| format!("create_dir_all({})", sqlite_home.display()))?; + let raw = fs::read_to_string(jsonl_path) + .with_context(|| format!("read_to_string({})", jsonl_path.display()))?; + + let mut turns: Vec = Vec::new(); + let mut user_turns: Vec = Vec::new(); + let mut tool_results: Vec = Vec::new(); + let mut relationships: Vec = Vec::new(); + let mut stamps: Vec = Vec::new(); + + for (i, line) in raw.lines().enumerate() { + if line.trim().is_empty() { + continue; + } + let v: Value = serde_json::from_str(line) + .with_context(|| format!("parse JSON at line {}", i + 1))?; + let kind = v + .get("kind") + .and_then(|x| x.as_str()) + .ok_or_else(|| anyhow!("missing kind at line {}", i + 1))?; + match kind { + "turn" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + let parsed: TurnRecord = serde_json::from_value(rec.clone()) + .with_context(|| format!("parse turn at line {}", i + 1))?; + turns.push(parsed); + } + "user_turn" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + let parsed: UserTurnRecord = serde_json::from_value(rec.clone()) + .with_context(|| format!("parse user_turn at line {}", i + 1))?; + user_turns.push(parsed); + } + "tool_result_event" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + // Best-effort: the cli-golden fixture writes `eventSource: + // "transcript"`, which is a synthetic value the strict + // Rust enum 
doesn't recognize. Skip on parse failure + // rather than panicking — `compare` doesn't read this + // table anyway, so dropping these rows doesn't drift + // the snapshot output. (#310 left this loose-typed in + // the fixture; tightening that is out of scope here.) + match serde_json::from_value::(rec.clone()) { + Ok(parsed) => tool_results.push(parsed), + Err(err) => { + eprintln!( + "[golden] skipping tool_result_event at line {}: {err}", + i + 1 + ); + } + } + } + "relationship" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + match serde_json::from_value::(rec.clone()) { + Ok(parsed) => relationships.push(parsed), + Err(err) => { + eprintln!( + "[golden] skipping relationship at line {}: {err}", + i + 1 + ); + } + } + } + "stamp" => { + // Stamp lines are flat — selector / enrichment / ts at + // the top level, no `record` envelope (matches the TS + // `stamp()` writer). + match serde_json::from_value::(v.clone()) { + Ok(parsed) => stamps.push(parsed), + Err(err) => { + eprintln!("[golden] skipping stamp at line {}: {err}", i + 1); + } + } + } + _ => { + // Unknown kind — skip with a stderr breadcrumb so + // fixture extensions surface explicitly. 
+ eprintln!( + "[golden] skipping unknown ledger line kind `{kind}` at line {}", + i + 1 + ); + } + } + } + + let mut handle = Ledger::open(LedgerOpenOptions::with_home(sqlite_home))?; + handle.raw_mut().append_turns(&turns)?; + handle.raw_mut().append_user_turns(&user_turns)?; + handle.raw_mut().append_tool_result_events(&tool_results)?; + handle.raw_mut().append_relationships(&relationships)?; + for s in &stamps { + handle.raw_mut().append_stamp(s)?; + } + Ok(()) +} + fn tempdir_under(parent: &Path) -> PathBuf { use std::time::{SystemTime, UNIX_EPOCH}; let nanos = SystemTime::now() diff --git a/crates/relayburn-cli/tests/smoke.rs b/crates/relayburn-cli/tests/smoke.rs index 78401d58..8be4dc48 100644 --- a/crates/relayburn-cli/tests/smoke.rs +++ b/crates/relayburn-cli/tests/smoke.rs @@ -34,6 +34,21 @@ const SUBCOMMANDS: &[&str] = &[ "mcp-server", ]; +/// Subset of [`SUBCOMMANDS`] still on the scaffold "not yet implemented" +/// exit path. As each Wave 2 D1–D8 PR wires its presenter, drop the +/// command from this list — the missing entries fall under a more +/// targeted assertion (see `compare_command_rejects_missing_models` +/// below for an example). +const STUB_SUBCOMMANDS: &[&str] = &[ + "summary", + "hotspots", + "overhead", + "run", + "state", + "ingest", + "mcp-server", +]; + /// Helper: build a `Command` driving the locally-built `burn` binary. fn burn() -> Command { Command::cargo_bin("burn").expect("`burn` binary must build for the smoke test") @@ -76,7 +91,7 @@ fn each_subcommand_help_exits_zero_with_non_empty_stdout() { #[test] fn each_stub_exits_one_with_not_yet_implemented_message() { - for sub in SUBCOMMANDS { + for sub in STUB_SUBCOMMANDS { // Run the stub with no extra args. 
The default exit-code // contract for the scaffold is `EXIT_NOT_YET_IMPLEMENTED == 1`; // assert it explicitly so a future Wave 2 PR that wires up a @@ -90,6 +105,19 @@ fn each_stub_exits_one_with_not_yet_implemented_message() { } } +#[test] +fn compare_command_rejects_missing_models() { + // `burn compare` is wired (Wave 2 D3); no positional list means + // exit 2 + the canonical "needs at least 2 models" message. This + // asserts the wired path exists so a future regression that nukes + // the dispatch arm fails loud. + burn() + .arg("compare") + .assert() + .code(2) + .stderr(predicate::str::contains("needs at least 2 models")); +} + #[test] fn json_mode_emits_error_envelope_on_unimplemented() { // The `--json` global flips error reporting from a stderr line to diff --git a/crates/relayburn-sdk/src/lib.rs b/crates/relayburn-sdk/src/lib.rs index 0740dfd6..d3ddde21 100644 --- a/crates/relayburn-sdk/src/lib.rs +++ b/crates/relayburn-sdk/src/lib.rs @@ -91,16 +91,18 @@ pub use crate::ledger::{ pub use crate::analyze::{ aggregate_by_bash, aggregate_by_bash_verb, aggregate_by_file, aggregate_by_subagent, - attribute_hotspots, attribute_overhead, build_trim_recommendations, cost_for_turn, - cost_for_usage, detect_patterns, detect_tool_call_patterns, detect_tool_output_bloat, - find_overhead_files, findings_from_patterns, has_minimum_fidelity, load_overhead_file, - load_pricing, render_unified_diff_for_recommendation, summarize_fidelity, sum_costs, + attribute_hotspots, attribute_overhead, build_compare_table, build_trim_recommendations, + compare_from_archive, cost_for_turn, cost_for_usage, detect_patterns, + detect_tool_call_patterns, detect_tool_output_bloat, find_overhead_files, + findings_from_patterns, has_minimum_fidelity, load_overhead_file, load_pricing, + render_unified_diff_for_recommendation, summarize_fidelity, sum_costs, AttributeOverheadInput, AttributionMethod, BashAggregation, BashVerbAggregation, - CostBreakdown, FidelitySummary, FileAggregation, 
HotspotsOptions as AnalyzeHotspotsOptions, - HotspotsResult as AnalyzeHotspotsResult, ModelCost, OverheadAttribution, OverheadFile, - OverheadFileAttribution, OverheadFileKind, ParsedOverheadFile, PricingTable, ReasoningMode, - SessionTotals, SubagentAggregation, ToolAttribution, TrimRecommendation, WasteFinding, - WasteSeverity, + CompareCategory, CompareCell, CompareFromArchiveResult, CompareOptions, CompareTable, + CompareTotals, CostBreakdown, FidelitySummary, FileAggregation, + HotspotsOptions as AnalyzeHotspotsOptions, HotspotsResult as AnalyzeHotspotsResult, ModelCost, + OverheadAttribution, OverheadFile, OverheadFileAttribution, OverheadFileKind, + ParsedOverheadFile, PricingTable, ReasoningMode, SessionTotals, SubagentAggregation, + ToolAttribution, TrimRecommendation, WasteFinding, WasteSeverity, DEFAULT_MIN_SAMPLE, }; pub use crate::ingest::{ diff --git a/tests/fixtures/cli-golden/invocations.json b/tests/fixtures/cli-golden/invocations.json index 9baaa964..231030a4 100644 --- a/tests/fixtures/cli-golden/invocations.json +++ b/tests/fixtures/cli-golden/invocations.json @@ -51,13 +51,13 @@ "name": "compare", "args": ["compare", "claude-sonnet-4-6,claude-haiku-4-5", "--include-partial"], "expectStatus": 0, - "enabled": false + "enabled": true }, { "name": "compare-json", "args": ["compare", "claude-sonnet-4-6,claude-haiku-4-5", "--include-partial", "--json"], "expectStatus": 0, - "enabled": false + "enabled": true }, { "name": "state-status", From 822963c1198651858b833754ada2a54b39e57c64 Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Wed, 6 May 2026 14:12:14 -0400 Subject: [PATCH 2/2] relayburn-cli: address PR #314 review on burn compare - P1: normalize ISO `--since` inputs to UTC `...mmmZ` so offset-bearing timestamps (e.g. `2026-05-06T00:00:00-07:00`) lex-compare correctly against ledger rows. 
New `normalize_iso_to_utc_z` parses RFC 3339 with `Z`, `+HH:MM`, `-HH:MM`, optional fractional seconds, and date-only forms; truncates sub-millisecond precision. `ymd_to_days` added as the round-trip inverse of the existing `days_to_ymd`. - P2: relative `--since` (e.g. `7d`, `24h`) now emits `YYYY-MM-DDTHH:MM:SS.000Z` rather than `...SSZ`, so same-second ledger rows with sub-second precision (e.g. `...12.500Z`) sort consistently against the cutoff. - CodeRabbit: every `eprintln!`-then-return-exit-code branch in `run_inner` now returns `Err(anyhow!(...))`. The outer `run` already routes errors through `report_error`, so `burn --json compare ...` emits the documented `{"error": "..."}` envelope on stdout instead of plain stderr. Exit code remains 2. Messages stripped of their embedded `burn:` prefix since `report_error` adds one. Tests: 11 new unit tests covering the offset, fractional-second, date-only, lowercase-`z`, and reject-garbage cases plus a sanity check that the canonical `.000Z` format lex-orders correctly against ledger rows. `cargo test --workspace` and `BURN_GOLDEN=1 cargo test --test golden -p relayburn-cli` both green. --- crates/relayburn-cli/src/commands/compare.rs | 371 +++++++++++++++++-- 1 file changed, 334 insertions(+), 37 deletions(-) diff --git a/crates/relayburn-cli/src/commands/compare.rs b/crates/relayburn-cli/src/commands/compare.rs index bdc57344..01faf896 100644 --- a/crates/relayburn-cli/src/commands/compare.rs +++ b/crates/relayburn-cli/src/commands/compare.rs @@ -19,7 +19,7 @@ use relayburn_sdk::{ use serde_json::{json, Value}; use crate::cli::{CompareArgs, GlobalArgs}; -use crate::render::error::{report_error, EXIT_GENERIC_ERROR}; +use crate::render::error::report_error; use crate::render::json::render_json; const FIDELITY_CHOICES: &[&str] = &[ @@ -39,7 +39,7 @@ const FIDELITY_ORDER: &[&str] = &[ ]; const NEEDS_MODELS_MSG: &str = - "burn compare: needs at least 2 models. 
Run `burn summary --by-provider` (or `burn summary --by-tool`) to see which models have data."; + "compare: needs at least 2 models. Run `burn summary --by-provider` (or `burn summary --by-tool`) to see which models have data."; const NOTE_LIMIT: usize = 8; const DASH: &str = "—"; @@ -53,11 +53,15 @@ pub fn run(globals: &GlobalArgs, args: CompareArgs) -> i32 { fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { // 1. Parse positional models list (comma-separated, dedup, preserve order). + // Argument-validation failures route through `report_error` (the outer + // `run` catches the `Err` from this function) so `--json` mode emits + // the documented `{"error": ...}` envelope instead of plain stderr. + // `report_error` prepends `burn: ` for human stderr, so the messages + // here read as the natural-language continuation (no leading `burn`). let raw = match args.models.as_deref() { Some(s) => s, None => { - eprintln!("{NEEDS_MODELS_MSG}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("{NEEDS_MODELS_MSG}")); } }; let mut seen: BTreeSet = BTreeSet::new(); @@ -72,27 +76,26 @@ fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { } } if models.len() < 2 { - eprintln!("{NEEDS_MODELS_MSG}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("{NEEDS_MODELS_MSG}")); } // 2. Resolve --fidelity / --include-partial. 
let mut min_fidelity: FidelityClass = FidelityClass::UsageOnly; if let Some(raw) = args.fidelity.as_deref() { if !FIDELITY_CHOICES.contains(&raw) { - eprintln!( - "burn: invalid --fidelity: {raw} (expected one of {})", + return Err(anyhow!( + "invalid --fidelity: {raw} (expected one of {})", FIDELITY_CHOICES.join(", ") - ); - return Ok(EXIT_GENERIC_ERROR); + )); } min_fidelity = parse_fidelity(raw)?; } if args.include_partial { if let Some(raw) = args.fidelity.as_deref() { if raw != "partial" { - eprintln!("burn: --include-partial conflicts with --fidelity {raw}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!( + "--include-partial conflicts with --fidelity {raw}" + )); } } min_fidelity = FidelityClass::Partial; @@ -102,8 +105,7 @@ fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { // per-command so the global JSON take-precedence rule in the TS CLI // becomes "explicit conflict" here — same exit code, same message. if globals.json && args.csv { - eprintln!("burn: --json and --csv are mutually exclusive; pick one."); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("--json and --csv are mutually exclusive; pick one.")); } // 4. Provider filter. Surfaced as an explicit "not yet wired" error @@ -121,8 +123,7 @@ fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { // 5. min-sample. let min_sample = args.min_sample.unwrap_or(DEFAULT_MIN_SAMPLE); if min_sample < 1 { - eprintln!("burn: invalid --min-sample: {min_sample}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("invalid --min-sample: {min_sample}")); } // 6. Honor --no-archive by exporting RELAYBURN_ARCHIVE=0 for the @@ -237,9 +238,22 @@ fn fidelity_class_str(cls: FidelityClass) -> &'static str { } } -/// Normalize `--since` exactly like the SDK's free-fn would (relative -/// `7d` → ISO Z, ISO pass-through, garbage → error). Inlined here rather -/// than imported because the SDK helper isn't on the public surface. 
+/// Normalize `--since` exactly like the TS CLI's `parseSinceArg` does: +/// +/// - Relative ranges (`7d`, `24h`, `4w`, `30m`) → `now - delta` rendered +/// as a fully canonical UTC ISO string with milliseconds +/// (`YYYY-MM-DDTHH:MM:SS.mmmZ`). +/// - ISO inputs (with or without an offset, with or without fractional +/// seconds) get parsed and re-rendered as UTC `...Z` with milliseconds. +/// This matters because `Ledger::query_turns` applies `since` via +/// lexicographic comparison against stored `...mmmZ` timestamps: +/// * an offset like `2026-05-06T00:00:00-07:00` would otherwise sort +/// before any ledger row regardless of the actual instant, and +/// * a no-fraction `...12Z` would lex-sort after `...12.500Z` even +/// though `...12.500Z` is the later instant (`'.'` orders before +/// `'Z'` byte-wise), so a same-second cutoff would wrongly drop rows. +/// Re-emitting as UTC with `.000Z` (or the original sub-second +/// precision) keeps the lex order stable against the ledger. +/// - Garbage → error. fn normalize_since(since: Option<&str>) -> Result> { let Some(raw) = since else { return Ok(None); @@ -260,14 +274,14 @@ .map(|d| d.as_secs()) .unwrap_or(0); let when = now.saturating_sub(secs_back); - return Ok(Some(format_iso_z(when))); + return Ok(Some(format_iso_z_ms(when, 0))); } - if !looks_like_iso(raw) { - return Err(anyhow!( - "invalid since: {raw} (expected ISO timestamp or relative range like 7d)" - )); + if let Some(canonical) = normalize_iso_to_utc_z(raw) { + return Ok(Some(canonical)); } - Ok(Some(raw.to_string())) + Err(anyhow!( + "invalid since: {raw} (expected ISO timestamp or relative range like 7d)" + )) } fn parse_relative(s: &str) -> Option<(u64, char)> { @@ -287,24 +301,182 @@ Some((n, unit)) } -fn looks_like_iso(s: &str) -> bool { - let b = s.as_bytes(); - b.len() >= 10 - && b[0..4].iter().all(|c| c.is_ascii_digit()) - && b[4] == b'-' - && b[5..7].iter().all(|c| c.is_ascii_digit()) - && b[7] == b'-' - && b[8..10].iter().all(|c| 
c.is_ascii_digit()) +/// Parse an ISO 8601 / RFC 3339 timestamp and re-emit it as a fully +/// canonical UTC `YYYY-MM-DDTHH:MM:SS.mmmZ` string. Handles: +/// +/// - `YYYY-MM-DD` (date-only — assumed midnight UTC). +/// - `YYYY-MM-DDTHH:MM:SS` (offset-less — assumed UTC). +/// - `YYYY-MM-DDTHH:MM:SS.fff` (fractional seconds, any width 1–9). +/// - `Z` suffix. +/// - `+HH:MM` / `-HH:MM` offsets. +/// +/// Returns `None` for inputs that don't look ISO-shaped, so the caller +/// can surface a usage error. Emits with millisecond precision: any +/// sub-millisecond fractional digits are truncated, matching JS +/// `Date.toISOString()` rounding behavior closely enough for ledger +/// `since` lex-ordering. Whole-second inputs are widened to `.000Z`. +fn normalize_iso_to_utc_z(s: &str) -> Option { + let bytes = s.as_bytes(); + if bytes.len() < 10 { + return None; + } + // YYYY-MM-DD prefix. + if !(bytes[0..4].iter().all(|c| c.is_ascii_digit()) + && bytes[4] == b'-' + && bytes[5..7].iter().all(|c| c.is_ascii_digit()) + && bytes[7] == b'-' + && bytes[8..10].iter().all(|c| c.is_ascii_digit())) + { + return None; + } + let year: i64 = std::str::from_utf8(&bytes[0..4]).ok()?.parse().ok()?; + let month: u32 = std::str::from_utf8(&bytes[5..7]).ok()?.parse().ok()?; + let day: u32 = std::str::from_utf8(&bytes[8..10]).ok()?.parse().ok()?; + if !(1..=12).contains(&month) || !(1..=31).contains(&day) { + return None; + } + + // Defaults for date-only inputs. + let mut hour: u32 = 0; + let mut minute: u32 = 0; + let mut second: u32 = 0; + let mut millis: u32 = 0; + let mut offset_minutes: i32 = 0; + + if bytes.len() > 10 { + // Expect a time component starting with 'T' or ' '. + if !(bytes[10] == b'T' || bytes[10] == b't' || bytes[10] == b' ') { + return None; + } + // HH:MM:SS at offsets 11..19. 
+ if bytes.len() < 19 { + return None; + } + if !(bytes[11..13].iter().all(|c| c.is_ascii_digit()) + && bytes[13] == b':' + && bytes[14..16].iter().all(|c| c.is_ascii_digit()) + && bytes[16] == b':' + && bytes[17..19].iter().all(|c| c.is_ascii_digit())) + { + return None; + } + hour = std::str::from_utf8(&bytes[11..13]).ok()?.parse().ok()?; + minute = std::str::from_utf8(&bytes[14..16]).ok()?.parse().ok()?; + second = std::str::from_utf8(&bytes[17..19]).ok()?.parse().ok()?; + if hour > 23 || minute > 59 || second > 60 { + return None; + } + + // Optional fractional seconds. + let mut idx = 19; + if idx < bytes.len() && (bytes[idx] == b'.' || bytes[idx] == b',') { + idx += 1; + let frac_start = idx; + while idx < bytes.len() && bytes[idx].is_ascii_digit() { + idx += 1; + } + if idx == frac_start { + return None; + } + // Truncate to milliseconds: take the first 3 digits, pad + // with zeros if shorter, ignore the rest. + let mut frac_str = String::from(std::str::from_utf8(&bytes[frac_start..idx]).ok()?); + if frac_str.len() > 3 { + frac_str.truncate(3); + } + while frac_str.len() < 3 { + frac_str.push('0'); + } + millis = frac_str.parse().ok()?; + } + + // Optional offset. + if idx < bytes.len() { + match bytes[idx] { + b'Z' | b'z' => { + if idx + 1 != bytes.len() { + return None; + } + } + b'+' | b'-' => { + let sign: i32 = if bytes[idx] == b'-' { -1 } else { 1 }; + idx += 1; + if bytes.len() < idx + 5 { + return None; + } + if !(bytes[idx..idx + 2].iter().all(|c| c.is_ascii_digit()) + && bytes[idx + 2] == b':' + && bytes[idx + 3..idx + 5].iter().all(|c| c.is_ascii_digit())) + { + return None; + } + let oh: i32 = std::str::from_utf8(&bytes[idx..idx + 2]) + .ok()? + .parse() + .ok()?; + let om: i32 = std::str::from_utf8(&bytes[idx + 3..idx + 5]) + .ok()? 
+ .parse() + .ok()?; + if oh > 23 || om > 59 { + return None; + } + offset_minutes = sign * (oh * 60 + om); + if idx + 5 != bytes.len() { + return None; + } + } + _ => return None, + } + } + } + + // Convert (year, month, day, h, m, s, offset) → unix seconds. + let days = ymd_to_days(year, month, day)?; + let local_secs: i64 = days * 86_400 + (hour as i64) * 3_600 + (minute as i64) * 60 + (second as i64); + // Subtract the offset to land on UTC seconds: `local = utc + offset`, + // so `utc = local - offset`. Offset is in minutes. + let utc_secs: i64 = local_secs - (offset_minutes as i64) * 60; + Some(format_iso_z_ms_signed(utc_secs, millis)) +} + +/// Format Unix-seconds as `YYYY-MM-DDTHH:MM:SS.mmmZ`. Always emits the +/// milliseconds component so the resulting string sorts correctly against +/// ledger rows that always carry sub-second precision. +fn format_iso_z_ms(secs: u64, millis: u32) -> String { + format_iso_z_ms_signed(secs as i64, millis) } -fn format_iso_z(secs: u64) -> String { - let total_days = (secs / 86_400) as i64; - let secs_in_day = (secs % 86_400) as u32; +fn format_iso_z_ms_signed(secs: i64, millis: u32) -> String { + // `secs` may be negative for pre-1970 timestamps — split into a + // floored day count and a non-negative seconds-in-day remainder so + // the formatting math doesn't have to care about sign. + let total_days = secs.div_euclid(86_400); + let secs_in_day = secs.rem_euclid(86_400) as u32; let hour = secs_in_day / 3_600; let minute = (secs_in_day / 60) % 60; let second = secs_in_day % 60; let (year, month, day) = days_to_ymd(total_days); - format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z") + format!( + "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z" + ) +} + +/// Civil-date → days-from-Unix-epoch (Howard Hinnant's algorithm, +/// proleptic Gregorian). Inverse of [`days_to_ymd`]. 
+fn ymd_to_days(year: i64, month: u32, day: u32) -> Option { + if !(1..=12).contains(&month) || !(1..=31).contains(&day) { + return None; + } + let m = month as i64; + let d = day as i64; + let y = if m <= 2 { year - 1 } else { year }; + let era = if y >= 0 { y } else { y - 399 } / 400; + let yoe = (y - era * 400) as u64; // [0, 399] + let mp = if m > 2 { m - 3 } else { m + 9 } as u64; // [0, 11] + let doy = (153 * mp + 2) / 5 + (d as u64) - 1; // [0, 365] + let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // [0, 146096] + Some(era * 146_097 + (doe as i64) - 719_468) } fn days_to_ymd(days_from_epoch: i64) -> (i64, u32, u32) { @@ -1051,4 +1223,129 @@ mod tests { assert_eq!(display_model_name("anthropic/claude-sonnet-4-6"), "claude-sonnet-4-6"); assert_eq!(display_model_name("claude-haiku-4-5"), "claude-haiku-4-5"); } + + // ------------------------------------------------------------------- + // Codex P1 / P2: ISO normalization + relative-range millisecond + // padding. Both bugs surface as ledger lex-order skews: the ledger + // stores rows with `...mmmZ` precision, so a `since` that doesn't + // match that shape gets compared as the wrong instant. + // ------------------------------------------------------------------- + + #[test] + fn normalize_iso_widens_no_fraction_to_three_zeros() { + // P2 root cause: same-second ledger row `...12.500Z` would sort + // *before* a `--since` cutoff of `...12Z`, dropping valid turns. + // Normalizing widens to `.000Z` so the cutoff is the lower bound + // for that second. 
+ assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00Z"), + Some("2026-05-06T00:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_preserves_millisecond_precision() { + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00.500Z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + // Sub-millisecond digits are truncated to 3 (matches the ledger + // shape; mirrors `Date.toISOString()` truncation closely enough + // for `since`-cutoff lex ordering). + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00.500999Z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + // Shorter fraction is right-padded. + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00.5Z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + } + + #[test] + fn normalize_iso_converts_negative_offset_to_utc() { + // P1 root cause: `-07:00` is 7h *behind* UTC, so the same + // wall-clock time corresponds to a UTC instant 7h *later*. + // 2026-05-06T00:00:00-07:00 == 2026-05-06T07:00:00Z. + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00-07:00"), + Some("2026-05-06T07:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_converts_positive_offset_to_utc() { + // 2026-05-06T00:00:00+09:00 == 2026-05-05T15:00:00Z. + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00+09:00"), + Some("2026-05-05T15:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_handles_lowercase_z() { + assert_eq!( + normalize_iso_to_utc_z("2026-05-06t00:00:00.500z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + } + + #[test] + fn normalize_iso_accepts_date_only() { + // Date-only input: no time component → midnight UTC. 
+ assert_eq!( + normalize_iso_to_utc_z("2026-05-06"), + Some("2026-05-06T00:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_rejects_garbage() { + assert_eq!(normalize_iso_to_utc_z("not a date"), None); + assert_eq!(normalize_iso_to_utc_z("2026/05/06"), None); + assert_eq!(normalize_iso_to_utc_z("2026-13-01T00:00:00Z"), None); // bad month + assert_eq!(normalize_iso_to_utc_z("2026-05-06T25:00:00Z"), None); // bad hour + assert_eq!(normalize_iso_to_utc_z("2026-05-06T00:00:00+9"), None); // malformed offset + } + + #[test] + fn normalize_since_relative_emits_milliseconds() { + // P2: relative range output must carry the `.000Z` fragment so + // ledger rows with sub-second precision sort correctly against + // the cutoff. We can't pin the absolute value (depends on `now`), + // but we can assert the shape. + let out = normalize_since(Some("7d")).unwrap().unwrap(); + assert!(out.ends_with(".000Z"), "expected .000Z suffix in {out}"); + assert_eq!(out.len(), 24, "expected 24-char canonical shape: {out}"); + } + + #[test] + fn normalize_since_iso_pass_normalizes_offset() { + let out = normalize_since(Some("2026-05-06T00:00:00-07:00")) + .unwrap() + .unwrap(); + assert_eq!(out, "2026-05-06T07:00:00.000Z"); + } + + #[test] + fn normalize_since_relative_format_is_lex_compatible_with_ledger_rows() { + // Sanity check: a canonical `.000Z` cutoff must lex *before* the + // same-second ledger row carrying any non-zero millisecond + // suffix. This is the property the bug was breaking. + let cutoff = "2026-05-06T12:00:00.000Z"; + let row_a = "2026-05-06T12:00:00.500Z"; + let row_b = "2026-05-06T12:00:00.001Z"; + assert!(cutoff <= row_a); + assert!(cutoff <= row_b); + } + + #[test] + fn ymd_round_trip() { + for (y, m, d) in &[(1970, 1, 1), (2026, 5, 6), (2000, 2, 29), (1999, 12, 31)] { + let days = ymd_to_days(*y, *m, *d).unwrap(); + let (ry, rm, rd) = days_to_ymd(days); + assert_eq!((*y, *m, *d), (ry, rm, rd)); + } + } }