From 459aec276e29a4361ec91b32843f45d0400fcafd Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Wed, 6 May 2026 10:41:48 -0400 Subject: [PATCH 1/2] relayburn-cli: burn compare presenter (#248 D3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire `burn compare <models>` as a presenter over relayburn_sdk::analyze::compare's building blocks (build_compare_table + the per-turn fidelity gate); the SDK keeps the heavy lifting so a future burn__compare MCP tool wraps the same call. The CLI flag set mirrors the TS path 1:1 — positional comma-separated model list, --include-partial / --fidelity / --since / --project / --session / --workflow / --agent / --provider / --min-sample / --csv / --no-archive / --json — and the wire shape (cells ordering, rounding rules, fidelity-summary key order) goes byte-for-byte with the TS-captured cli-golden snapshots. --workflow / --agent / --provider short-circuit with a typed "not yet wired" error since the Rust ledger query layer doesn't expose the enrichment / provider filter today; the rest of the surface is fully wired. The two compare entries in tests/fixtures/cli-golden/invocations.json flip to enabled: true so BURN_GOLDEN=1 cargo test --test golden enforces parity. Because the on-disk fixture is a TS-style JSONL ledger and the Rust SDK reads SQLite, the golden runner now materializes a sibling SQLite home from the JSONL once per test run and points the binary at it; the import is loose-typed on tool_result_event / relationship / stamp so a fixture extension surfaces a stderr breadcrumb instead of panicking the whole runner. Smoke test loses `compare` from its "stub exits 1" loop and gains an explicit "rejects missing models" assertion against the wired path. 
--- CHANGELOG.md | 1 + crates/relayburn-cli/src/cli.rs | 73 +- crates/relayburn-cli/src/commands/compare.rs | 1058 +++++++++++++++++- crates/relayburn-cli/src/main.rs | 2 +- crates/relayburn-cli/tests/golden.rs | 147 ++- crates/relayburn-cli/tests/smoke.rs | 30 +- crates/relayburn-sdk/src/lib.rs | 20 +- tests/fixtures/cli-golden/invocations.json | 4 +- 8 files changed, 1311 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 599e68c1..f62c2f06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Cross-package release notes for relayburn. Package changelogs contain package-le ## [Unreleased] +- `relayburn-cli` (Rust): wire `burn compare` as a presenter over `relayburn_sdk::analyze::compare` building blocks (`build_compare_table` + the per-turn fidelity gate), matching the TS CLI flag set (positional comma-separated model list, `--include-partial` / `--fidelity` / `--since` / `--project` / `--session` / `--min-sample` / `--csv` / `--no-archive`) and producing byte-equivalent stdout for the cli-golden `compare` / `compare-json` invocations. (#248 D3) - `relayburn-sdk-node` (Rust): napi-rs bindings skeleton — `#[napi]` shims for every public verb in `relayburn-sdk` (`summary`, `sessionCost`, `overhead`, `overheadTrim`, `hotspots`, `search`, `exportLedger`, `exportStamps`, async `ingest`, plus `ledgerOpen`), with u64 token counts surfaced as JS `BigInt`, ISO-8601 timestamps as `String`, async verbs returning `Promise`, and a typed `BurnError` mapping for SDK failures. (#247) - `relayburn-cli` (Rust): introduce the harness substrate — `HarnessAdapter` trait, lazy compile-time `phf` registry (`lookup` / `list_harness_names`), and the shared `pending_stamp::adapter` factory codex + opencode will reuse. Adapter slots in the registry are reserved but empty pending the Wave 2 PRs (#248-d/e/f). 
`relayburn-sdk` re-exports `start_watch_loop`, `WatchController`, `write_pending_stamp`, `PendingStampHarness`, and friends so the CLI doesn't have to reach into private SDK modules. (#248) - `relayburn-cli` (Rust): scaffold the clap v4 derive root with global `--json` / `--ledger-path` / `--no-color` flags, eight stub subcommands (`summary`, `hotspots`, `overhead`, `compare`, `run`, `state`, `ingest`, `mcp-server`), and shared `render::{table,json,error}` helpers. Stubs exit `1` with a `not yet implemented` message (or a `{"error": …}` envelope under `--json`); Wave 2 fan-out PRs replace each stub with a thin presenter over `relayburn-sdk`. (#248 part a) diff --git a/crates/relayburn-cli/src/cli.rs b/crates/relayburn-cli/src/cli.rs index cf2c522e..f01549a6 100644 --- a/crates/relayburn-cli/src/cli.rs +++ b/crates/relayburn-cli/src/cli.rs @@ -17,7 +17,7 @@ use std::path::PathBuf; -use clap::{Parser, Subcommand}; +use clap::{Args as ClapArgs, Parser, Subcommand}; /// Parsed top-level argv — what every command handler receives via /// [`Args::globals`]. @@ -105,7 +105,7 @@ pub enum Command { Overhead, /// Compare cost across two or more models on the same workload. - Compare, + Compare(CompareArgs), /// Run an agent CLI under a harness wrapper that ingests its /// session log on exit. @@ -122,3 +122,72 @@ pub enum Command { #[command(name = "mcp-server")] McpServer, } + +/// Per-command flag set for `burn compare`. Mirrors +/// `packages/cli/src/commands/compare.ts` so the CLI surfaces match +/// byte-for-byte; see that file for the canonical help text. +/// +/// The first positional argument is a comma-separated model list +/// (`claude-sonnet-4-6,claude-haiku-4-5`). The presenter rejects fewer +/// than two distinct models with exit code 2 and a stderr message; this +/// is enforced at runtime rather than by clap so we get the same error +/// message shape as the TS CLI (`burn compare: needs at least 2 +/// models...`). 
+#[derive(Debug, Clone, ClapArgs)] +pub struct CompareArgs { + /// Comma-separated model list (e.g. `claude-sonnet-4-6,claude-haiku-4-5`). + /// Required at runtime — see the struct doc comment for the + /// minimum-models contract. + #[arg(value_name = "MODELS")] + pub models: Option<String>, + + /// Comma-separated list of effective providers to include + /// (e.g. `synthetic,anthropic,openai`). + #[arg(long, value_name = "LIST")] + pub provider: Option<String>, + + /// Relative range (e.g. `24h`, `7d`, `4w`) or ISO timestamp. + /// Defaults to all time. + #[arg(long, value_name = "WHEN")] + pub since: Option<String>, + + /// Filter by project path or git-canonical projectKey. + #[arg(long, value_name = "PATH")] + pub project: Option<String>, + + /// Filter by sessionId. + #[arg(long, value_name = "ID")] + pub session: Option<String>, + + /// Filter by stamped workflowId. + #[arg(long, value_name = "ID")] + pub workflow: Option<String>, + + /// Filter by stamped agentId. + #[arg(long, value_name = "ID")] + pub agent: Option<String>, + + /// Insufficient-sample threshold; cells below this get flagged in + /// the coverage-notes block. Default 5. + #[arg(long = "min-sample", value_name = "N")] + pub min_sample: Option<u64>, + + /// Minimum fidelity class to include + /// (`full | usage-only | aggregate-only | cost-only | partial`). + /// Default `usage-only`. + #[arg(long, value_name = "CLASS")] + pub fidelity: Option<String>, + + /// Shorthand for `--fidelity partial`. + #[arg(long = "include-partial")] + pub include_partial: bool, + + /// Emit a stable CSV with one row per (model, category) pair. + #[arg(long)] + pub csv: bool, + + /// Bypass the SQLite archive and stream the ledger directly. + /// Honored when env `RELAYBURN_ARCHIVE=0`. 
+ #[arg(long = "no-archive")] + pub no_archive: bool, +} diff --git a/crates/relayburn-cli/src/commands/compare.rs b/crates/relayburn-cli/src/commands/compare.rs index 1ab513d6..bdc57344 100644 --- a/crates/relayburn-cli/src/commands/compare.rs +++ b/crates/relayburn-cli/src/commands/compare.rs @@ -1,12 +1,1054 @@ -//! `burn compare <models>` — compare cost across two or -//! more models on the same workload. +//! `burn compare <models>` — per-(model, activity) cost +//! comparison table. Thin presenter over the +//! `relayburn_sdk::analyze::compare` building blocks (`build_compare_table` +//! plus `compare_from_archive`); the heavy lifting lives in the SDK so the +//! MCP server can reuse it. //! -//! Stub. Wave 2 D3 wires this up as a thin presenter over -//! `relayburn_sdk::compare`. TS source of truth: -//! `packages/cli/src/commands/compare.ts`. +//! TS source of truth: `packages/cli/src/commands/compare.ts`. The wire +//! shape (cells ordering, rounding rules, fidelity-summary key order) +//! mirrors that file byte-for-byte against the cli-golden snapshot. 
-use crate::cli::GlobalArgs; +use std::collections::BTreeSet; -pub fn run(globals: &GlobalArgs) -> i32 { - super::not_yet_implemented("compare", globals) +use anyhow::{anyhow, Result}; +use relayburn_sdk::{ + build_compare_table, has_minimum_fidelity, load_pricing, summarize_fidelity, CompareCell, + CompareOptions, CompareTable, EnrichedTurn, FidelityClass, FidelitySummary, Ledger, + LedgerOpenOptions, Query, UsageGranularity, DEFAULT_MIN_SAMPLE, +}; +use serde_json::{json, Value}; + +use crate::cli::{CompareArgs, GlobalArgs}; +use crate::render::error::{report_error, EXIT_GENERIC_ERROR}; +use crate::render::json::render_json; + +const FIDELITY_CHOICES: &[&str] = &[ + "full", + "usage-only", + "aggregate-only", + "cost-only", + "partial", +]; + +const FIDELITY_ORDER: &[&str] = &[ + "cost-only", + "aggregate-only", + "partial", + "usage-only", + "full", +]; + +const NEEDS_MODELS_MSG: &str = + "burn compare: needs at least 2 models. Run `burn summary --by-provider` (or `burn summary --by-tool`) to see which models have data."; + +const NOTE_LIMIT: usize = 8; +const DASH: &str = "—"; + +pub fn run(globals: &GlobalArgs, args: CompareArgs) -> i32 { + match run_inner(globals, args) { + Ok(code) => code, + Err(e) => report_error(&e, globals), + } +} + +fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { + // 1. Parse positional models list (comma-separated, dedup, preserve order). + let raw = match args.models.as_deref() { + Some(s) => s, + None => { + eprintln!("{NEEDS_MODELS_MSG}"); + return Ok(EXIT_GENERIC_ERROR); + } + }; + let mut seen: BTreeSet = BTreeSet::new(); + let mut models: Vec = Vec::new(); + for part in raw.split(',') { + let m = part.trim(); + if m.is_empty() { + continue; + } + if seen.insert(m.to_string()) { + models.push(m.to_string()); + } + } + if models.len() < 2 { + eprintln!("{NEEDS_MODELS_MSG}"); + return Ok(EXIT_GENERIC_ERROR); + } + + // 2. Resolve --fidelity / --include-partial. 
+ let mut min_fidelity: FidelityClass = FidelityClass::UsageOnly; + if let Some(raw) = args.fidelity.as_deref() { + if !FIDELITY_CHOICES.contains(&raw) { + eprintln!( + "burn: invalid --fidelity: {raw} (expected one of {})", + FIDELITY_CHOICES.join(", ") + ); + return Ok(EXIT_GENERIC_ERROR); + } + min_fidelity = parse_fidelity(raw)?; + } + if args.include_partial { + if let Some(raw) = args.fidelity.as_deref() { + if raw != "partial" { + eprintln!("burn: --include-partial conflicts with --fidelity {raw}"); + return Ok(EXIT_GENERIC_ERROR); + } + } + min_fidelity = FidelityClass::Partial; + } + + // 3. JSON / CSV mutual exclusion. `--json` is a global flag; `--csv` is + // per-command so the global JSON take-precedence rule in the TS CLI + // becomes "explicit conflict" here — same exit code, same message. + if globals.json && args.csv { + eprintln!("burn: --json and --csv are mutually exclusive; pick one."); + return Ok(EXIT_GENERIC_ERROR); + } + + // 4. Provider filter. Surfaced as an explicit "not yet wired" error + // rather than a silent no-op — the SDK's provider filter is private + // to the analyze module today, and exposing it through a typed + // top-level surface is part of the broader provider-classifier + // follow-up. The cli-golden corpus exercises compare without a + // provider filter, so this is unblocked for parity. + if args.provider.is_some() { + return Err(anyhow!( + "burn compare: --provider filter is not yet wired through the Rust SDK (#246 follow-up)" + )); + } + + // 5. min-sample. + let min_sample = args.min_sample.unwrap_or(DEFAULT_MIN_SAMPLE); + if min_sample < 1 { + eprintln!("burn: invalid --min-sample: {min_sample}"); + return Ok(EXIT_GENERIC_ERROR); + } + + // 6. Honor --no-archive by exporting RELAYBURN_ARCHIVE=0 for the + // duration of this call. The Rust SDK doesn't read RELAYBURN_ARCHIVE + // today (it's SQLite-only), but we set the env so any future archive + // layer behaves identically to the TS CLI's `--no-archive`. 
+ let _archive_guard = ArchiveOverride::activate(args.no_archive); + + // 7. Build the Query. + let mut q = Query::default(); + if let Some(s) = normalize_since(args.since.as_deref())? { + q.since = Some(s); + } + if let Some(p) = args.project.as_deref() { + q.project = Some(p.to_string()); + } + if let Some(s) = args.session.as_deref() { + q.session_id = Some(s.to_string()); + } + // `workflow` / `agent` flow through the stamp-based enrichment filter + // which the Rust ledger query layer doesn't yet expose. Surface the + // gap explicitly rather than silently dropping the flag — when the + // ledger gains enrichment-filter support, this branch comes out. + if args.workflow.is_some() || args.agent.is_some() { + return Err(anyhow!( + "burn compare: --workflow / --agent filters are not yet wired through the Rust ledger query (#246 follow-up)" + )); + } + + // 8. Open ledger and walk turns. + let ledger_opts = match globals.ledger_path.as_deref() { + Some(p) => LedgerOpenOptions::with_home(p), + None => LedgerOpenOptions::default(), + }; + let handle = Ledger::open(ledger_opts)?; + let queried_turns: Vec = handle.raw().query_turns(&q)?; + + // 9. Provider filter is rejected up-front (see step 4). Pipeline + // treats every queried turn as eligible. + let filtered_by_provider: Vec = queried_turns; + + // 10. Fidelity summary is computed BEFORE the fidelity gate so the + // `summary` block in the JSON envelope reflects the queried slice. + let fidelity_summary = summarize_fidelity( + &filtered_by_provider + .iter() + .map(|et| et.turn.clone()) + .collect::>(), + ); + let filtered_turns: Vec = if matches!(min_fidelity, FidelityClass::Partial) { + filtered_by_provider + } else { + filtered_by_provider + .into_iter() + .filter(|et| has_minimum_fidelity(et.turn.fidelity.as_ref(), min_fidelity)) + .collect() + }; + let analyzed_turns = filtered_turns.len(); + + // 11. Build the compare table. 
+ let pricing = load_pricing(None); + let opts = CompareOptions { + pricing: &pricing, + models: Some(models.clone()), + min_sample: Some(min_sample), + }; + let table = build_compare_table(&filtered_turns, &opts); + + // 12. Render. + if globals.json { + let v = build_json(&table, analyzed_turns, min_fidelity, &fidelity_summary); + render_json(&v)?; + return Ok(0); + } + if args.csv { + let csv = render_csv(&table); + print!("{csv}"); + return Ok(0); + } + let tty = render_tty( + &table, + analyzed_turns, + min_fidelity, + &fidelity_summary, + ); + print!("{tty}"); + Ok(0) +} + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +fn parse_fidelity(s: &str) -> Result<FidelityClass> { + match s { + "full" => Ok(FidelityClass::Full), + "usage-only" => Ok(FidelityClass::UsageOnly), + "aggregate-only" => Ok(FidelityClass::AggregateOnly), + "cost-only" => Ok(FidelityClass::CostOnly), + "partial" => Ok(FidelityClass::Partial), + other => Err(anyhow!("invalid fidelity class: {other}")), + } +} + +fn fidelity_class_str(cls: FidelityClass) -> &'static str { + match cls { + FidelityClass::Full => "full", + FidelityClass::UsageOnly => "usage-only", + FidelityClass::AggregateOnly => "aggregate-only", + FidelityClass::CostOnly => "cost-only", + FidelityClass::Partial => "partial", + } +} + +/// Normalize `--since` exactly like the SDK's free-fn would (relative +/// `7d` → ISO Z, ISO pass-through, garbage → error). Inlined here rather +/// than imported because the SDK helper isn't on the public surface. 
+fn normalize_since(since: Option<&str>) -> Result> { + let Some(raw) = since else { + return Ok(None); + }; + if raw.is_empty() { + return Ok(None); + } + if let Some((n, unit)) = parse_relative(raw) { + let secs_back = match unit { + 'h' => n * 3_600, + 'd' => n * 86_400, + 'w' => n * 7 * 86_400, + 'm' => n * 30 * 86_400, + _ => unreachable!(), + }; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let when = now.saturating_sub(secs_back); + return Ok(Some(format_iso_z(when))); + } + if !looks_like_iso(raw) { + return Err(anyhow!( + "invalid since: {raw} (expected ISO timestamp or relative range like 7d)" + )); + } + Ok(Some(raw.to_string())) +} + +fn parse_relative(s: &str) -> Option<(u64, char)> { + let bytes = s.as_bytes(); + if bytes.len() < 2 { + return None; + } + let unit = bytes[bytes.len() - 1] as char; + if !matches!(unit, 'h' | 'd' | 'w' | 'm') { + return None; + } + let num = &s[..s.len() - 1]; + if num.is_empty() || !num.bytes().all(|b| b.is_ascii_digit()) { + return None; + } + let n: u64 = num.parse().ok()?; + Some((n, unit)) +} + +fn looks_like_iso(s: &str) -> bool { + let b = s.as_bytes(); + b.len() >= 10 + && b[0..4].iter().all(|c| c.is_ascii_digit()) + && b[4] == b'-' + && b[5..7].iter().all(|c| c.is_ascii_digit()) + && b[7] == b'-' + && b[8..10].iter().all(|c| c.is_ascii_digit()) +} + +fn format_iso_z(secs: u64) -> String { + let total_days = (secs / 86_400) as i64; + let secs_in_day = (secs % 86_400) as u32; + let hour = secs_in_day / 3_600; + let minute = (secs_in_day / 60) % 60; + let second = secs_in_day % 60; + let (year, month, day) = days_to_ymd(total_days); + format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z") +} + +fn days_to_ymd(days_from_epoch: i64) -> (i64, u32, u32) { + let z = days_from_epoch + 719_468; + let era = if z >= 0 { z } else { z - 146_096 } / 146_097; + let doe = (z - era * 146_097) as u64; + let yoe = (doe - doe / 1_460 
+ doe / 36_524 - doe / 146_096) / 365; + let y = yoe as i64 + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let year = if m <= 2 { y + 1 } else { y }; + (year, m as u32, d as u32) +} + +/// Drop-in for `RELAYBURN_ARCHIVE=0`. Restores the previous value on +/// drop so a panic part-way through doesn't leak the override. +struct ArchiveOverride { + previous: Option, + activated: bool, +} + +impl ArchiveOverride { + fn activate(no_archive: bool) -> Self { + if !no_archive { + return Self { + previous: None, + activated: false, + }; + } + let previous = std::env::var("RELAYBURN_ARCHIVE").ok(); + std::env::set_var("RELAYBURN_ARCHIVE", "0"); + Self { + previous, + activated: true, + } + } +} + +impl Drop for ArchiveOverride { + fn drop(&mut self) { + if !self.activated { + return; + } + match self.previous.take() { + Some(v) => std::env::set_var("RELAYBURN_ARCHIVE", v), + None => std::env::remove_var("RELAYBURN_ARCHIVE"), + } + } +} + +// --------------------------------------------------------------------------- +// number formatting (matches packages/cli/src/format.ts) +// --------------------------------------------------------------------------- + +fn format_usd(n: f64) -> String { + if n == 0.0 { + return "$0.00".to_string(); + } + if n < 0.01 { + return format!("${}", to_fixed_raw(n, 4)); + } + if n < 1.0 { + return format!("${}", to_fixed_raw(n, 3)); + } + format!("${}", to_fixed_raw(n, 2)) +} + +/// Mirror JS `n.toFixed(d)` — keeps trailing zeros (so 1.5 with digits=2 +/// becomes "1.50"). Use this for human-readable output where the +/// fixed-width column matters; use [`to_fixed`] for JSON-bound values +/// that go through `Number(...).toString()` semantics. 
+fn to_fixed_raw(n: f64, digits: usize) -> String { + format!("{n:.*}", digits) +} + +fn format_int(n: u64) -> String { + // `toLocaleString('en-US')` thousands grouping. JS uses `,`. The + // golden corpus values are small (≤ 7) so the comma path isn't hit + // by the snapshot, but we implement it anyway for parity. + let s = n.to_string(); + let bytes = s.as_bytes(); + let mut out = String::with_capacity(s.len() + s.len() / 3); + let len = bytes.len(); + for (i, b) in bytes.iter().enumerate() { + if i > 0 && (len - i).is_multiple_of(3) { + out.push(','); + } + out.push(*b as char); + } + out +} + +fn format_pct(rate: f64) -> String { + // `Math.round(p * 100)` — round half to even on f64; matches JS for + // the corpus we compare against (the `Math.round` half-to-even + // exception is below the 1e-9 precision we care about here). + let pct = (rate * 100.0).round() as i64; + format!("{pct}%") +} + +/// `Number(n.toFixed(d))` — produce the shortest decimal string for the +/// rounded value. Drops trailing zeros, mirroring JS `Number(...).toString()`. +fn to_fixed(n: f64, digits: usize) -> String { + let s = format!("{n:.*}", digits); + // For "0.00" / "1.00" → strip the trailing zeros, but keep at least + // the integer part. Mirrors JS: `Number("1.00").toString() === "1"`. + trim_trailing_zeros(&s) +} + +fn trim_trailing_zeros(s: &str) -> String { + if !s.contains('.') { + return s.to_string(); + } + let trimmed = s.trim_end_matches('0').trim_end_matches('.'); + if trimmed.is_empty() || trimmed == "-" { + "0".to_string() + } else { + trimmed.to_string() + } +} + +// --------------------------------------------------------------------------- +// rounding for JSON output (Number(n.toFixed(d))) +// --------------------------------------------------------------------------- + +/// JSON-friendly rounded number. Returns a `serde_json::Value::Number` +/// that prints without trailing zeros — matches `JSON.stringify(Number(n.toFixed(d)))`. 
+/// Whole-number results render as integers (`1`, not `1.0`); fractional +/// results render as the shortest decimal needed. +fn round_json(n: f64, digits: usize) -> Value { + let s = format!("{n:.*}", digits); + let parsed: f64 = s.parse().unwrap_or(0.0); + f64_to_json(parsed) +} + +/// Serialize an f64 with JS `JSON.stringify` semantics: integral values +/// render as integers, fractional values render via Ryu. +fn f64_to_json(n: f64) -> Value { + if n.is_nan() || n.is_infinite() { + // Match JS: NaN / Infinity become `null` in JSON. + return Value::Null; + } + if n == 0.0 { + // Both +0.0 and -0.0 become 0. + return Value::from(0u64); + } + if n.fract() == 0.0 && n.abs() < (i64::MAX as f64) { + return Value::from(n as i64); + } + // `serde_json::Number::from_f64` always emits a JSON number; the + // pretty-printer uses Ryu's shortest representation for finite f64. + Value::from(n) +} + +/// Like `f64_to_json` but for `Option<f64>` — `None` → `null`. +fn opt_f64_to_json(n: Option<f64>) -> Value { + match n { + Some(v) => f64_to_json(v), + None => Value::Null, + } +} + +/// Like `round_json` but for `Option<f64>`. 
+fn round_opt(n: Option<f64>, digits: usize) -> Value { + match n { + Some(v) => round_json(v, digits), + None => Value::Null, + } +} + +// --------------------------------------------------------------------------- +// CompareExcludedBreakdown +// --------------------------------------------------------------------------- + +#[derive(Default)] +struct ExcludedBreakdown { + total: u64, + aggregate_only: u64, + cost_only: u64, + partial: u64, + usage_only: u64, +} + +fn compute_excluded(summary: &FidelitySummary, minimum: FidelityClass) -> ExcludedBreakdown { + let mut out = ExcludedBreakdown::default(); + if matches!(minimum, FidelityClass::Partial) { + return out; + } + let need = FIDELITY_ORDER + .iter() + .position(|c| *c == fidelity_class_str(minimum)) + .unwrap_or(0); + for (i, key) in FIDELITY_ORDER.iter().enumerate() { + if i >= need { + continue; + } + let cls = parse_fidelity(key).unwrap(); + let n = summary.by_class.get(&cls).copied().unwrap_or(0); + if n == 0 { + continue; + } + out.total += n; + match *key { + "aggregate-only" => out.aggregate_only += n, + "cost-only" => out.cost_only += n, + "partial" => out.partial += n, + "usage-only" => out.usage_only += n, + _ => {} + } + } + out +} + +// --------------------------------------------------------------------------- +// JSON envelope +// --------------------------------------------------------------------------- + +fn build_json( + table: &CompareTable, + analyzed_turns: usize, + minimum: FidelityClass, + summary: &FidelitySummary, +) -> Value { + let excluded = compute_excluded(summary, minimum); + // Cells in (model × category) iteration order; matches the TS + // `for m of models / for cat of categories` walk. 
+ let mut cells: Vec = Vec::with_capacity(table.models.len() * table.categories.len()); + for m in &table.models { + for cat in &table.categories { + let c = table + .cells + .get(m) + .and_then(|by_cat| by_cat.get(cat)) + .cloned() + .unwrap_or_else(empty_cell); + cells.push(json!({ + "model": m, + "category": cat, + "turns": c.turns, + "editTurns": c.edit_turns, + "oneShotTurns": c.one_shot_turns, + "pricedTurns": c.priced_turns, + "totalCost": round_json(c.total_cost, 6), + "costPerTurn": round_opt(c.cost_per_turn, 6), + "oneShotRate": round_opt(c.one_shot_rate, 4), + "cacheHitRate": round_opt(c.cache_hit_rate, 4), + "medianRetries": opt_f64_to_json(c.median_retries), + "noData": c.no_data, + "insufficientSample": c.insufficient_sample, + })); + } + } + + // `totals` keys must come out in `models` order (the TS `Object` + // preserves insertion order). Build with a serde_json::Map so the + // `preserve_order` feature on serde_json keeps insertion order. + let mut totals = serde_json::Map::new(); + for m in &table.models { + let totals_for = table.totals.get(m).cloned().unwrap_or_default(); + totals.insert( + m.clone(), + json!({ + "turns": totals_for.turns, + "totalCost": f64_to_json(totals_for.total_cost), + }), + ); + } + + json!({ + "analyzedTurns": analyzed_turns, + "minSample": table.min_sample, + "models": &table.models, + "categories": &table.categories, + "totals": Value::Object(totals), + "cells": cells, + "fidelity": { + "minimum": fidelity_class_str(minimum), + "excluded": { + "total": excluded.total, + "aggregateOnly": excluded.aggregate_only, + "costOnly": excluded.cost_only, + "partial": excluded.partial, + "usageOnly": excluded.usage_only, + }, + "summary": fidelity_summary_to_value(summary), + } + }) +} + +/// Build the fidelity-summary JSON sub-object with the same key order +/// the TS path emits (literal `{ full, usage-only, aggregate-only, +/// cost-only, partial }` order, preserved via serde_json's +/// `preserve_order` feature). 
+fn fidelity_summary_to_value(s: &FidelitySummary) -> Value { + let mut by_class = serde_json::Map::new(); + for key in &["full", "usage-only", "aggregate-only", "cost-only", "partial"] { + let cls = parse_fidelity(key).unwrap(); + let n = s.by_class.get(&cls).copied().unwrap_or(0); + by_class.insert((*key).to_string(), Value::from(n)); + } + let mut by_granularity = serde_json::Map::new(); + for key in &["per-turn", "per-message", "per-session-aggregate", "cost-only"] { + let g = match *key { + "per-turn" => UsageGranularity::PerTurn, + "per-message" => UsageGranularity::PerMessage, + "per-session-aggregate" => UsageGranularity::PerSessionAggregate, + "cost-only" => UsageGranularity::CostOnly, + _ => unreachable!(), + }; + let n = s.by_granularity.get(&g).copied().unwrap_or(0); + by_granularity.insert((*key).to_string(), Value::from(n)); + } + // missingCoverage: keys are camelCase; iterate in the same fixed order + // the TS `emptyFidelitySummary()` literal uses so JSON shape is stable. 
+ let coverage_keys = &[ + "hasInputTokens", + "hasOutputTokens", + "hasReasoningTokens", + "hasCacheReadTokens", + "hasCacheCreateTokens", + "hasToolCalls", + "hasToolResultEvents", + "hasSessionRelationships", + "hasRawContent", + ]; + let mut missing = serde_json::Map::new(); + for k in coverage_keys { + let n = s.missing_coverage.get(*k).copied().unwrap_or(0); + missing.insert((*k).to_string(), Value::from(n)); + } + + let mut out = serde_json::Map::new(); + out.insert("total".to_string(), Value::from(s.total)); + out.insert("byClass".to_string(), Value::Object(by_class)); + out.insert("byGranularity".to_string(), Value::Object(by_granularity)); + out.insert("missingCoverage".to_string(), Value::Object(missing)); + out.insert("unknown".to_string(), Value::from(s.unknown)); + Value::Object(out) +} + +fn empty_cell() -> CompareCell { + CompareCell { + turns: 0, + edit_turns: 0, + one_shot_turns: 0, + priced_turns: 0, + total_cost: 0.0, + cost_per_turn: None, + one_shot_rate: None, + cache_hit_rate: None, + median_retries: None, + no_data: true, + insufficient_sample: false, + } +} + +// --------------------------------------------------------------------------- +// CSV +// --------------------------------------------------------------------------- + +fn render_csv(table: &CompareTable) -> String { + let header = [ + "model", + "category", + "turns", + "editTurns", + "oneShotTurns", + "pricedTurns", + "totalCost", + "costPerTurn", + "oneShotRate", + "cacheHitRate", + "medianRetries", + "noData", + "insufficientSample", + ]; + let mut rows: Vec = Vec::new(); + rows.push(header.join(",")); + for m in &table.models { + for cat in &table.categories { + let c = table + .cells + .get(m) + .and_then(|by_cat| by_cat.get(cat)) + .cloned() + .unwrap_or_else(empty_cell); + let row = vec![ + csv_cell(m), + csv_cell(cat), + c.turns.to_string(), + c.edit_turns.to_string(), + c.one_shot_turns.to_string(), + c.priced_turns.to_string(), + num_csv(c.total_cost, 6), + 
c.cost_per_turn + .map(|v| num_csv(v, 6)) + .unwrap_or_default(), + c.one_shot_rate + .map(|v| num_csv(v, 4)) + .unwrap_or_default(), + c.cache_hit_rate + .map(|v| num_csv(v, 4)) + .unwrap_or_default(), + c.median_retries + .map(|v| { + // `String(n)` for numbers; JS prints integers as-is. + if v.fract() == 0.0 { + (v as i64).to_string() + } else { + v.to_string() + } + }) + .unwrap_or_default(), + if c.no_data { "true" } else { "false" }.to_string(), + if c.insufficient_sample { + "true" + } else { + "false" + } + .to_string(), + ]; + rows.push(row.join(",")); + } + } + format!("{}\n", rows.join("\n")) +} + +fn csv_cell(s: &str) -> String { + if s.contains(',') || s.contains('"') || s.contains('\n') { + format!("\"{}\"", s.replace('"', "\"\"")) + } else { + s.to_string() + } +} + +fn num_csv(n: f64, digits: usize) -> String { + to_fixed(n, digits) +} + +// --------------------------------------------------------------------------- +// TTY +// --------------------------------------------------------------------------- + +fn cell_fields(c: &CompareCell) -> [String; 3] { + if c.no_data { + return [DASH.to_string(), DASH.to_string(), DASH.to_string()]; + } + let turns = format_int(c.turns); + let cost = c + .cost_per_turn + .map(format_usd) + .unwrap_or_else(|| DASH.to_string()); + let one_shot = c + .one_shot_rate + .map(format_pct) + .unwrap_or_else(|| DASH.to_string()); + [turns, cost, one_shot] +} + +fn render_tty( + table: &CompareTable, + analyzed_turns: usize, + minimum: FidelityClass, + summary: &FidelitySummary, +) -> String { + let mut lines: Vec = Vec::new(); + lines.push(String::new()); + lines.push(format!("turns analyzed: {}", format_int(analyzed_turns as u64))); + + let excluded = compute_excluded(summary, minimum); + if excluded.total > 0 { + lines.push(format_excluded_note(&excluded, minimum)); + } + lines.push(String::new()); + + if table.models.is_empty() || table.categories.is_empty() { + lines.push( + "no data to compare (need turns spanning ≥1 
model and ≥1 activity).".to_string(), + ); + lines.push(String::new()); + return lines.join("\n"); + } + + let sub_header = build_sub_header(&table.models); + + let owned_empty = empty_cell(); + let cell_for = |m: &str, cat: &str| -> CompareCell { + table + .cells + .get(m) + .and_then(|by| by.get(cat)) + .cloned() + .unwrap_or_else(empty_cell) + }; + // Suppress the unused-variable warning on `owned_empty`; it's only + // referenced when we run a corner case where neither cells.get nor + // by_cat.get is hit, which the table builder doesn't produce today. + let _ = &owned_empty; + + let mut data_rows: Vec> = Vec::new(); + for cat in &table.categories { + let mut row: Vec = vec![cat.clone()]; + for m in &table.models { + let cell = cell_for(m, cat); + let [a, b, c] = cell_fields(&cell); + row.push(a); + row.push(b); + row.push(c); + } + data_rows.push(row); + } + + let mut widths = vec![0usize; sub_header.len()]; + for row in std::iter::once(&sub_header).chain(data_rows.iter()) { + for (i, cell) in row.iter().enumerate() { + widths[i] = widths[i].max(display_width(cell)); + } + } + + const SEP: &str = " "; + + // Widen the last column of each model's group to fit the (possibly + // longer) display name. Mirrors the TS path's group-line padding. + for mi in 0..table.models.len() { + let start = 1 + mi * 3; + let group_width = + widths[start] + SEP.len() + widths[start + 1] + SEP.len() + widths[start + 2]; + let name = display_model_name(&table.models[mi]); + let name_w = display_width(name); + if name_w > group_width { + widths[start + 2] += name_w - group_width; + } + } + + // Group-name line. 
+ let mut group_line: Vec = vec![pad_end("", widths[0])]; + for mi in 0..table.models.len() { + let start = 1 + mi * 3; + let group_width = + widths[start] + SEP.len() + widths[start + 1] + SEP.len() + widths[start + 2]; + let name = display_model_name(&table.models[mi]); + group_line.push(pad_end(name, group_width)); + } + lines.push(rstrip(&group_line.join(SEP))); + + // Sub-header. + lines.push(render_row(&sub_header, &widths, SEP)); + + // Data rows. + for row in &data_rows { + lines.push(render_row(row, &widths, SEP)); + } + + // Coverage notes. + let mut notes: Vec = Vec::new(); + for cat in &table.categories { + let any_has_data = table + .models + .iter() + .any(|m| !cell_for(m, cat).no_data); + if !any_has_data { + continue; + } + for m in &table.models { + let cell = cell_for(m, cat); + if cell.no_data { + notes.push(format!( + "no {} data in '{cat}' — no comparison available.", + display_model_name(m) + )); + } else if cell.insufficient_sample { + notes.push(format!( + "low {} sample in '{cat}' ({} turns < {}) — treat as indicative.", + display_model_name(m), + cell.turns, + table.min_sample + )); + } + } + } + if !notes.is_empty() { + lines.push(String::new()); + let shown = notes.iter().take(NOTE_LIMIT); + for n in shown { + lines.push(format!(" {n}")); + } + if notes.len() > NOTE_LIMIT { + lines.push(format!( + " … and {} more coverage gaps.", + notes.len() - NOTE_LIMIT + )); + } + } + + // Per-model totals. 
+ lines.push(String::new()); + for m in &table.models { + let tot = table.totals.get(m).cloned().unwrap_or_default(); + let total_cost = if tot.turns > 0 { + format_usd(tot.total_cost) + } else { + DASH.to_string() + }; + lines.push(format!( + "{}: {} turns, {} total", + display_model_name(m), + format_int(tot.turns), + total_cost + )); + } + lines.push(String::new()); + lines.join("\n") +} + +fn build_sub_header(models: &[String]) -> Vec { + let mut row: Vec = vec!["Activity".to_string()]; + for _ in models { + row.push("Turns".to_string()); + row.push("Cost/turn".to_string()); + row.push("1-shot".to_string()); + } + row +} + +fn render_row(row: &[String], widths: &[usize], sep: &str) -> String { + let mut parts: Vec = Vec::with_capacity(row.len()); + for (i, cell) in row.iter().enumerate() { + parts.push(pad_end(cell, widths[i])); + } + rstrip(&parts.join(sep)) +} + +fn pad_end(s: &str, width: usize) -> String { + let w = display_width(s); + if w >= width { + return s.to_string(); + } + let pad = " ".repeat(width - w); + format!("{s}{pad}") +} + +fn rstrip(s: &str) -> String { + s.trim_end_matches(' ').to_string() +} + +/// `String.length` in JS counts UTF-16 code units, but for the corpus +/// this CLI ships against (ASCII model names, ASCII activity labels), +/// `chars().count()` is byte-equivalent. We use it instead of byte length +/// to keep the dash sentinel (`—`, U+2014, 3 bytes UTF-8 / 1 UTF-16 +/// unit) aligning the way the TS path expects. 
+fn display_width(s: &str) -> usize { + s.chars().count() +} + +fn display_model_name(m: &str) -> &str { + match m.find('/') { + Some(i) => &m[i + 1..], + None => m, + } +} + +fn format_excluded_note(excluded: &ExcludedBreakdown, minimum: FidelityClass) -> String { + let mut parts: Vec = Vec::new(); + if excluded.aggregate_only > 0 { + parts.push(format!("{} aggregate-only", excluded.aggregate_only)); + } + if excluded.cost_only > 0 { + parts.push(format!("{} cost-only", excluded.cost_only)); + } + if excluded.partial > 0 { + parts.push(format!("{} partial", excluded.partial)); + } + if excluded.usage_only > 0 { + parts.push(format!("{} usage-only", excluded.usage_only)); + } + let breakdown = if parts.is_empty() { + String::new() + } else { + format!(" ({})", parts.join(", ")) + }; + let noun = if excluded.total == 1 { "turn" } else { "turns" }; + format!( + "excluded {} {noun} below {} fidelity{breakdown}", + format_int(excluded.total), + fidelity_class_str(minimum) + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn format_usd_buckets() { + assert_eq!(format_usd(0.0), "$0.00"); + assert_eq!(format_usd(0.0017), "$0.0017"); + assert_eq!(format_usd(0.011), "$0.011"); + assert_eq!(format_usd(0.034), "$0.034"); + assert_eq!(format_usd(1.5), "$1.50"); + } + + #[test] + fn format_int_groups_thousands() { + assert_eq!(format_int(7), "7"); + assert_eq!(format_int(1_500), "1,500"); + assert_eq!(format_int(1_000_000), "1,000,000"); + } + + #[test] + fn format_pct_rounds_to_int() { + assert_eq!(format_pct(0.0), "0%"); + assert_eq!(format_pct(0.5), "50%"); + assert_eq!(format_pct(1.0), "100%"); + assert_eq!(format_pct(2.0 / 3.0), "67%"); + } + + #[test] + fn round_json_matches_js_to_fixed() { + // Whole numbers come out as integers (no `.0` suffix). + let v = round_json(1.0, 4); + assert_eq!(v.to_string(), "1"); + // Non-whole shorter than digit cap drops trailing zeros. 
+ let v = round_json(0.5, 4); + assert_eq!(v.to_string(), "0.5"); + // Rounds to 6 digits. + let v = round_json(0.0112499999, 6); + assert_eq!(v.to_string(), "0.01125"); + } + + #[test] + fn parse_fidelity_known_classes() { + assert!(matches!(parse_fidelity("full").unwrap(), FidelityClass::Full)); + assert!(matches!( + parse_fidelity("usage-only").unwrap(), + FidelityClass::UsageOnly + )); + assert!(parse_fidelity("nope").is_err()); + } + + #[test] + fn display_model_name_strips_provider_prefix() { + assert_eq!(display_model_name("anthropic/claude-sonnet-4-6"), "claude-sonnet-4-6"); + assert_eq!(display_model_name("claude-haiku-4-5"), "claude-haiku-4-5"); + } } diff --git a/crates/relayburn-cli/src/main.rs b/crates/relayburn-cli/src/main.rs index ca20ed37..cdb8712f 100644 --- a/crates/relayburn-cli/src/main.rs +++ b/crates/relayburn-cli/src/main.rs @@ -34,7 +34,7 @@ fn dispatch(args: Args) -> i32 { Command::Summary => commands::summary::run(&globals), Command::Hotspots => commands::hotspots::run(&globals), Command::Overhead => commands::overhead::run(&globals), - Command::Compare => commands::compare::run(&globals), + Command::Compare(args) => commands::compare::run(&globals, args), Command::Run => commands::run::run(&globals), Command::State => commands::state::run(&globals), Command::Ingest => commands::ingest::run(&globals), diff --git a/crates/relayburn-cli/tests/golden.rs b/crates/relayburn-cli/tests/golden.rs index 15bd1152..4573821a 100644 --- a/crates/relayburn-cli/tests/golden.rs +++ b/crates/relayburn-cli/tests/golden.rs @@ -36,7 +36,12 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; +use relayburn_sdk::{ + Ledger, LedgerOpenOptions, SessionRelationshipRecord, Stamp, ToolResultEventRecord, + TurnRecord, UserTurnRecord, +}; use serde::Deserialize; +use serde_json::Value; #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] @@ -93,9 +98,26 @@ fn golden_diff_against_ts_cli_snapshots() { .unwrap_or_else(|err| 
panic!("invocations.json is malformed: {err}")); let snapshots_dir = fixture_dir.join("snapshots"); - let ledger_home = fixture_dir.join("ledger"); + let ts_ledger_home = fixture_dir.join("ledger"); let project_dir = fixture_dir.join("project"); + // The on-disk fixture is a TS-style JSONL ledger. The Rust SDK reads + // SQLite, so we materialize a sibling SQLite home from the JSONL once + // and point the binary at it for every invocation. Done up front so + // the import cost is paid once per test run, not per invocation. + let rust_ledger_home = tempdir_under(&fixture_dir); + if let Err(err) = import_jsonl_into_sqlite( + &ts_ledger_home.join("ledger.jsonl"), + &rust_ledger_home, + ) { + let _ = fs::remove_dir_all(&rust_ledger_home); + panic!( + "[golden] failed to import {} into a Rust-SDK SQLite ledger: {err:?}", + ts_ledger_home.display(), + ); + } + let ledger_home = rust_ledger_home.clone(); + // Sealed HOME so the Rust binary's eventual ingest sweep doesn't // discover the developer's real session stores. let sealed_home = tempdir_under(&fixture_dir); @@ -190,6 +212,7 @@ fn golden_diff_against_ts_cli_snapshots() { } let _ = fs::remove_dir_all(&sealed_home); + let _ = fs::remove_dir_all(&rust_ledger_home); if !failures.is_empty() { panic!( @@ -339,6 +362,128 @@ fn indent(text: &str, prefix: &str) -> String { .join("\n") } +/// Read a TS-style JSONL ledger (the on-disk shape `@relayburn/ledger` +/// emits today) and materialize an equivalent SQLite-backed home that +/// `relayburn_sdk::Ledger::open` can read. The cli-golden corpus ships +/// only the JSONL form; this bridge keeps the corpus source-of-truth +/// while letting the Rust binary read the same data. +/// +/// The supported line kinds mirror what `tests/fixtures/cli-golden/scripts/ +/// build-ledger.mjs` writes: `turn`, `user_turn`, `tool_result_event`, +/// `relationship`, `stamp`. 
Anything else is skipped with a stderr breadcrumb +/// (the legacy `text` / `tool_result` line kinds are the inner block +/// shapes nested under `user_turn.blocks` and never appear at the top +/// level — they're caught by the catch-all path so a future fixture +/// extension doesn't drop data on the floor without warning). +fn import_jsonl_into_sqlite(jsonl_path: &Path, sqlite_home: &Path) -> anyhow::Result<()> { + use anyhow::{anyhow, Context}; + + fs::create_dir_all(sqlite_home) + .with_context(|| format!("create_dir_all({})", sqlite_home.display()))?; + let raw = fs::read_to_string(jsonl_path) + .with_context(|| format!("read_to_string({})", jsonl_path.display()))?; + + let mut turns: Vec = Vec::new(); + let mut user_turns: Vec = Vec::new(); + let mut tool_results: Vec = Vec::new(); + let mut relationships: Vec = Vec::new(); + let mut stamps: Vec = Vec::new(); + + for (i, line) in raw.lines().enumerate() { + if line.trim().is_empty() { + continue; + } + let v: Value = serde_json::from_str(line) + .with_context(|| format!("parse JSON at line {}", i + 1))?; + let kind = v + .get("kind") + .and_then(|x| x.as_str()) + .ok_or_else(|| anyhow!("missing kind at line {}", i + 1))?; + match kind { + "turn" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + let parsed: TurnRecord = serde_json::from_value(rec.clone()) + .with_context(|| format!("parse turn at line {}", i + 1))?; + turns.push(parsed); + } + "user_turn" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + let parsed: UserTurnRecord = serde_json::from_value(rec.clone()) + .with_context(|| format!("parse user_turn at line {}", i + 1))?; + user_turns.push(parsed); + } + "tool_result_event" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + // Best-effort: the cli-golden fixture writes `eventSource: + // "transcript"`, which is a synthetic value the strict + // Rust enum 
doesn't recognize. Skip on parse failure + // rather than panicking — `compare` doesn't read this + // table anyway, so dropping these rows doesn't drift + // the snapshot output. (#310 left this loose-typed in + // the fixture; tightening that is out of scope here.) + match serde_json::from_value::(rec.clone()) { + Ok(parsed) => tool_results.push(parsed), + Err(err) => { + eprintln!( + "[golden] skipping tool_result_event at line {}: {err}", + i + 1 + ); + } + } + } + "relationship" => { + let rec = v + .get("record") + .ok_or_else(|| anyhow!("missing record at line {}", i + 1))?; + match serde_json::from_value::(rec.clone()) { + Ok(parsed) => relationships.push(parsed), + Err(err) => { + eprintln!( + "[golden] skipping relationship at line {}: {err}", + i + 1 + ); + } + } + } + "stamp" => { + // Stamp lines are flat — selector / enrichment / ts at + // the top level, no `record` envelope (matches the TS + // `stamp()` writer). + match serde_json::from_value::(v.clone()) { + Ok(parsed) => stamps.push(parsed), + Err(err) => { + eprintln!("[golden] skipping stamp at line {}: {err}", i + 1); + } + } + } + _ => { + // Unknown kind — skip with a stderr breadcrumb so + // fixture extensions surface explicitly. 
+ eprintln!( + "[golden] skipping unknown ledger line kind `{kind}` at line {}", + i + 1 + ); + } + } + } + + let mut handle = Ledger::open(LedgerOpenOptions::with_home(sqlite_home))?; + handle.raw_mut().append_turns(&turns)?; + handle.raw_mut().append_user_turns(&user_turns)?; + handle.raw_mut().append_tool_result_events(&tool_results)?; + handle.raw_mut().append_relationships(&relationships)?; + for s in &stamps { + handle.raw_mut().append_stamp(s)?; + } + Ok(()) +} + fn tempdir_under(parent: &Path) -> PathBuf { use std::time::{SystemTime, UNIX_EPOCH}; let nanos = SystemTime::now() diff --git a/crates/relayburn-cli/tests/smoke.rs b/crates/relayburn-cli/tests/smoke.rs index 78401d58..8be4dc48 100644 --- a/crates/relayburn-cli/tests/smoke.rs +++ b/crates/relayburn-cli/tests/smoke.rs @@ -34,6 +34,21 @@ const SUBCOMMANDS: &[&str] = &[ "mcp-server", ]; +/// Subset of [`SUBCOMMANDS`] still on the scaffold "not yet implemented" +/// exit path. As each Wave 2 D1–D8 PR wires its presenter, drop the +/// command from this list — the missing entries fall under a more +/// targeted assertion (see `compare_command_rejects_missing_models` +/// below for an example). +const STUB_SUBCOMMANDS: &[&str] = &[ + "summary", + "hotspots", + "overhead", + "run", + "state", + "ingest", + "mcp-server", +]; + /// Helper: build a `Command` driving the locally-built `burn` binary. fn burn() -> Command { Command::cargo_bin("burn").expect("`burn` binary must build for the smoke test") @@ -76,7 +91,7 @@ fn each_subcommand_help_exits_zero_with_non_empty_stdout() { #[test] fn each_stub_exits_one_with_not_yet_implemented_message() { - for sub in SUBCOMMANDS { + for sub in STUB_SUBCOMMANDS { // Run the stub with no extra args. 
The default exit-code // contract for the scaffold is `EXIT_NOT_YET_IMPLEMENTED == 1`; // assert it explicitly so a future Wave 2 PR that wires up a @@ -90,6 +105,19 @@ fn each_stub_exits_one_with_not_yet_implemented_message() { } } +#[test] +fn compare_command_rejects_missing_models() { + // `burn compare` is wired (Wave 2 D3); no positional list means + // exit 2 + the canonical "needs at least 2 models" message. This + // asserts the wired path exists so a future regression that nukes + // the dispatch arm fails loud. + burn() + .arg("compare") + .assert() + .code(2) + .stderr(predicate::str::contains("needs at least 2 models")); +} + #[test] fn json_mode_emits_error_envelope_on_unimplemented() { // The `--json` global flips error reporting from a stderr line to diff --git a/crates/relayburn-sdk/src/lib.rs b/crates/relayburn-sdk/src/lib.rs index 0740dfd6..d3ddde21 100644 --- a/crates/relayburn-sdk/src/lib.rs +++ b/crates/relayburn-sdk/src/lib.rs @@ -91,16 +91,18 @@ pub use crate::ledger::{ pub use crate::analyze::{ aggregate_by_bash, aggregate_by_bash_verb, aggregate_by_file, aggregate_by_subagent, - attribute_hotspots, attribute_overhead, build_trim_recommendations, cost_for_turn, - cost_for_usage, detect_patterns, detect_tool_call_patterns, detect_tool_output_bloat, - find_overhead_files, findings_from_patterns, has_minimum_fidelity, load_overhead_file, - load_pricing, render_unified_diff_for_recommendation, summarize_fidelity, sum_costs, + attribute_hotspots, attribute_overhead, build_compare_table, build_trim_recommendations, + compare_from_archive, cost_for_turn, cost_for_usage, detect_patterns, + detect_tool_call_patterns, detect_tool_output_bloat, find_overhead_files, + findings_from_patterns, has_minimum_fidelity, load_overhead_file, load_pricing, + render_unified_diff_for_recommendation, summarize_fidelity, sum_costs, AttributeOverheadInput, AttributionMethod, BashAggregation, BashVerbAggregation, - CostBreakdown, FidelitySummary, FileAggregation, 
HotspotsOptions as AnalyzeHotspotsOptions, - HotspotsResult as AnalyzeHotspotsResult, ModelCost, OverheadAttribution, OverheadFile, - OverheadFileAttribution, OverheadFileKind, ParsedOverheadFile, PricingTable, ReasoningMode, - SessionTotals, SubagentAggregation, ToolAttribution, TrimRecommendation, WasteFinding, - WasteSeverity, + CompareCategory, CompareCell, CompareFromArchiveResult, CompareOptions, CompareTable, + CompareTotals, CostBreakdown, FidelitySummary, FileAggregation, + HotspotsOptions as AnalyzeHotspotsOptions, HotspotsResult as AnalyzeHotspotsResult, ModelCost, + OverheadAttribution, OverheadFile, OverheadFileAttribution, OverheadFileKind, + ParsedOverheadFile, PricingTable, ReasoningMode, SessionTotals, SubagentAggregation, + ToolAttribution, TrimRecommendation, WasteFinding, WasteSeverity, DEFAULT_MIN_SAMPLE, }; pub use crate::ingest::{ diff --git a/tests/fixtures/cli-golden/invocations.json b/tests/fixtures/cli-golden/invocations.json index 9baaa964..231030a4 100644 --- a/tests/fixtures/cli-golden/invocations.json +++ b/tests/fixtures/cli-golden/invocations.json @@ -51,13 +51,13 @@ "name": "compare", "args": ["compare", "claude-sonnet-4-6,claude-haiku-4-5", "--include-partial"], "expectStatus": 0, - "enabled": false + "enabled": true }, { "name": "compare-json", "args": ["compare", "claude-sonnet-4-6,claude-haiku-4-5", "--include-partial", "--json"], "expectStatus": 0, - "enabled": false + "enabled": true }, { "name": "state-status", From 822963c1198651858b833754ada2a54b39e57c64 Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Wed, 6 May 2026 14:12:14 -0400 Subject: [PATCH 2/2] relayburn-cli: address PR #314 review on burn compare - P1: normalize ISO `--since` inputs to UTC `...mmmZ` so offset-bearing timestamps (e.g. `2026-05-06T00:00:00-07:00`) lex-compare correctly against ledger rows. 
New `normalize_iso_to_utc_z` parses RFC 3339 with `Z`, `+HH:MM`, `-HH:MM`, optional fractional seconds, and date-only forms; truncates sub-millisecond precision. `ymd_to_days` added as the round-trip inverse of the existing `days_to_ymd`. - P2: relative `--since` (e.g. `7d`, `24h`) now emits `YYYY-MM-DDTHH:MM:SS.000Z` rather than `...SSZ`, so same-second ledger rows with sub-second precision (e.g. `...12.500Z`) sort consistently against the cutoff. - CodeRabbit: every `eprintln!`-then-return-exit-code branch in `run_inner` now returns `Err(anyhow!(...))`. The outer `run` already routes errors through `report_error`, so `burn --json compare ...` emits the documented `{"error": "..."}` envelope on stdout instead of plain stderr. Exit code remains 2. Messages stripped of their embedded `burn:` prefix since `report_error` adds one. Tests: 11 new unit tests covering the offset, fractional-second, date-only, lowercase-`z`, and reject-garbage cases plus a sanity check that the canonical `.000Z` format lex-orders correctly against ledger rows. `cargo test --workspace` and `BURN_GOLDEN=1 cargo test --test golden -p relayburn-cli` both green. --- crates/relayburn-cli/src/commands/compare.rs | 371 +++++++++++++++++-- 1 file changed, 334 insertions(+), 37 deletions(-) diff --git a/crates/relayburn-cli/src/commands/compare.rs b/crates/relayburn-cli/src/commands/compare.rs index bdc57344..01faf896 100644 --- a/crates/relayburn-cli/src/commands/compare.rs +++ b/crates/relayburn-cli/src/commands/compare.rs @@ -19,7 +19,7 @@ use relayburn_sdk::{ use serde_json::{json, Value}; use crate::cli::{CompareArgs, GlobalArgs}; -use crate::render::error::{report_error, EXIT_GENERIC_ERROR}; +use crate::render::error::report_error; use crate::render::json::render_json; const FIDELITY_CHOICES: &[&str] = &[ @@ -39,7 +39,7 @@ const FIDELITY_ORDER: &[&str] = &[ ]; const NEEDS_MODELS_MSG: &str = - "burn compare: needs at least 2 models. 
Run `burn summary --by-provider` (or `burn summary --by-tool`) to see which models have data."; + "compare: needs at least 2 models. Run `burn summary --by-provider` (or `burn summary --by-tool`) to see which models have data."; const NOTE_LIMIT: usize = 8; const DASH: &str = "—"; @@ -53,11 +53,15 @@ pub fn run(globals: &GlobalArgs, args: CompareArgs) -> i32 { fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { // 1. Parse positional models list (comma-separated, dedup, preserve order). + // Argument-validation failures route through `report_error` (the outer + // `run` catches the `Err` from this function) so `--json` mode emits + // the documented `{"error": ...}` envelope instead of plain stderr. + // `report_error` prepends `burn: ` for human stderr, so the messages + // here read as the natural-language continuation (no leading `burn`). let raw = match args.models.as_deref() { Some(s) => s, None => { - eprintln!("{NEEDS_MODELS_MSG}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("{NEEDS_MODELS_MSG}")); } }; let mut seen: BTreeSet = BTreeSet::new(); @@ -72,27 +76,26 @@ fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { } } if models.len() < 2 { - eprintln!("{NEEDS_MODELS_MSG}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("{NEEDS_MODELS_MSG}")); } // 2. Resolve --fidelity / --include-partial. 
let mut min_fidelity: FidelityClass = FidelityClass::UsageOnly; if let Some(raw) = args.fidelity.as_deref() { if !FIDELITY_CHOICES.contains(&raw) { - eprintln!( - "burn: invalid --fidelity: {raw} (expected one of {})", + return Err(anyhow!( + "invalid --fidelity: {raw} (expected one of {})", FIDELITY_CHOICES.join(", ") - ); - return Ok(EXIT_GENERIC_ERROR); + )); } min_fidelity = parse_fidelity(raw)?; } if args.include_partial { if let Some(raw) = args.fidelity.as_deref() { if raw != "partial" { - eprintln!("burn: --include-partial conflicts with --fidelity {raw}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!( + "--include-partial conflicts with --fidelity {raw}" + )); } } min_fidelity = FidelityClass::Partial; @@ -102,8 +105,7 @@ fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { // per-command so the global JSON take-precedence rule in the TS CLI // becomes "explicit conflict" here — same exit code, same message. if globals.json && args.csv { - eprintln!("burn: --json and --csv are mutually exclusive; pick one."); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("--json and --csv are mutually exclusive; pick one.")); } // 4. Provider filter. Surfaced as an explicit "not yet wired" error @@ -121,8 +123,7 @@ fn run_inner(globals: &GlobalArgs, args: CompareArgs) -> Result { // 5. min-sample. let min_sample = args.min_sample.unwrap_or(DEFAULT_MIN_SAMPLE); if min_sample < 1 { - eprintln!("burn: invalid --min-sample: {min_sample}"); - return Ok(EXIT_GENERIC_ERROR); + return Err(anyhow!("invalid --min-sample: {min_sample}")); } // 6. Honor --no-archive by exporting RELAYBURN_ARCHIVE=0 for the @@ -237,9 +238,22 @@ fn fidelity_class_str(cls: FidelityClass) -> &'static str { } } -/// Normalize `--since` exactly like the SDK's free-fn would (relative -/// `7d` → ISO Z, ISO pass-through, garbage → error). Inlined here rather -/// than imported because the SDK helper isn't on the public surface. 
+/// Normalize `--since` exactly like the TS CLI's `parseSinceArg` does: +/// +/// - Relative ranges (`7d`, `24h`, `4w`, `30m`) → `now - delta` rendered +/// as a fully canonical UTC ISO string with milliseconds +/// (`YYYY-MM-DDTHH:MM:SS.mmmZ`). +/// - ISO inputs (with or without an offset, with or without fractional +/// seconds) get parsed and re-rendered as UTC `...Z` with milliseconds. +/// This matters because `Ledger::query_turns` applies `since` via +/// lexicographic comparison against stored `...mmmZ` timestamps: +/// * an offset like `2026-05-06T00:00:00-07:00` would otherwise sort +/// before any ledger row regardless of the actual instant, and +/// * a no-fraction `...12Z` would lex-sort after `...12.500Z` even +/// though `...12.500Z` is the later instant (`'.'` orders before +/// `'Z'` byte-wise), so a same-second cutoff would wrongly drop rows. +/// Re-emitting as UTC with `.000Z` (or the original sub-second +/// precision) keeps the lex order stable against the ledger. +/// - Garbage → error. fn normalize_since(since: Option<&str>) -> Result> { let Some(raw) = since else { return Ok(None); @@ -260,14 +274,14 @@ .map(|d| d.as_secs()) .unwrap_or(0); let when = now.saturating_sub(secs_back); - return Ok(Some(format_iso_z(when))); + return Ok(Some(format_iso_z_ms(when, 0))); } - if !looks_like_iso(raw) { - return Err(anyhow!( - "invalid since: {raw} (expected ISO timestamp or relative range like 7d)" - )); + if let Some(canonical) = normalize_iso_to_utc_z(raw) { + return Ok(Some(canonical)); } - Ok(Some(raw.to_string())) + Err(anyhow!( + "invalid since: {raw} (expected ISO timestamp or relative range like 7d)" + )) } fn parse_relative(s: &str) -> Option<(u64, char)> { @@ -287,24 +301,182 @@ Some((n, unit)) } -fn looks_like_iso(s: &str) -> bool { - let b = s.as_bytes(); - b.len() >= 10 - && b[0..4].iter().all(|c| c.is_ascii_digit()) - && b[4] == b'-' - && b[5..7].iter().all(|c| c.is_ascii_digit()) - && b[7] == b'-' - && b[8..10].iter().all(|c| 
c.is_ascii_digit()) +/// Parse an ISO 8601 / RFC 3339 timestamp and re-emit it as a fully +/// canonical UTC `YYYY-MM-DDTHH:MM:SS.mmmZ` string. Handles: +/// +/// - `YYYY-MM-DD` (date-only — assumed midnight UTC). +/// - `YYYY-MM-DDTHH:MM:SS` (offset-less — assumed UTC). +/// - `YYYY-MM-DDTHH:MM:SS.fff` (fractional seconds, any width 1–9). +/// - `Z` suffix. +/// - `+HH:MM` / `-HH:MM` offsets. +/// +/// Returns `None` for inputs that don't look ISO-shaped, so the caller +/// can surface a usage error. Emits with millisecond precision: any +/// sub-millisecond fractional digits are truncated, matching JS +/// `Date.toISOString()` rounding behavior closely enough for ledger +/// `since` lex-ordering. Whole-second inputs are widened to `.000Z`. +fn normalize_iso_to_utc_z(s: &str) -> Option { + let bytes = s.as_bytes(); + if bytes.len() < 10 { + return None; + } + // YYYY-MM-DD prefix. + if !(bytes[0..4].iter().all(|c| c.is_ascii_digit()) + && bytes[4] == b'-' + && bytes[5..7].iter().all(|c| c.is_ascii_digit()) + && bytes[7] == b'-' + && bytes[8..10].iter().all(|c| c.is_ascii_digit())) + { + return None; + } + let year: i64 = std::str::from_utf8(&bytes[0..4]).ok()?.parse().ok()?; + let month: u32 = std::str::from_utf8(&bytes[5..7]).ok()?.parse().ok()?; + let day: u32 = std::str::from_utf8(&bytes[8..10]).ok()?.parse().ok()?; + if !(1..=12).contains(&month) || !(1..=31).contains(&day) { + return None; + } + + // Defaults for date-only inputs. + let mut hour: u32 = 0; + let mut minute: u32 = 0; + let mut second: u32 = 0; + let mut millis: u32 = 0; + let mut offset_minutes: i32 = 0; + + if bytes.len() > 10 { + // Expect a time component starting with 'T' or ' '. + if !(bytes[10] == b'T' || bytes[10] == b't' || bytes[10] == b' ') { + return None; + } + // HH:MM:SS at offsets 11..19. 
+ if bytes.len() < 19 { + return None; + } + if !(bytes[11..13].iter().all(|c| c.is_ascii_digit()) + && bytes[13] == b':' + && bytes[14..16].iter().all(|c| c.is_ascii_digit()) + && bytes[16] == b':' + && bytes[17..19].iter().all(|c| c.is_ascii_digit())) + { + return None; + } + hour = std::str::from_utf8(&bytes[11..13]).ok()?.parse().ok()?; + minute = std::str::from_utf8(&bytes[14..16]).ok()?.parse().ok()?; + second = std::str::from_utf8(&bytes[17..19]).ok()?.parse().ok()?; + if hour > 23 || minute > 59 || second > 60 { + return None; + } + + // Optional fractional seconds. + let mut idx = 19; + if idx < bytes.len() && (bytes[idx] == b'.' || bytes[idx] == b',') { + idx += 1; + let frac_start = idx; + while idx < bytes.len() && bytes[idx].is_ascii_digit() { + idx += 1; + } + if idx == frac_start { + return None; + } + // Truncate to milliseconds: take the first 3 digits, pad + // with zeros if shorter, ignore the rest. + let mut frac_str = String::from(std::str::from_utf8(&bytes[frac_start..idx]).ok()?); + if frac_str.len() > 3 { + frac_str.truncate(3); + } + while frac_str.len() < 3 { + frac_str.push('0'); + } + millis = frac_str.parse().ok()?; + } + + // Optional offset. + if idx < bytes.len() { + match bytes[idx] { + b'Z' | b'z' => { + if idx + 1 != bytes.len() { + return None; + } + } + b'+' | b'-' => { + let sign: i32 = if bytes[idx] == b'-' { -1 } else { 1 }; + idx += 1; + if bytes.len() < idx + 5 { + return None; + } + if !(bytes[idx..idx + 2].iter().all(|c| c.is_ascii_digit()) + && bytes[idx + 2] == b':' + && bytes[idx + 3..idx + 5].iter().all(|c| c.is_ascii_digit())) + { + return None; + } + let oh: i32 = std::str::from_utf8(&bytes[idx..idx + 2]) + .ok()? + .parse() + .ok()?; + let om: i32 = std::str::from_utf8(&bytes[idx + 3..idx + 5]) + .ok()? 
+ .parse() + .ok()?; + if oh > 23 || om > 59 { + return None; + } + offset_minutes = sign * (oh * 60 + om); + if idx + 5 != bytes.len() { + return None; + } + } + _ => return None, + } + } + } + + // Convert (year, month, day, h, m, s, offset) → unix seconds. + let days = ymd_to_days(year, month, day)?; + let local_secs: i64 = days * 86_400 + (hour as i64) * 3_600 + (minute as i64) * 60 + (second as i64); + // Subtract the offset to land on UTC seconds: `local = utc + offset`, + // so `utc = local - offset`. Offset is in minutes. + let utc_secs: i64 = local_secs - (offset_minutes as i64) * 60; + Some(format_iso_z_ms_signed(utc_secs, millis)) +} + +/// Format Unix-seconds as `YYYY-MM-DDTHH:MM:SS.mmmZ`. Always emits the +/// milliseconds component so the resulting string sorts correctly against +/// ledger rows that always carry sub-second precision. +fn format_iso_z_ms(secs: u64, millis: u32) -> String { + format_iso_z_ms_signed(secs as i64, millis) } -fn format_iso_z(secs: u64) -> String { - let total_days = (secs / 86_400) as i64; - let secs_in_day = (secs % 86_400) as u32; +fn format_iso_z_ms_signed(secs: i64, millis: u32) -> String { + // `secs` may be negative for pre-1970 timestamps — split into a + // floored day count and a non-negative seconds-in-day remainder so + // the formatting math doesn't have to care about sign. + let total_days = secs.div_euclid(86_400); + let secs_in_day = secs.rem_euclid(86_400) as u32; let hour = secs_in_day / 3_600; let minute = (secs_in_day / 60) % 60; let second = secs_in_day % 60; let (year, month, day) = days_to_ymd(total_days); - format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z") + format!( + "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z" + ) +} + +/// Civil-date → days-from-Unix-epoch (Howard Hinnant's algorithm, +/// proleptic Gregorian). Inverse of [`days_to_ymd`]. 
+fn ymd_to_days(year: i64, month: u32, day: u32) -> Option { + if !(1..=12).contains(&month) || !(1..=31).contains(&day) { + return None; + } + let m = month as i64; + let d = day as i64; + let y = if m <= 2 { year - 1 } else { year }; + let era = if y >= 0 { y } else { y - 399 } / 400; + let yoe = (y - era * 400) as u64; // [0, 399] + let mp = if m > 2 { m - 3 } else { m + 9 } as u64; // [0, 11] + let doy = (153 * mp + 2) / 5 + (d as u64) - 1; // [0, 365] + let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // [0, 146096] + Some(era * 146_097 + (doe as i64) - 719_468) } fn days_to_ymd(days_from_epoch: i64) -> (i64, u32, u32) { @@ -1051,4 +1223,129 @@ mod tests { assert_eq!(display_model_name("anthropic/claude-sonnet-4-6"), "claude-sonnet-4-6"); assert_eq!(display_model_name("claude-haiku-4-5"), "claude-haiku-4-5"); } + + // ------------------------------------------------------------------- + // Codex P1 / P2: ISO normalization + relative-range millisecond + // padding. Both bugs surface as ledger lex-order skews: the ledger + // stores rows with `...mmmZ` precision, so a `since` that doesn't + // match that shape gets compared as the wrong instant. + // ------------------------------------------------------------------- + + #[test] + fn normalize_iso_widens_no_fraction_to_three_zeros() { + // P2 root cause: same-second ledger row `...12.500Z` would sort + // *before* a `--since` cutoff of `...12Z`, dropping valid turns. + // Normalizing widens to `.000Z` so the cutoff is the lower bound + // for that second. 
+ assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00Z"), + Some("2026-05-06T00:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_preserves_millisecond_precision() { + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00.500Z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + // Sub-millisecond digits are truncated to 3 (matches the ledger + // shape; mirrors `Date.toISOString()` truncation closely enough + // for `since`-cutoff lex ordering). + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00.500999Z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + // Shorter fraction is right-padded. + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00.5Z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + } + + #[test] + fn normalize_iso_converts_negative_offset_to_utc() { + // P1 root cause: `-07:00` is 7h *behind* UTC, so the same + // wall-clock time corresponds to a UTC instant 7h *later*. + // 2026-05-06T00:00:00-07:00 == 2026-05-06T07:00:00Z. + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00-07:00"), + Some("2026-05-06T07:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_converts_positive_offset_to_utc() { + // 2026-05-06T00:00:00+09:00 == 2026-05-05T15:00:00Z. + assert_eq!( + normalize_iso_to_utc_z("2026-05-06T00:00:00+09:00"), + Some("2026-05-05T15:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_handles_lowercase_z() { + assert_eq!( + normalize_iso_to_utc_z("2026-05-06t00:00:00.500z"), + Some("2026-05-06T00:00:00.500Z".to_string()), + ); + } + + #[test] + fn normalize_iso_accepts_date_only() { + // Date-only input: no time component → midnight UTC. 
+ assert_eq!( + normalize_iso_to_utc_z("2026-05-06"), + Some("2026-05-06T00:00:00.000Z".to_string()), + ); + } + + #[test] + fn normalize_iso_rejects_garbage() { + assert_eq!(normalize_iso_to_utc_z("not a date"), None); + assert_eq!(normalize_iso_to_utc_z("2026/05/06"), None); + assert_eq!(normalize_iso_to_utc_z("2026-13-01T00:00:00Z"), None); // bad month + assert_eq!(normalize_iso_to_utc_z("2026-05-06T25:00:00Z"), None); // bad hour + assert_eq!(normalize_iso_to_utc_z("2026-05-06T00:00:00+9"), None); // malformed offset + } + + #[test] + fn normalize_since_relative_emits_milliseconds() { + // P2: relative range output must carry the `.000Z` fragment so + // ledger rows with sub-second precision sort correctly against + // the cutoff. We can't pin the absolute value (depends on `now`), + // but we can assert the shape. + let out = normalize_since(Some("7d")).unwrap().unwrap(); + assert!(out.ends_with(".000Z"), "expected .000Z suffix in {out}"); + assert_eq!(out.len(), 24, "expected 24-char canonical shape: {out}"); + } + + #[test] + fn normalize_since_iso_pass_normalizes_offset() { + let out = normalize_since(Some("2026-05-06T00:00:00-07:00")) + .unwrap() + .unwrap(); + assert_eq!(out, "2026-05-06T07:00:00.000Z"); + } + + #[test] + fn normalize_since_relative_format_is_lex_compatible_with_ledger_rows() { + // Sanity check: a canonical `.000Z` cutoff must lex *before* the + // same-second ledger row carrying any non-zero millisecond + // suffix. This is the property the bug was breaking. + let cutoff = "2026-05-06T12:00:00.000Z"; + let row_a = "2026-05-06T12:00:00.500Z"; + let row_b = "2026-05-06T12:00:00.001Z"; + assert!(cutoff <= row_a); + assert!(cutoff <= row_b); + } + + #[test] + fn ymd_round_trip() { + for (y, m, d) in &[(1970, 1, 1), (2026, 5, 6), (2000, 2, 29), (1999, 12, 31)] { + let days = ymd_to_days(*y, *m, *d).unwrap(); + let (ry, rm, rd) = days_to_ymd(days); + assert_eq!((*y, *m, *d), (ry, rm, rd)); + } + } }