From 3fb2ff49f00d5a72a343d96d29cb8ec335606317 Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Sat, 25 Apr 2026 22:06:58 -0400 Subject: [PATCH] Complete fidelity-aware command contracts --- CHANGELOG.md | 4 + README.md | 4 +- packages/analyze/CHANGELOG.md | 4 + packages/analyze/src/compare.test.ts | 110 ++++++++++++++++++- packages/analyze/src/compare.ts | 109 +++++++++++++++++-- packages/analyze/src/index.ts | 10 +- packages/analyze/src/plan-usage.test.ts | 25 +++++ packages/analyze/src/plan-usage.ts | 42 +++++++- packages/cli/CHANGELOG.md | 4 + packages/cli/src/cli.ts | 2 +- packages/cli/src/commands/compare.ts | 97 +++++++++++++++-- packages/cli/src/commands/limits.test.ts | 24 +++++ packages/cli/src/commands/limits.ts | 9 +- packages/cli/src/commands/plans.ts | 5 +- packages/cli/src/commands/summary.ts | 131 ++++++++++++++++++++--- packages/cli/src/commands/waste.test.ts | 48 ++++++++- packages/cli/src/commands/waste.ts | 100 ++++++++++++++++- packages/reader/CHANGELOG.md | 4 + packages/reader/src/codex.test.ts | 63 +++++++++++ packages/reader/src/codex.ts | 99 ++++++++++++++++- packages/reader/src/opencode.test.ts | 54 ++++++++++ packages/reader/src/opencode.ts | 55 ++++++++-- 22 files changed, 947 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0499d0d..5563e32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [Unreleased] +### Changed + +- **Coverage/fidelity command contract completed** (#41). Codex and OpenCode turns now carry normalized fidelity metadata, and downstream commands consume it: `burn compare` defaults to full/usage-only samples and reports fidelity exclusions, `burn summary` marks partial usage/cost fields instead of rendering unknowns as zero, `burn waste` refuses unsupported attribution with missing prerequisites, and plan/limit projections flag partial-fidelity confidence. + ## [0.19.0] - 2026-04-26 ### Added diff --git a/README.md b/README.md index d1c6559..23e7391 100644 --- a/README.md +++ b/README.md @@ -257,7 +257,7 @@ You can override per-call via `costForUsage(usage, model, pricing, { reasoningMo ``` burn summary [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] burn by-tool [--since 7d] [--project ] [--session ] -burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--json|--csv] +burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--include-partial] [--fidelity full,usage-only] [--json|--csv] burn claude [--tag k=v ...] [-- ] ``` @@ -278,7 +278,7 @@ exploration 118 $0.013 — 52 $0.003 — One-shot rate = `turns with edits and zero intra-turn retries / edit turns`. It's `—` for categories that don't produce edits (`exploration`, `brainstorming`, etc.). Missing-data cells render as `—`, never `$0.00` or `0%`. -This is observed data, not counterfactual: it tells you what happened when you actually used both models, not what *would have* happened if you'd picked differently. Cells with `turns < --min-sample` (default 5) are flagged as indicative; categories where only one model has data surface a coverage note beneath the table. The JSON cell shape exposes both `noData` (we never saw this combination) and `insufficientSample` (we have data but not much) so consumers can tell them apart cleanly. +This is observed data, not counterfactual: it tells you what happened when you actually used both models, not what *would have* happened if you'd picked differently. By default `burn compare` includes `full` and `usage-only` fidelity turns and excludes `partial`, `aggregate-only`, and `cost-only` turns; use `--include-partial` or `--fidelity ` to opt in. Cells with `turns < --min-sample` (default 5) are flagged as indicative; categories where only one model has data surface a coverage note beneath the table. The JSON cell shape exposes both `noData` (we never saw this combination) and `insufficientSample` (we have data but not much) so consumers can tell them apart cleanly. Standard filters apply: `--session ` limits to a single session, `--agent ` limits to a stamped agent ID, `--workflow ` to a stamped workflow ID, `--project ` to a project path or git-canonical projectKey. diff --git a/packages/analyze/CHANGELOG.md b/packages/analyze/CHANGELOG.md index 166c4c0..5a6aa5f 100644 --- a/packages/analyze/CHANGELOG.md +++ b/packages/analyze/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- **Fidelity-aware compare and plan calculations** (#41). `buildCompareTable` now defaults to `full` + `usage-only` turns, tracks included/excluded sample counts by fidelity class, skips cost/cache metrics when required coverage is missing, and exposes priced-turn totals so unknown cost is not rendered as free. `computePlanUsage` now reports costed/skipped/partial/unknown fidelity counts and marks projections `partialData` when spend relies on incomplete coverage. + ## [0.18.0] - 2026-04-26 ### Fixed diff --git a/packages/analyze/src/compare.test.ts b/packages/analyze/src/compare.test.ts index e1f7ffc..4089ec8 100644 --- a/packages/analyze/src/compare.test.ts +++ b/packages/analyze/src/compare.test.ts @@ -2,7 +2,8 @@ import { strict as assert } from 'node:assert'; import { describe, it } from 'node:test'; import type { EnrichedTurn } from '@relayburn/ledger'; -import type { ActivityCategory } from '@relayburn/reader'; +import { EMPTY_COVERAGE, makeFidelity } from '@relayburn/reader'; +import type { ActivityCategory, Fidelity } from '@relayburn/reader'; import { buildCompareTable } from './compare.js'; import { loadBuiltinPricing } from './pricing.js'; @@ -35,6 +36,34 @@ function turn( }; } +const FULL_FIDELITY: Fidelity = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, + hasCacheReadTokens: true, + hasCacheCreateTokens: true, + hasToolCalls: true, + hasToolResultEvents: true, + hasSessionRelationships: true, +}); + +const USAGE_ONLY_FIDELITY: Fidelity = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, +}); + +const PARTIAL_FIDELITY: Fidelity = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, +}); + +const AGGREGATE_FIDELITY: Fidelity = makeFidelity('per-session-aggregate', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, +}); + describe('buildCompareTable', () => { it('buckets turns by (model, activity) and reports per-cell metrics', async () => { const pricing = await loadBuiltinPricing(); @@ -251,4 +280,83 @@ describe('buildCompareTable', () => { assert.ok(cell.cacheHitRate !== null); assert.ok(Math.abs(cell.cacheHitRate! - 3000 / 4000) < 1e-9); }); + + it('excludes partial and aggregate fidelity by default and reports the sample loss', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', { + fidelity: FULL_FIDELITY, + hasEdits: true, + retries: 0, + }), + turn('claude-sonnet-4-6', 'coding', { + fidelity: USAGE_ONLY_FIDELITY, + hasEdits: true, + retries: 0, + }), + turn('claude-sonnet-4-6', 'coding', { + fidelity: PARTIAL_FIDELITY, + hasEdits: true, + retries: 0, + }), + turn('claude-sonnet-4-6', 'coding', { + fidelity: AGGREGATE_FIDELITY, + hasEdits: true, + retries: 0, + }), + ]; + const t = buildCompareTable(turns, { pricing, minSample: 1 }); + assert.equal(t.sample.totalTurns, 4); + assert.equal(t.sample.includedTurns, 2); + assert.equal(t.sample.excludedTurns, 2); + assert.equal(t.sample.excludedByClass.partial, 1); + assert.equal(t.sample.excludedByClass['aggregate-only'], 1); + assert.equal(t.cells['claude-sonnet-4-6']!['coding']!.turns, 2); + }); + + it('includes partial turns when requested but keeps missing-cost fields null', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', { + fidelity: PARTIAL_FIDELITY, + hasEdits: true, + retries: 0, + usage: { + input: 1000, + output: 0, + reasoning: 0, + cacheRead: 0, + cacheCreate5m: 0, + cacheCreate1h: 0, + }, + }), + ]; + const t = buildCompareTable(turns, { + pricing, + includePartial: true, + minSample: 1, + }); + const cell = t.cells['claude-sonnet-4-6']!['coding']!; + assert.equal(t.sample.includedTurns, 1); + assert.equal(cell.turns, 1); + assert.equal(cell.pricedTurns, 0, 'missing output coverage prevents fake $0 cost'); + assert.equal(cell.costPerTurn, null); + }); + + it('honors an explicit fidelity allow-list', async () => { + const pricing = await loadBuiltinPricing(); + const turns: EnrichedTurn[] = [ + turn('claude-sonnet-4-6', 'coding', { fidelity: FULL_FIDELITY }), + turn('claude-sonnet-4-6', 'coding', { fidelity: USAGE_ONLY_FIDELITY }), + ]; + const t = buildCompareTable(turns, { + pricing, + fidelity: ['full'], + minSample: 1, + }); + assert.deepEqual(t.sample.allowedFidelity, ['full']); + assert.equal(t.sample.includedTurns, 1); + assert.equal(t.sample.excludedByClass['usage-only'], 1); + assert.equal(t.cells['claude-sonnet-4-6']!['coding']!.turns, 1); + }); }); diff --git a/packages/analyze/src/compare.ts b/packages/analyze/src/compare.ts index 0539ec0..949778e 100644 --- a/packages/analyze/src/compare.ts +++ b/packages/analyze/src/compare.ts @@ -1,5 +1,5 @@ import type { EnrichedTurn } from '@relayburn/ledger'; -import type { ActivityCategory } from '@relayburn/reader'; +import type { ActivityCategory, Coverage, FidelityClass } from '@relayburn/reader'; import { costForTurn } from './cost.js'; import type { PricingTable } from './pricing.js'; @@ -36,17 +36,34 @@ export interface CompareTable { models: string[]; categories: string[]; cells: Record>; - totals: Record; + totals: Record; minSample: number; + sample: CompareSample; +} + +export interface CompareSample { + totalTurns: number; + includedTurns: number; + excludedTurns: number; + allowedFidelity: FidelityClass[]; + includeUnknownFidelity: boolean; + unknownFidelityTurns: number; + excludedByClass: Record; } export interface CompareOptions { pricing: PricingTable; models?: string[]; minSample?: number; + fidelity?: FidelityClass[]; + includePartial?: boolean; } export const DEFAULT_MIN_SAMPLE = 5; +export const DEFAULT_COMPARE_FIDELITY: ReadonlyArray = [ + 'full', + 'usage-only', +]; interface Accum { turns: number; @@ -62,9 +79,12 @@ interface Accum { export function buildCompareTable(turns: EnrichedTurn[], opts: CompareOptions): CompareTable { const minSample = opts.minSample ?? DEFAULT_MIN_SAMPLE; const modelFilter = opts.models && opts.models.length > 0 ? new Set(opts.models) : null; + const allowedFidelity = normalizeAllowedFidelity(opts); + const allowedSet = new Set(allowedFidelity); + const sample = emptySample(allowedFidelity); const byModelCategory = new Map>(); - const modelTotals = new Map(); + const modelTotals = new Map(); const modelSet = new Set(); const categorySet = new Set(); @@ -75,13 +95,16 @@ export function buildCompareTable(turns: EnrichedTurn[], opts: CompareOptions): if (modelFilter) { for (const m of modelFilter) { modelSet.add(m); - modelTotals.set(m, { turns: 0, totalCost: 0 }); + modelTotals.set(m, { turns: 0, pricedTurns: 0, totalCost: 0 }); } } for (const t of turns) { const model = t.model || 'unknown'; if (modelFilter && !modelFilter.has(model)) continue; + sample.totalTurns++; + if (!isTurnIncludedByFidelity(t, allowedSet, sample)) continue; + sample.includedTurns++; const cat = (t.activity as string | undefined) ?? 'unclassified'; modelSet.add(model); categorySet.add(cat); @@ -97,12 +120,13 @@ export function buildCompareTable(turns: EnrichedTurn[], opts: CompareOptions): byCat.set(cat, acc); } acc.turns++; - const mt = modelTotals.get(model) ?? { turns: 0, totalCost: 0 }; + const mt = modelTotals.get(model) ?? { turns: 0, pricedTurns: 0, totalCost: 0 }; mt.turns++; - const c = costForTurn(t, opts.pricing); + const c = hasCostCoverage(t) ? costForTurn(t, opts.pricing) : null; if (c) { acc.pricedTurns++; acc.totalCost += c.total; + mt.pricedTurns++; mt.totalCost += c.total; } modelTotals.set(model, mt); @@ -112,10 +136,13 @@ export function buildCompareTable(turns: EnrichedTurn[], opts: CompareOptions): acc.retriesSamples.push(r); if (r === 0) acc.oneShotTurns++; } - acc.cacheRead += t.usage.cacheRead; - acc.tokenDenominator += - t.usage.input + t.usage.cacheRead + t.usage.cacheCreate5m + t.usage.cacheCreate1h; + if (hasCacheHitCoverage(t)) { + acc.cacheRead += t.usage.cacheRead; + acc.tokenDenominator += + t.usage.input + t.usage.cacheRead + t.usage.cacheCreate5m + t.usage.cacheCreate1h; + } } + sample.excludedTurns = sample.totalTurns - sample.includedTurns; const models = [...modelSet].sort((a, b) => { const ca = modelTotals.get(a)?.totalCost ?? 0; @@ -145,7 +172,69 @@ export function buildCompareTable(turns: EnrichedTurn[], opts: CompareOptions): const totals: CompareTable['totals'] = {}; for (const [m, v] of modelTotals) totals[m] = v; - return { models, categories, cells, totals, minSample }; + return { models, categories, cells, totals, minSample, sample }; +} + +function normalizeAllowedFidelity(opts: CompareOptions): FidelityClass[] { + const seen = new Set(); + const out: FidelityClass[] = []; + const requested = + opts.fidelity && opts.fidelity.length > 0 + ? opts.fidelity + : DEFAULT_COMPARE_FIDELITY; + for (const cls of requested) { + if (!seen.has(cls)) { + seen.add(cls); + out.push(cls); + } + } + if (opts.includePartial && !seen.has('partial')) out.push('partial'); + return out; +} + +function emptySample(allowedFidelity: FidelityClass[]): CompareSample { + return { + totalTurns: 0, + includedTurns: 0, + excludedTurns: 0, + allowedFidelity, + includeUnknownFidelity: true, + unknownFidelityTurns: 0, + excludedByClass: { + full: 0, + 'usage-only': 0, + 'aggregate-only': 0, + 'cost-only': 0, + partial: 0, + }, + }; +} + +function isTurnIncludedByFidelity( + turn: EnrichedTurn, + allowed: ReadonlySet, + sample: CompareSample, +): boolean { + const fidelity = turn.fidelity; + if (!fidelity) { + sample.unknownFidelityTurns++; + return true; + } + if (allowed.has(fidelity.class)) return true; + sample.excludedByClass[fidelity.class]++; + return false; +} + +function hasCostCoverage(turn: EnrichedTurn): boolean { + const c = turn.fidelity?.coverage; + if (!c) return true; + return c.hasInputTokens && c.hasOutputTokens; +} + +function hasCacheHitCoverage(turn: EnrichedTurn): boolean { + const c = turn.fidelity?.coverage; + if (!c) return true; + return c.hasInputTokens && c.hasCacheReadTokens && c.hasCacheCreateTokens; } function toCell(acc: Accum | undefined, minSample: number): CompareCell { diff --git a/packages/analyze/src/index.ts b/packages/analyze/src/index.ts index 61b2cd5..dc8b713 100644 --- a/packages/analyze/src/index.ts +++ b/packages/analyze/src/index.ts @@ -2,8 +2,14 @@ export { flatten, loadBuiltinPricing, loadPricing } from './pricing.js'; export type { ModelCost, PricingTable, ReasoningMode } from './pricing.js'; export { costForTurn, costForUsage, sumCosts } from './cost.js'; export type { CostBreakdown, CostForUsageOptions } from './cost.js'; -export { buildCompareTable, DEFAULT_MIN_SAMPLE } from './compare.js'; -export type { CompareCategory, CompareCell, CompareOptions, CompareTable } from './compare.js'; +export { buildCompareTable, DEFAULT_COMPARE_FIDELITY, DEFAULT_MIN_SAMPLE } from './compare.js'; +export type { + CompareCategory, + CompareCell, + CompareOptions, + CompareSample, + CompareTable, +} from './compare.js'; export { attributeWaste, aggregateByFile, diff --git a/packages/analyze/src/plan-usage.test.ts b/packages/analyze/src/plan-usage.test.ts index e3ef4cc..9953857 100644 --- a/packages/analyze/src/plan-usage.test.ts +++ b/packages/analyze/src/plan-usage.test.ts @@ -2,6 +2,7 @@ import { strict as assert } from 'node:assert'; import { describe, it } from 'node:test'; import type { Plan } from '@relayburn/ledger'; +import { EMPTY_COVERAGE, makeFidelity } from '@relayburn/reader'; import type { TurnRecord } from '@relayburn/reader'; import { computePlanUsage, cycleBounds } from './plan-usage.js'; @@ -204,4 +205,28 @@ describe('computePlanUsage', () => { const u = computePlanUsage(plan, turns, { pricing: PRICING, now }); assert.equal(u.spentUsd, 3); }); + + it('marks projections as partial when fidelity is incomplete', () => { + const turns: TurnRecord[] = [ + turn({ + ts: '2026-04-05T00:00:00.000Z', + inputTokens: 1_000_000, + outputTokens: 0, + }), + { + ...turn({ ts: '2026-04-06T00:00:00.000Z', inputTokens: 1_000_000 }), + fidelity: makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + }), + }, + ]; + const u = computePlanUsage(plan, turns, { pricing: PRICING, now }); + assert.equal(u.partialData, true); + assert.equal(u.fidelity.matchedTurns, 2); + assert.equal(u.fidelity.costedTurns, 1); + assert.equal(u.fidelity.skippedTurns, 1); + assert.equal(u.fidelity.partialTurns, 1); + assert.equal(u.fidelity.unknownTurns, 1); + }); }); diff --git a/packages/analyze/src/plan-usage.ts b/packages/analyze/src/plan-usage.ts index 93904bb..128c77e 100644 --- a/packages/analyze/src/plan-usage.ts +++ b/packages/analyze/src/plan-usage.ts @@ -27,6 +27,17 @@ export interface PlanUsage { // Renderers should mark these projections as "limited data" per #39's // acceptance criteria. limitedData: boolean; + // True when spend/projection used turns whose fidelity is not fully known + // or skipped turns whose usage/pricing was insufficient. Renderers should + // mark projections as lower confidence instead of presenting them as exact. + partialData: boolean; + fidelity: { + matchedTurns: number; + costedTurns: number; + skippedTurns: number; + partialTurns: number; + unknownTurns: number; + }; } const MS_PER_DAY = 24 * 60 * 60 * 1000; @@ -49,13 +60,26 @@ export function computePlanUsage( const nowMs = now.getTime(); let spent = 0; + let matchedTurns = 0; + let costedTurns = 0; + let skippedTurns = 0; + let partialTurns = 0; + let unknownTurns = 0; for (const t of turns) { if (!matchesProvider(plan.provider, t)) continue; const ts = Date.parse(t.ts); if (!Number.isFinite(ts)) continue; if (ts < cycleStartMs || ts >= cycleEndMs) continue; - const cost = costForTurn(t, opts.pricing); - if (cost) spent += cost.total; + matchedTurns++; + if (!t.fidelity) unknownTurns++; + else if (t.fidelity.class !== 'full') partialTurns++; + const cost = hasCostCoverage(t) ? costForTurn(t, opts.pricing) : null; + if (cost) { + spent += cost.total; + costedTurns++; + } else { + skippedTurns++; + } } const elapsedMs = Math.max(0, nowMs - cycleStartMs); @@ -92,6 +116,14 @@ export function computePlanUsage( runwayDays, resetAt: cycleEnd.toISOString(), limitedData: daysElapsed < LIMITED_DATA_DAYS, + partialData: partialTurns > 0 || unknownTurns > 0 || skippedTurns > 0, + fidelity: { + matchedTurns, + costedTurns, + skippedTurns, + partialTurns, + unknownTurns, + }, }; } @@ -127,6 +159,12 @@ export function cycleBounds(resetDay: number, now: Date): { cycleStart: Date; cy return { cycleStart, cycleEnd }; } +function hasCostCoverage(turn: TurnRecord): boolean { + const c = turn.fidelity?.coverage; + if (!c) return true; + return c.hasInputTokens && c.hasOutputTokens; +} + function makeCycleAnchor(year: number, month: number, day: number): Date { // Date.UTC handles month over/underflow (month -1 → previous December, // month 12 → next January). For day, we manually clamp to the actual diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md index 4e6969f..45f266e 100644 --- a/packages/cli/CHANGELOG.md +++ b/packages/cli/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- **Fidelity-aware command output** (#41). `burn compare` adds `--include-partial` / `--fidelity`, reports included vs excluded sample size, and avoids `$0` for unpriced or missing-coverage costs. `burn summary` adds per-model usage coverage and priced-turn counts to JSON and marks partial TTY fields. `burn waste` now refuses source-level unsupported attribution with explicit missing prerequisites. `burn plans` / `burn limits` mark plan projections as partial-fidelity when underlying spend coverage is incomplete. + ## [0.21.0] - 2026-04-26 ### Added diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 1c71187..51bd905 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -30,7 +30,7 @@ Usage: burn limits [--watch [5s]] [--json] [--no-api] [--no-forecast] burn plans [add|remove|set-reset-day] … (run \`burn plans help\` for full usage) burn context [advise] [--project ] [--since 7d] [--kind ] [--top ] [--json] - burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--json|--csv] + burn compare [--models a,b] [--since 7d] [--project ] [--session ] [--workflow ] [--agent ] [--min-sample ] [--include-partial] [--fidelity full,usage-only] [--json|--csv] burn claude [--tag k=v ...] [-- ] burn codex [--tag k=v ...] [-- ] burn opencode [--tag k=v ...] [-- ] diff --git a/packages/cli/src/commands/compare.ts b/packages/cli/src/commands/compare.ts index cb55381..ac77a64 100644 --- a/packages/cli/src/commands/compare.ts +++ b/packages/cli/src/commands/compare.ts @@ -1,11 +1,13 @@ import { buildCompareTable, + DEFAULT_COMPARE_FIDELITY, DEFAULT_MIN_SAMPLE, loadPricing, type CompareCell, type CompareTable, } from '@relayburn/analyze'; import { queryAll, type Query } from '@relayburn/ledger'; +import type { FidelityClass } from '@relayburn/reader'; import { ingestAll } from '../ingest.js'; import { formatInt, formatUsd, parseSinceArg } from '../format.js'; @@ -15,7 +17,8 @@ const COMPARE_HELP = `burn compare — per-(model, activity) comparison table Usage: burn compare [--models a,b] [--since 7d] [--project ] [--session ] - [--workflow ] [--agent ] [--min-sample ] [--json|--csv] + [--workflow ] [--agent ] [--min-sample ] + [--include-partial] [--fidelity full,usage-only] [--json|--csv] Flags: --models comma-separated list of model names to include (default: all) @@ -26,6 +29,11 @@ Flags: --agent filter by stamped agentId --min-sample insufficient-sample threshold; cells below this get flagged in the coverage-notes block (default: 5) + --include-partial + include partial-fidelity turns in addition to the default + full,usage-only sample + --fidelity comma-separated fidelity classes to include. Valid: + full, usage-only, partial, aggregate-only, cost-only --json emit a stable JSON object (analyzedTurns, models, categories, totals, cells[]) --csv emit a CSV with one row per (model, category) pair @@ -72,6 +80,11 @@ export async function runCompare(args: ParsedArgs): Promise { process.stderr.write(`burn: invalid --min-sample: ${args.flags['min-sample']}\n`); return 2; } + const fidelity = parseFidelityFlag(args.flags['fidelity']); + if (fidelity instanceof Error) { + process.stderr.write(`burn: ${fidelity.message}\n`); + return 2; + } const wantJson = args.flags['json'] === true; const wantCsv = args.flags['csv'] === true; @@ -88,10 +101,12 @@ export async function runCompare(args: ParsedArgs): Promise { const opts: Parameters[1] = { pricing, minSample }; if (models) opts.models = models; + if (fidelity) opts.fidelity = fidelity; + if (args.flags['include-partial'] === true) opts.includePartial = true; const table = buildCompareTable(turns, opts); if (wantJson) { - process.stdout.write(JSON.stringify(toJson(table, turns.length), null, 2) + '\n'); + process.stdout.write(JSON.stringify(toJson(table), null, 2) + '\n'); return 0; } if (wantCsv) { @@ -99,11 +114,11 @@ export async function runCompare(args: ParsedArgs): Promise { return 0; } - process.stdout.write(renderTty(table, turns.length)); + process.stdout.write(renderTty(table)); return 0; } -function toJson(t: CompareTable, analyzedTurns: number): object { +function toJson(t: CompareTable): object { const cells: Array> = []; for (const m of t.models) { for (const cat of t.categories) { @@ -126,7 +141,10 @@ function toJson(t: CompareTable, analyzedTurns: number): object { } } return { - analyzedTurns, + analyzedTurns: t.sample.includedTurns, + matchedTurns: t.sample.totalTurns, + excludedTurns: t.sample.excludedTurns, + sample: t.sample, minSample: t.minSample, models: t.models, categories: t.categories, @@ -193,6 +211,31 @@ function round(n: number, digits: number): number { } const DASH = '—'; +const FIDELITY_CLASSES: ReadonlySet = new Set([ + 'full', + 'usage-only', + 'partial', + 'aggregate-only', + 'cost-only', +]); + +function parseFidelityFlag(flag: string | true | undefined): FidelityClass[] | undefined | Error { + if (flag === undefined) return undefined; + if (flag === true) { + return new Error('--fidelity requires a comma-separated list (for example: full,usage-only)'); + } + const out: FidelityClass[] = []; + for (const raw of flag.split(',').map((s) => s.trim()).filter(Boolean)) { + if (!FIDELITY_CLASSES.has(raw)) { + return new Error( + `invalid --fidelity value "${raw}" (valid: ${[...FIDELITY_CLASSES].join(', ')})`, + ); + } + out.push(raw as FidelityClass); + } + if (out.length === 0) return [...DEFAULT_COMPARE_FIDELITY]; + return out; +} function formatPct(p: number): string { return `${Math.round(p * 100)}%`; @@ -211,10 +254,18 @@ function cellFields(c: CompareCell): [string, string, string] { return [turns, cost, oneShot]; } -function renderTty(t: CompareTable, analyzedTurns: number): string { +function renderTty(t: CompareTable): string { const lines: string[] = []; lines.push(''); - lines.push(`turns analyzed: ${formatInt(analyzedTurns)}`); + if (t.sample.excludedTurns > 0) { + lines.push( + `turns analyzed: ${formatInt(t.sample.includedTurns)} of ${formatInt( + t.sample.totalTurns, + )} (${formatInt(t.sample.excludedTurns)} excluded by fidelity)`, + ); + } else { + lines.push(`turns analyzed: ${formatInt(t.sample.includedTurns)}`); + } lines.push(''); if (t.models.length === 0 || t.categories.length === 0) { @@ -293,17 +344,45 @@ function renderTty(t: CompareTable, analyzedTurns: number): string { lines.push(` … and ${notes.length - NOTE_LIMIT} more coverage gaps.`); } } + const fidelityNotes = renderFidelityExclusionNotes(t); + if (fidelityNotes.length > 0) { + lines.push(''); + for (const n of fidelityNotes) lines.push(` ${n}`); + } // Per-model totals lines.push(''); for (const m of t.models) { - const tot = t.totals[m] ?? { turns: 0, totalCost: 0 }; - lines.push(`${displayModelName(m)}: ${formatInt(tot.turns)} turns, ${formatUsd(tot.totalCost)} total`); + const tot = t.totals[m] ?? { turns: 0, pricedTurns: 0, totalCost: 0 }; + const cost = tot.pricedTurns > 0 ? formatUsd(tot.totalCost) : DASH; + const suffix = + tot.turns > 0 && tot.pricedTurns < tot.turns + ? ` (${formatInt(tot.pricedTurns)}/${formatInt(tot.turns)} priced)` + : ''; + lines.push(`${displayModelName(m)}: ${formatInt(tot.turns)} turns, ${cost} total${suffix}`); } lines.push(''); return lines.join('\n'); } +function renderFidelityExclusionNotes(t: CompareTable): string[] { + const notes: string[] = []; + const excluded = Object.entries(t.sample.excludedByClass) + .filter(([, count]) => count > 0) + .map(([cls, count]) => `${formatInt(count)} ${cls}`); + if (excluded.length > 0) { + notes.push( + `excluded by fidelity: ${excluded.join(', ')}. Use --include-partial or --fidelity to opt in.`, + ); + } + if (t.sample.unknownFidelityTurns > 0) { + notes.push( + `${formatInt(t.sample.unknownFidelityTurns)} older turns had unknown fidelity and were included for compatibility.`, + ); + } + return notes; +} + function renderRow(row: string[], widths: number[], sep: string): string { return row.map((cell, i) => cell.padEnd(widths[i]!)).join(sep).trimEnd(); } diff --git a/packages/cli/src/commands/limits.test.ts b/packages/cli/src/commands/limits.test.ts index 28c0e17..13c9f32 100644 --- a/packages/cli/src/commands/limits.test.ts +++ b/packages/cli/src/commands/limits.test.ts @@ -221,6 +221,14 @@ describe('burn limits', () => { runwayDays: 29, resetAt: '2026-05-01T00:00:00.000Z', limitedData: false, + partialData: false, + fidelity: { + matchedTurns: 10, + costedTurns: 10, + skippedTurns: 0, + partialTurns: 0, + unknownTurns: 0, + }, }, }, ], @@ -260,6 +268,14 @@ describe('burn limits', () => { runwayDays: null, resetAt: '2026-05-22T00:00:00.000Z', limitedData: true, + partialData: false, + fidelity: { + matchedTurns: 1, + costedTurns: 1, + skippedTurns: 0, + partialTurns: 0, + unknownTurns: 0, + }, }, }, ], @@ -318,6 +334,14 @@ describe('burn limits', () => { runwayDays: null, resetAt: '2026-05-01T00:00:00.000Z', limitedData: false, + partialData: false, + fidelity: { + matchedTurns: 3, + costedTurns: 3, + skippedTurns: 0, + partialTurns: 0, + unknownTurns: 0, + }, }, }, ], diff --git a/packages/cli/src/commands/limits.ts b/packages/cli/src/commands/limits.ts index 8f168cc..722c98b 100644 --- a/packages/cli/src/commands/limits.ts +++ b/packages/cli/src/commands/limits.ts @@ -144,6 +144,8 @@ export async function runLimits(args: ParsedArgs, deps: LimitsDeps = {}): Promis runwayDays: s.usage.runwayDays, resetAt: s.usage.resetAt, limitedData: s.usage.limitedData, + partialData: s.usage.partialData, + fidelity: s.usage.fidelity, })), }, null, @@ -250,8 +252,11 @@ function renderTty(opts: { const overOrUnder = u.overBudget ? `${formatUsd(u.projectedEndOfCycleUsd - u.plan.budgetUsd)} over` : `${(((u.plan.budgetUsd - u.projectedEndOfCycleUsd) / u.plan.budgetUsd) * 100).toFixed(0)}% under`; - const limited = u.limitedData ? ' (limited data)' : ''; - lines.push(` Projected: ${projected} end-of-cycle (${overOrUnder})${limited}`); + const notes = []; + if (u.limitedData) notes.push('limited data'); + if (u.partialData) notes.push('partial fidelity'); + const note = notes.length > 0 ? ` (${notes.join(', ')})` : ''; + lines.push(` Projected: ${projected} end-of-cycle (${overOrUnder})${note}`); if (u.runwayDays !== null) { lines.push(` Runway: ${u.runwayDays} more day${u.runwayDays === 1 ? '' : 's'} at current rate`); } diff --git a/packages/cli/src/commands/plans.ts b/packages/cli/src/commands/plans.ts index 822e04d..d328691 100644 --- a/packages/cli/src/commands/plans.ts +++ b/packages/cli/src/commands/plans.ts @@ -76,7 +76,10 @@ async function runList(args: ParsedArgs): Promise { for (const s of statuses) { const u = s.usage; const projected = formatUsd(u.projectedEndOfCycleUsd); - const projectedCell = u.limitedData ? `${projected} (limited data)` : projected; + const notes = []; + if (u.limitedData) notes.push('limited data'); + if (u.partialData) notes.push('partial fidelity'); + const projectedCell = notes.length > 0 ? `${projected} (${notes.join(', ')})` : projected; rows.push([ u.plan.id, u.plan.name, diff --git a/packages/cli/src/commands/summary.ts b/packages/cli/src/commands/summary.ts index 45ccce5..87130b6 100644 --- a/packages/cli/src/commands/summary.ts +++ b/packages/cli/src/commands/summary.ts @@ -15,7 +15,7 @@ import type { } from '@relayburn/analyze'; import { queryAll, readContent, type Query } from '@relayburn/ledger'; import type { EnrichedTurn } from '@relayburn/ledger'; -import type { ContentRecord } from '@relayburn/reader'; +import type { ContentRecord, Coverage } from '@relayburn/reader'; import { ingestAll } from '../ingest.js'; import { formatInt, formatUsd, parseSinceArg, table } from '../format.js'; @@ -58,11 +58,14 @@ export async function runSummary(args: ParsedArgs): Promise { appendedTurns: ingestReport.appendedTurns, }, turns: turns.length, + pricedTurns: rowsByModel.reduce((sum, r) => sum + r.pricedTurns, 0), totalCost, byModel: rowsByModel.map((r) => ({ model: r.model, turns: r.turns, usage: r.usage, + usageCoverage: r.usageCoverage, + pricedTurns: r.pricedTurns, cost: r.cost, })), fidelity, @@ -92,20 +95,27 @@ export async function runSummary(args: ParsedArgs): Promise { dataRows.push([ r.model, formatInt(r.turns), - formatInt(r.usage.input), - formatInt(r.usage.output), - formatInt(r.usage.reasoning), - formatInt(r.usage.cacheRead), - formatInt(r.usage.cacheCreate5m + r.usage.cacheCreate1h), - formatUsd(r.cost.total), + formatUsageCell(r, 'input'), + formatUsageCell(r, 'output'), + formatUsageCell(r, 'reasoning'), + formatUsageCell(r, 'cacheRead'), + formatUsageCell(r, 'cacheCreate'), + formatCostCell(r), ]); } lines.push(table(dataRows)); lines.push(''); - lines.push(`total cost: ${formatUsd(totalCost.total)}`); - lines.push( - ` input ${formatUsd(totalCost.input)} / output ${formatUsd(totalCost.output)} / reasoning ${formatUsd(totalCost.reasoning)} / cacheRead ${formatUsd(totalCost.cacheRead)} / cacheCreate ${formatUsd(totalCost.cacheCreate)}`, - ); + const costPartial = rowsByModel.some((r) => r.pricedTurns < r.turns); + const pricedTurns = rowsByModel.reduce((sum, r) => sum + r.pricedTurns, 0); + if (pricedTurns === 0) { + lines.push('total cost: —'); + lines.push(' cost breakdown unavailable'); + } else { + lines.push(`total cost: ${formatUsd(totalCost.total)}${costPartial ? '*' : ''}`); + lines.push( + ` input ${formatUsd(totalCost.input)} / output ${formatUsd(totalCost.output)} / reasoning ${formatUsd(totalCost.reasoning)} / cacheRead ${formatUsd(totalCost.cacheRead)} / cacheCreate ${formatUsd(totalCost.cacheCreate)}`, + ); + } lines.push(''); // Only print a fidelity line when *something* is below full — the common @@ -116,6 +126,12 @@ export async function runSummary(args: ParsedArgs): Promise { lines.push(fidelityNotice); lines.push(''); } + if (rowsByModel.some(hasPartialDisplayCoverage)) { + lines.push( + '* partial coverage: some numeric fields or prices are unknown for at least one row (use --json for counts).', + ); + lines.push(''); + } if (args.flags['quality'] === true) { const contentBySession = await loadContentForQuality(turns); @@ -192,9 +208,19 @@ interface ModelRow { model: string; turns: number; usage: EnrichedTurn['usage']; + usageCoverage: Record; + pricedTurns: number; cost: CostBreakdown; } +type UsageField = 'input' | 'output' | 'reasoning' | 'cacheRead' | 'cacheCreate'; + +interface FieldCoverage { + knownTurns: number; + missingTurns: number; + unknownTurns: number; +} + function renderSubagentTreeMode( args: ParsedArgs, turns: EnrichedTurn[], @@ -333,6 +359,8 @@ function aggregateByModel(turns: EnrichedTurn[], pricing: Parameters b.cost.total - a.cost.total); } + +function emptyUsageCoverage(): Record { + return { + input: emptyFieldCoverage(), + output: emptyFieldCoverage(), + reasoning: emptyFieldCoverage(), + cacheRead: emptyFieldCoverage(), + cacheCreate: emptyFieldCoverage(), + }; +} + +function emptyFieldCoverage(): FieldCoverage { + return { knownTurns: 0, missingTurns: 0, unknownTurns: 0 }; +} + +function updateUsageCoverage(row: ModelRow, turn: EnrichedTurn): void { + updateField(row.usageCoverage.input, turn, 'hasInputTokens'); + updateField(row.usageCoverage.output, turn, 'hasOutputTokens'); + updateField(row.usageCoverage.reasoning, turn, 'hasReasoningTokens'); + updateField(row.usageCoverage.cacheRead, turn, 'hasCacheReadTokens'); + updateField(row.usageCoverage.cacheCreate, turn, 'hasCacheCreateTokens'); +} + +function updateField( + field: FieldCoverage, + turn: EnrichedTurn, + coverageKey: keyof Coverage, +): void { + if (!turn.fidelity) { + field.unknownTurns++; + return; + } + if (turn.fidelity.coverage[coverageKey]) field.knownTurns++; + else field.missingTurns++; +} + +function formatUsageCell(row: ModelRow, field: UsageField): string { + const c = row.usageCoverage[field]; + const value = usageValue(row, field); + if (c.knownTurns === 0 && c.unknownTurns === 0 && c.missingTurns > 0) return '—'; + const suffix = c.knownTurns === row.turns && c.unknownTurns === 0 ? '' : '*'; + return `${formatInt(value)}${suffix}`; +} + +function usageValue(row: ModelRow, field: UsageField): number { + switch (field) { + case 'input': + return row.usage.input; + case 'output': + return row.usage.output; + case 'reasoning': + return row.usage.reasoning; + case 'cacheRead': + return row.usage.cacheRead; + case 'cacheCreate': + return row.usage.cacheCreate5m + row.usage.cacheCreate1h; + } +} + +function formatCostCell(row: ModelRow): string { + if (row.pricedTurns === 0) return '—'; + const suffix = row.pricedTurns === row.turns ? '' : '*'; + return `${formatUsd(row.cost.total)}${suffix}`; +} + +function hasPartialDisplayCoverage(row: ModelRow): boolean { + if (row.pricedTurns < row.turns) return true; + return Object.values(row.usageCoverage).some( + (c) => c.missingTurns > 0 || c.unknownTurns > 0, + ); +} + +function hasCostCoverage(turn: EnrichedTurn): boolean { + const c = turn.fidelity?.coverage; + if (!c) return true; + return c.hasInputTokens && c.hasOutputTokens; +} diff --git a/packages/cli/src/commands/waste.test.ts b/packages/cli/src/commands/waste.test.ts index 83d2980..554b4af 100644 --- a/packages/cli/src/commands/waste.test.ts +++ b/packages/cli/src/commands/waste.test.ts @@ -9,7 +9,9 @@ import type { WasteResult, } from '@relayburn/analyze'; -import { formatWasteReport, isAttributionDegraded } from './waste.js'; +import { EMPTY_COVERAGE, makeFidelity } from '@relayburn/reader'; + +import { checkWasteFidelity, formatWasteReport, isAttributionDegraded } from './waste.js'; function session( id: string, @@ -194,3 +196,47 @@ describe('formatWasteReport', () => { ); }); }); + +describe('checkWasteFidelity', () => { + it('rejects aggregate-only turns with explicit missing prerequisites', () => { + const support = checkWasteFidelity([ + { + fidelity: makeFidelity('per-session-aggregate', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, + }), + toolCalls: [{ name: 'Bash' }], + }, + ]); + assert.equal(support.supported, false); + assert.equal(support.unsupportedTurns, 1); + assert.deepEqual(support.missingPrerequisites, [ + 'content lengths', + 'per-turn usage', + 'tool calls', + 'tool result events', + ]); + assert.equal(support.unsupportedByClass['aggregate-only'], 1); + }); + + it('requires session relationships only when subagent calls are present', () => { + const base = makeFidelity('per-turn', { + ...EMPTY_COVERAGE, + hasInputTokens: true, + hasOutputTokens: true, + hasToolCalls: true, + hasToolResultEvents: true, + hasRawContent: true, + }); + assert.equal( + checkWasteFidelity([{ fidelity: base, toolCalls: [{ name: 'Bash' }] }]).supported, + true, + ); + const withSubagent = checkWasteFidelity([ + { fidelity: base, toolCalls: [{ name: 'Agent' }] }, + ]); + assert.equal(withSubagent.supported, false); + assert.deepEqual(withSubagent.missingPrerequisites, ['session relationships']); + }); +}); diff --git a/packages/cli/src/commands/waste.ts b/packages/cli/src/commands/waste.ts index 0099e66..3dee88c 100644 --- a/packages/cli/src/commands/waste.ts +++ b/packages/cli/src/commands/waste.ts @@ -5,6 +5,7 @@ import { attributeWaste, detectPatterns, loadPricing, + summarizeFidelity, type BashAggregation, type FileAggregation, type PatternsResult, @@ -12,7 +13,7 @@ import { type WasteResult, } from '@relayburn/analyze'; import { queryAll, queryCompactions, readContent, type Query } from '@relayburn/ledger'; -import type { ContentRecord } from '@relayburn/reader'; +import type { ContentRecord, Fidelity, FidelityClass } from '@relayburn/reader'; import { ingestAll } from '../ingest.js'; import { formatInt, formatUsd, parseSinceArg, table } from '../format.js'; @@ -49,6 +50,15 @@ export async function runWaste(args: ParsedArgs): Promise { await ingestAll(); const pricing = await loadPricing(); const turns = await queryAll(q); + const fidelitySupport = checkWasteFidelity(turns); + if (!fidelitySupport.supported) { + if (args.flags['json'] === true) { + process.stdout.write(JSON.stringify(fidelitySupport, null, 2) + '\n'); + } else { + process.stderr.write(renderWasteFidelityError(fidelitySupport)); + } + return 2; + } const patternsFlag = args.flags['patterns']; if (patternsFlag !== undefined) { @@ -111,6 +121,94 @@ export async function runWaste(args: ParsedArgs): Promise { return 0; } +export interface WasteFidelitySupport { + supported: boolean; + turnsAnalyzed: number; + unsupportedTurns: number; + missingPrerequisites: string[]; + unsupportedByClass: Record; + fidelity: ReturnType; +} + +const WASTE_PREREQ_LABELS = { + toolCalls: 'tool calls', + toolResultEvents: 'tool result events', + contentLengths: 'content lengths', + sessionRelationships: 'session relationships', + perTurnUsage: 'per-turn usage', +} as const; + +export function checkWasteFidelity( + turns: readonly { fidelity?: Fidelity; toolCalls: { name: string }[] }[], +): WasteFidelitySupport { + const missing = new Set(); + const unsupportedByClass: Record = { + full: 0, + 'usage-only': 0, + 'aggregate-only': 0, + 'cost-only': 0, + partial: 0, + }; + let unsupportedTurns = 0; + const hasSubagentCalls = turns.some((t) => + t.toolCalls.some((tc) => tc.name === 'Agent' || tc.name === 'Task'), + ); + + for (const t of turns) { + const f = t.fidelity; + if (!f) continue; + let turnUnsupported = false; + if (f.granularity !== 'per-turn' && f.granularity !== 'per-message') { + missing.add(WASTE_PREREQ_LABELS.perTurnUsage); + turnUnsupported = true; + } + if (!f.coverage.hasToolCalls) { + missing.add(WASTE_PREREQ_LABELS.toolCalls); + turnUnsupported = true; + } + if (!f.coverage.hasToolResultEvents) { + missing.add(WASTE_PREREQ_LABELS.toolResultEvents); + turnUnsupported = true; + } + if (!f.coverage.hasRawContent) { + missing.add(WASTE_PREREQ_LABELS.contentLengths); + turnUnsupported = true; + } + if (hasSubagentCalls && !f.coverage.hasSessionRelationships) { + missing.add(WASTE_PREREQ_LABELS.sessionRelationships); + turnUnsupported = true; + } + if (turnUnsupported) { + unsupportedTurns++; + unsupportedByClass[f.class]++; + } + } + + return { + supported: missing.size === 0, + turnsAnalyzed: turns.length, + unsupportedTurns, + missingPrerequisites: [...missing].sort(), + unsupportedByClass, + fidelity: summarizeFidelity(turns), + }; +} + +function renderWasteFidelityError(support: WasteFidelitySupport): string { + const byClass = Object.entries(support.unsupportedByClass) + .filter(([, n]) => n > 0) + .map(([cls, n]) => `${n} ${cls}`) + .join(', '); + const detail = byClass.length > 0 ? ` (${byClass})` : ''; + return [ + 'burn waste: selected turns do not preserve enough fidelity for attribution.', + `missing prerequisites: ${support.missingPrerequisites.join(', ')}`, + `unsupported turns: ${support.unsupportedTurns}/${support.turnsAnalyzed}${detail}`, + 'No attribution was computed; re-ingest with a reader that preserves tool results and content lengths.', + '', + ].join('\n'); +} + interface FormatWasteReportInput { turnsAnalyzed: number; result: WasteResult; diff --git a/packages/reader/CHANGELOG.md b/packages/reader/CHANGELOG.md index ff492bf..bb8ad23 100644 --- a/packages/reader/CHANGELOG.md +++ b/packages/reader/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **Codex and OpenCode fidelity population** (#41). Both readers now attach `TurnRecord.fidelity` to emitted turns, preserving the difference between reported zero token fields and omitted fields. Codex reports per-turn delta coverage from cumulative token snapshots; OpenCode reports per-message coverage from its `tokens` object. Both mark tool-call / tool-result / raw-content capabilities and leave unsupported cache/session fields explicit instead of implied by numeric zero. + ## [0.19.0] - 2026-04-26 ### Added diff --git a/packages/reader/src/codex.test.ts b/packages/reader/src/codex.test.ts index 7fc36fb..7eb75be 100644 --- a/packages/reader/src/codex.test.ts +++ b/packages/reader/src/codex.test.ts @@ -32,6 +32,18 @@ describe('parseCodexSession', () => { }); assert.equal(t.toolCalls.length, 0); assert.equal(t.filesTouched, undefined); + assert.ok(t.fidelity); + assert.equal(t.fidelity!.granularity, 'per-turn'); + assert.equal(t.fidelity!.coverage.hasInputTokens, true); + assert.equal(t.fidelity!.coverage.hasOutputTokens, true); + assert.equal(t.fidelity!.coverage.hasReasoningTokens, true); + assert.equal(t.fidelity!.coverage.hasCacheReadTokens, true); + assert.equal(t.fidelity!.coverage.hasCacheCreateTokens, false); + assert.equal(t.fidelity!.coverage.hasToolCalls, true); + assert.equal(t.fidelity!.coverage.hasToolResultEvents, true); + assert.equal(t.fidelity!.coverage.hasSessionRelationships, false); + assert.equal(t.fidelity!.coverage.hasRawContent, true); + assert.equal(t.fidelity!.class, 'usage-only'); }); it('extracts function and custom tool calls and maps filesTouched from patch_apply_end', async () => { @@ -268,6 +280,57 @@ describe('parseCodexSession', () => { await rm(tmp, { recursive: true, force: true }); } }); + + it('marks omitted token fields as unknown rather than zero in fidelity coverage', async () => { + const { mkdtemp, writeFile, rm } = await import('node:fs/promises'); + const { tmpdir } = await import('node:os'); + const tmp = await mkdtemp(path.join(tmpdir(), 'burn-codex-fidelity-')); + try { + const jsonl = + [ + JSON.stringify({ + timestamp: '2026-04-22T00:00:00.000Z', + type: 'session_meta', + payload: { id: 'sess_fidelity', cwd: '/tmp/proj' }, + }), + JSON.stringify({ + timestamp: '2026-04-22T00:00:00.100Z', + type: 'turn_context', + payload: { turn_id: 'turn_fidelity_1', cwd: '/tmp/proj', model: 'gpt-5.4' }, + }), + JSON.stringify({ + timestamp: '2026-04-22T00:00:00.200Z', + type: 'event_msg', + payload: { type: 'task_started', turn_id: 'turn_fidelity_1' }, + }), + JSON.stringify({ + timestamp: '2026-04-22T00:00:00.300Z', + type: 'event_msg', + payload: { + type: 'token_count', + info: { total_token_usage: { input_tokens: 10, output_tokens: 0 } }, + }, + }), + JSON.stringify({ + timestamp: '2026-04-22T00:00:00.400Z', + type: 'event_msg', + payload: { type: 'task_complete', turn_id: 'turn_fidelity_1' }, + }), + '', + ].join('\n'); + const file = path.join(tmp, 'fidelity.jsonl'); + await writeFile(file, jsonl, 'utf8'); + const { turns } = await parseCodexSession(file); + assert.equal(turns.length, 1); + const f = turns[0]!.fidelity!; + assert.equal(turns[0]!.usage.output, 0, 'reported zero output remains numeric zero'); + assert.equal(f.coverage.hasOutputTokens, true, 'output_tokens: 0 is known zero'); + assert.equal(f.coverage.hasCacheReadTokens, false, 'missing cached_input_tokens is unknown'); + assert.equal(f.coverage.hasReasoningTokens, false, 'missing reasoning_output_tokens is unknown'); + } finally { + await rm(tmp, { recursive: true, force: true }); + } + }); }); describe('parseCodexSessionIncremental', () => { diff --git a/packages/reader/src/codex.ts b/packages/reader/src/codex.ts index ebf72af..9bfe1a0 100644 --- a/packages/reader/src/codex.ts +++ b/packages/reader/src/codex.ts @@ -1,11 +1,13 @@ import { open } from 'node:fs/promises'; import { classifyActivity } from './classifier.js'; +import { EMPTY_COVERAGE, makeFidelity } from './fidelity.js'; import { resolveProject } from './git.js'; import { argsHash } from './hash.js'; import type { ContentRecord, ContentStoreMode, + Coverage, ToolCall, TurnRecord, Usage, @@ -26,6 +28,7 @@ export interface ParseCodexIncrementalOptions extends ParseCodexOptions { export interface CodexResumeState { cumulative: { input: number; output: number; cacheRead: number; reasoning: number }; + cumulativeCoverage?: UsageCoverage; sessionId: string; sessionCwd?: string; turnContexts: Record; @@ -149,12 +152,21 @@ interface CumulativeUsage { reasoning: number; } +type UsageCoverage = Pick< + Coverage, + | 'hasInputTokens' + | 'hasOutputTokens' + | 'hasReasoningTokens' + | 'hasCacheReadTokens' +>; + interface OpenTurn { turnId: string; ts: string; model: string; project?: string; startCumulative: CumulativeUsage; + startCumulativeCoverage: UsageCoverage; toolCalls: ToolCall[]; seenCallIds: Set; filesTouched: Set; @@ -169,9 +181,14 @@ interface OpenTurn { interface FinalizedTurn extends Omit< OpenTurn, - 'startCumulative' | 'seenCallIds' | 'filesTouched' | 'erroredCallIds' + | 'startCumulative' + | 'startCumulativeCoverage' + | 'seenCallIds' + | 'filesTouched' + | 'erroredCallIds' > { usage: Usage; + usageCoverage: UsageCoverage; filesTouched: string[]; erroredCallIds: Set; } @@ -240,6 +257,15 @@ export async function parseCodexSessionIncremental( cacheRead: options.resume?.cumulative.cacheRead ?? 0, reasoning: options.resume?.cumulative.reasoning ?? 0, }; + let cumulativeCoverage: UsageCoverage = options.resume?.cumulativeCoverage + ? { ...options.resume.cumulativeCoverage } + : { + hasInputTokens: false, + hasOutputTokens: false, + hasReasoningTokens: false, + hasCacheReadTokens: false, + }; + let seenUsageSnapshot = options.resume?.cumulativeCoverage !== undefined; let openTurn: OpenTurn | null = null; let pendingUserText = ''; // User content (and any stray records) that arrive before the next @@ -263,6 +289,8 @@ export async function parseCodexSessionIncremental( // Commit snapshot — only advanced at task_complete boundaries. let committedEndOffset = startOffset; let committedCumulative: CumulativeUsage = { ...cumulative }; + let committedCumulativeCoverage: UsageCoverage = { ...cumulativeCoverage }; + let committedSeenUsageSnapshot = seenUsageSnapshot; let committedSessionId = sessionId; let committedSessionCwd = sessionCwd; let committedTurnContexts = new Map(turnContexts); @@ -328,6 +356,8 @@ export async function parseCodexSessionIncremental( cumulative.cacheRead = cached; cumulative.output = total.output_tokens ?? 0; cumulative.reasoning = total.reasoning_output_tokens ?? 0; + cumulativeCoverage = usageCoverageFromTokenUsage(total); + seenUsageSnapshot = true; } continue; } @@ -338,7 +368,7 @@ export async function parseCodexSessionIncremental( const turnId = ev.turn_id; if (typeof turnId !== 'string') continue; if (openTurn) { - finalized.push(finalizeTurn(openTurn, cumulative)); + finalized.push(finalizeTurn(openTurn, cumulative, cumulativeCoverage)); } // Close the user-turn slot that bridges the previous assistant turn // and this one. `precedingMessageId` was stamped at the previous @@ -356,6 +386,7 @@ export async function parseCodexSessionIncremental( ts, model: ctx?.model ?? '', startCumulative: { ...cumulative }, + startCumulativeCoverage: baselineCoverage(cumulativeCoverage, seenUsageSnapshot), toolCalls: [], seenCallIds: new Set(), filesTouched: new Set(), @@ -396,10 +427,12 @@ export async function parseCodexSessionIncremental( // Stamp preceding so the next task_started knows this turn closed // off the slot and the record can be linked. userTurnSlot.precedingMessageId = openTurn.turnId; - finalized.push(finalizeTurn(openTurn, cumulative)); + finalized.push(finalizeTurn(openTurn, cumulative, cumulativeCoverage)); openTurn = null; committedEndOffset = lineEndOffset; committedCumulative = { ...cumulative }; + committedCumulativeCoverage = { ...cumulativeCoverage }; + committedSeenUsageSnapshot = seenUsageSnapshot; committedSessionId = sessionId; committedSessionCwd = sessionCwd; committedTurnContexts = new Map(turnContexts); @@ -616,6 +649,7 @@ export async function parseCodexSessionIncremental( record.activity = classified.activity; record.retries = classified.retries; record.hasEdits = classified.hasEdits; + record.fidelity = buildCodexFidelity(f.usageCoverage); turns.push(record); if (captureContent) content.push(...f.content); } @@ -626,6 +660,13 @@ export async function parseCodexSessionIncremental( turnContexts: Object.fromEntries(committedTurnContexts), userTurnSlot: cloneSlot(committedUserTurnSlot), }; + if ( + committedSeenUsageSnapshot || + committedFinalizedCount > 0 || + options.resume?.cumulativeCoverage !== undefined + ) { + resume.cumulativeCoverage = { ...committedCumulativeCoverage }; + } if (committedSessionCwd !== undefined) resume.sessionCwd = committedSessionCwd; const emittedUserTurns = userTurns.slice(0, committedUserTurnsCount); @@ -677,6 +718,9 @@ function cloneResume(r: CodexResumeState | undefined): CodexResumeState { sessionId: r.sessionId, turnContexts: { ...r.turnContexts }, }; + if (r.cumulativeCoverage !== undefined) { + out.cumulativeCoverage = { ...r.cumulativeCoverage }; + } if (r.sessionCwd !== undefined) out.sessionCwd = r.sessionCwd; if (r.userTurnSlot) out.userTurnSlot = cloneSlot(r.userTurnSlot); else out.userTurnSlot = { blocks: [], ts: '' }; @@ -721,7 +765,11 @@ function buildCodexUserTurnRecord( return record; } -function finalizeTurn(open: OpenTurn, cumulative: CumulativeUsage): FinalizedTurn { +function finalizeTurn( + open: OpenTurn, + cumulative: CumulativeUsage, + cumulativeCoverage: UsageCoverage, +): FinalizedTurn { const usage: Usage = { input: Math.max(0, cumulative.input - open.startCumulative.input), output: Math.max(0, cumulative.output - open.startCumulative.output), @@ -730,12 +778,14 @@ function finalizeTurn(open: OpenTurn, cumulative: CumulativeUsage): FinalizedTur cacheCreate5m: 0, cacheCreate1h: 0, }; + const usageCoverage = deltaCoverage(open.startCumulativeCoverage, cumulativeCoverage); const out: FinalizedTurn = { turnId: open.turnId, ts: open.ts, model: open.model, toolCalls: open.toolCalls, usage, + usageCoverage, filesTouched: [...open.filesTouched], userText: open.userText, assistantText: open.assistantText, @@ -746,6 +796,47 @@ function finalizeTurn(open: OpenTurn, cumulative: CumulativeUsage): FinalizedTur return out; } +function baselineCoverage(current: UsageCoverage, seenSnapshot: boolean): UsageCoverage { + if (seenSnapshot) return { ...current }; + return { + hasInputTokens: true, + hasOutputTokens: true, + hasReasoningTokens: true, + hasCacheReadTokens: true, + }; +} + +function usageCoverageFromTokenUsage(total: TokenUsage): UsageCoverage { + return { + hasInputTokens: total.input_tokens !== undefined, + hasOutputTokens: total.output_tokens !== undefined, + hasReasoningTokens: total.reasoning_output_tokens !== undefined, + hasCacheReadTokens: total.cached_input_tokens !== undefined, + }; +} + +function deltaCoverage(start: UsageCoverage, end: UsageCoverage): UsageCoverage { + return { + hasInputTokens: start.hasInputTokens && end.hasInputTokens, + hasOutputTokens: start.hasOutputTokens && end.hasOutputTokens, + hasReasoningTokens: start.hasReasoningTokens && end.hasReasoningTokens, + hasCacheReadTokens: start.hasCacheReadTokens && end.hasCacheReadTokens, + }; +} + +function buildCodexFidelity(usageCoverage: UsageCoverage) { + const coverage: Coverage = { + ...EMPTY_COVERAGE, + ...usageCoverage, + hasCacheCreateTokens: false, + hasToolCalls: true, + hasToolResultEvents: true, + hasSessionRelationships: false, + hasRawContent: true, + }; + return makeFidelity('per-turn', coverage); +} + // Codex user messages mix real prompts with harness boilerplate // (environment_context, AGENTS.md injections, permissions instructions, // collaboration_mode banners). Strip those so the classifier sees the text diff --git a/packages/reader/src/opencode.test.ts b/packages/reader/src/opencode.test.ts index 56fbf27..475a75e 100644 --- a/packages/reader/src/opencode.test.ts +++ b/packages/reader/src/opencode.test.ts @@ -37,6 +37,18 @@ describe('parseOpencodeSession', () => { assert.equal(t.toolCalls.length, 0); assert.equal(t.filesTouched, undefined); assert.equal(t.subagent, undefined); + assert.ok(t.fidelity); + assert.equal(t.fidelity!.granularity, 'per-message'); + assert.equal(t.fidelity!.coverage.hasInputTokens, true); + assert.equal(t.fidelity!.coverage.hasOutputTokens, true); + assert.equal(t.fidelity!.coverage.hasReasoningTokens, true); + assert.equal(t.fidelity!.coverage.hasCacheReadTokens, true); + assert.equal(t.fidelity!.coverage.hasCacheCreateTokens, true); + assert.equal(t.fidelity!.coverage.hasToolCalls, true); + assert.equal(t.fidelity!.coverage.hasToolResultEvents, true); + assert.equal(t.fidelity!.coverage.hasSessionRelationships, false); + assert.equal(t.fidelity!.coverage.hasRawContent, true); + assert.equal(t.fidelity!.class, 'usage-only'); }); it('extracts tool calls and filesTouched only for file tools', async () => { @@ -195,6 +207,48 @@ describe('parseOpencodeSession', () => { await rm(tmp, { recursive: true, force: true }); } }); + + it('distinguishes missing token fields from reported zero token fields', async () => { + const { mkdtemp, mkdir, writeFile, rm } = await import('node:fs/promises'); + const { tmpdir } = await import('node:os'); + const tmp = await mkdtemp(path.join(tmpdir(), 'burn-oc-fidelity-')); + try { + const storage = path.join(tmp, 'storage'); + const sessionDir = path.join(storage, 'session', 'global'); + const msgDir = path.join(storage, 'message', 'ses_fidelity'); + await mkdir(sessionDir, { recursive: true }); + await mkdir(msgDir, { recursive: true }); + await writeFile( + path.join(sessionDir, 'ses_fidelity.json'), + JSON.stringify({ id: 'ses_fidelity', directory: '/tmp/proj' }), + ); + await writeFile( + path.join(msgDir, 'msg_fidelity_asst.json'), + JSON.stringify({ + id: 'msg_fidelity_asst', + sessionID: 'ses_fidelity', + role: 'assistant', + providerID: 'anthropic', + modelID: 'claude-haiku-4-5', + time: { created: 1_776_988_001_000 }, + path: { cwd: '/tmp/proj' }, + tokens: { input: 0, output: 0, cache: { read: 0 } }, + }), + ); + const { turns } = await parseOpencodeSession(path.join(sessionDir, 'ses_fidelity.json')); + assert.equal(turns.length, 1); + const f = turns[0]!.fidelity!; + assert.equal(turns[0]!.usage.input, 0); + assert.equal(turns[0]!.usage.output, 0); + assert.equal(f.coverage.hasInputTokens, true, 'input: 0 is known zero'); + assert.equal(f.coverage.hasOutputTokens, true, 'output: 0 is known zero'); + assert.equal(f.coverage.hasCacheReadTokens, true, 'cache.read: 0 is known zero'); + assert.equal(f.coverage.hasReasoningTokens, false, 'missing reasoning is unknown'); + assert.equal(f.coverage.hasCacheCreateTokens, false, 'missing cache.write is unknown'); + } finally { + await rm(tmp, { recursive: true, force: true }); + } + }); }); describe('parseOpencodeSessionIncremental', () => { diff --git a/packages/reader/src/opencode.ts b/packages/reader/src/opencode.ts index f0d5d85..e1caaf9 100644 --- a/packages/reader/src/opencode.ts +++ b/packages/reader/src/opencode.ts @@ -2,11 +2,13 @@ import { readFile, readdir } from 'node:fs/promises'; import * as path from 'node:path'; import { classifyActivity } from './classifier.js'; +import { EMPTY_COVERAGE, makeFidelity } from './fidelity.js'; import { resolveProject } from './git.js'; import { argsHash } from './hash.js'; import type { ContentRecord, ContentStoreMode, + Coverage, Subagent, ToolCall, TurnRecord, @@ -37,6 +39,20 @@ interface MessageTokens { }; } +type UsageCoverage = Pick< + Coverage, + | 'hasInputTokens' + | 'hasOutputTokens' + | 'hasReasoningTokens' + | 'hasCacheReadTokens' + | 'hasCacheCreateTokens' +>; + +interface UsageWithCoverage { + usage: Usage; + coverage: UsageCoverage; +} + interface AssistantMessage { id: string; sessionID: string; @@ -176,7 +192,8 @@ export async function parseOpencodeSessionIncremental( const model = buildModel(m.providerID, m.modelID); const project = m.path?.cwd ?? session.directory; - const usage = toUsage(m.tokens); + const usageInfo = toUsage(m.tokens); + const usage = usageInfo.usage; const record: TurnRecord = { v: 1, @@ -189,6 +206,7 @@ export async function parseOpencodeSessionIncremental( usage, toolCalls, }; + record.fidelity = buildOpencodeFidelity(usageInfo.coverage); if (options.sessionPath !== undefined) record.sessionPath = options.sessionPath; if (project !== undefined) { const resolved = resolveProject(project); @@ -582,20 +600,41 @@ function lastStepFinishReason(parts: Part[]): string | undefined { return undefined; } -function toUsage(t: MessageTokens | undefined): Usage { +function toUsage(t: MessageTokens | undefined): UsageWithCoverage { const input = t?.input ?? 0; const output = t?.output ?? 0; const reasoning = t?.reasoning ?? 0; const cacheRead = t?.cache?.read ?? 0; const cacheWrite = t?.cache?.write ?? 0; return { - input, - output, - reasoning, - cacheRead, - cacheCreate5m: cacheWrite, - cacheCreate1h: 0, + usage: { + input, + output, + reasoning, + cacheRead, + cacheCreate5m: cacheWrite, + cacheCreate1h: 0, + }, + coverage: { + hasInputTokens: t?.input !== undefined, + hasOutputTokens: t?.output !== undefined, + hasReasoningTokens: t?.reasoning !== undefined, + hasCacheReadTokens: t?.cache?.read !== undefined, + hasCacheCreateTokens: t?.cache?.write !== undefined, + }, + }; +} + +function buildOpencodeFidelity(usageCoverage: UsageCoverage) { + const coverage: Coverage = { + ...EMPTY_COVERAGE, + ...usageCoverage, + hasToolCalls: true, + hasToolResultEvents: true, + hasSessionRelationships: false, + hasRawContent: true, }; + return makeFidelity('per-message', coverage); } function buildModel(providerID: string | undefined, modelID: string | undefined): string {