Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/cli/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- **`burn limits` honors fidelity on its 5-hour forecast** ([#105](https://github.com/AgentWorkforce/burn/issues/105)). The forecast still consumes every windowed turn — partial / aggregate-only / cost-only data still contributes to the running token total — but `burn limits` now classifies the contributing slice via `summarizeFidelity` and surfaces a binary `high` / `low` confidence flag. Text mode appends a `forecast: low-confidence (N of M contributing turns lack per-turn token data)` notice when at least one contributing turn is missing per-turn token coverage; full-fidelity windows print no notice. `--json` output gains a `forecast.fidelity` block carrying the `confidence` flag and the underlying `FidelitySummary`. `--watch` re-evaluates confidence on each tick so the flag flips as fresher full-fidelity turns land.

### Changed

- **Persist user-turn block-size records during ingest** (#2). `burn ingest`, passive ingest, and the Claude/Codex/OpenCode wrappers now append parser-emitted `UserTurnRecord`s for all three harnesses. Codex passive cursors also carry the in-flight user-turn slot so resumed ingest can complete a bridge record across file-growth boundaries. `burn waste` and `burn diagnose` load these records and use them as the sized fallback when content sidecars are missing.
Expand Down
206 changes: 204 additions & 2 deletions packages/cli/src/commands/limits.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import { strict as assert } from 'node:assert';
import { describe, it } from 'node:test';

import type { Fidelity } from '@relayburn/reader';

import {
deriveForecastFidelity,
makeCachingFetcher,
runLimits,
type ForecastInput,
type ForecastResult,
type LimitsDeps,
type UsageResponse,
} from './limits.js';
Expand Down Expand Up @@ -44,6 +48,49 @@ function fakeNow(): Date {
return new Date(FIXED_NOW);
}

// Fixture: a per-turn `Fidelity` with every coverage flag set. Represents a
// turn whose token data is fully observed; feeding only these into
// `deriveForecastFidelity` should yield the 'high' confidence path.
const FULL_FIDELITY: Fidelity = {
  granularity: 'per-turn',
  class: 'full',
  coverage: {
    hasInputTokens: true,
    hasOutputTokens: true,
    hasReasoningTokens: true,
    hasCacheReadTokens: true,
    hasCacheCreateTokens: true,
    hasToolCalls: true,
    hasToolResultEvents: true,
    hasSessionRelationships: true,
    hasRawContent: true,
  },
};

// Fixture: a per-turn `Fidelity` of class 'partial' — all token-coverage
// flags are false (only `hasToolCalls` is true), i.e. a turn that lacks
// per-turn token data. Mixing one of these into the contributing turns is
// what flips the forecast confidence to 'low' in the tests below.
const PARTIAL_FIDELITY: Fidelity = {
  granularity: 'per-turn',
  class: 'partial',
  coverage: {
    hasInputTokens: false,
    hasOutputTokens: false,
    hasReasoningTokens: false,
    hasCacheReadTokens: false,
    hasCacheCreateTokens: false,
    hasToolCalls: true,
    hasToolResultEvents: false,
    hasSessionRelationships: false,
    hasRawContent: false,
  },
};

/**
 * Lift a bare `ForecastInput` into a `ForecastResult` whose fidelity block is
 * derived from a single full-fidelity turn.
 *
 * Existing tests that only care about the numeric forecast get a benign
 * high-confidence wrapper by default; tests that exercise low-confidence
 * behavior construct an explicit `ForecastResult` themselves.
 */
function highConfidence(input: ForecastInput): ForecastResult {
  const fidelity = deriveForecastFidelity([{ fidelity: FULL_FIDELITY }]);
  return { input, fidelity };
}

function noTokenDeps(): LimitsDeps {
return {
loadToken: async () => null,
Expand All @@ -59,7 +106,7 @@ function tokenDeps(usage: UsageResponse, forecast: ForecastInput | null = null):
loadToken: async () => 'fake-token',
fetchUsage: async () => usage,
now: fakeNow,
loadForecast: async () => forecast,
loadForecast: async () => (forecast ? highConfidence(forecast) : null),
loadPlanStatuses: async () => [],
};
}
Expand Down Expand Up @@ -150,7 +197,7 @@ describe('burn limits', () => {
throw new Error('should not be called when --no-api');
},
now: fakeNow,
loadForecast: async () => forecast,
loadForecast: async () => highConfidence(forecast),
};
const { result, stdout } = await captureStdout(() =>
runLimits(args({ 'no-api': true }), deps),
Expand Down Expand Up @@ -330,6 +377,161 @@ describe('burn limits', () => {
assert.equal(parsed.plans[0].limitedData, false);
});

it('high-confidence forecast (all full) renders no fidelity notice', async () => {
  // Acceptance criteria #105: full-fidelity windows show no notice.
  // All three contributing turns carry full per-turn token coverage, so the
  // rendered text must contain the forecast but no low-confidence marker.
  const limitsDeps: LimitsDeps = {
    loadToken: async () => 'tok',
    fetchUsage: async () => ({
      five_hour: { percent_used: 40, reset_at: '2026-04-24T14:00:00.000Z' },
    }),
    now: fakeNow,
    loadForecast: async () => ({
      input: {
        tokensSoFar: 600_000,
        elapsedMs: 2 * 60 * 60 * 1000,
        remainingMs: 2 * 60 * 60 * 1000,
      },
      fidelity: deriveForecastFidelity([
        { fidelity: FULL_FIDELITY },
        { fidelity: FULL_FIDELITY },
        { fidelity: FULL_FIDELITY },
      ]),
    }),
    loadPlanStatuses: async () => [],
  };
  const { stdout } = await captureStdout(() => runLimits(args(), limitsDeps));
  assert.match(stdout, /burn rate/);
  assert.doesNotMatch(stdout, /low-confidence/);
});

it('low-confidence forecast (one partial turn) appends a notice without refusing the projection', async () => {
  // Acceptance criteria #105: rendered output shows a low-confidence notice
  // when any contributing turn lacks per-turn token coverage; the forecast
  // number itself is unchanged (still rendered).
  const usageFixture: UsageResponse = {
    five_hour: { percent_used: 40, reset_at: '2026-04-24T14:00:00.000Z' },
  };
  // Two full turns plus one partial turn → 1 of 3 lack token data.
  const contributingTurns = [
    { fidelity: FULL_FIDELITY },
    { fidelity: FULL_FIDELITY },
    { fidelity: PARTIAL_FIDELITY },
  ];
  const forecastResult: ForecastResult = {
    input: {
      tokensSoFar: 600_000,
      elapsedMs: 2 * 60 * 60 * 1000,
      remainingMs: 2 * 60 * 60 * 1000,
    },
    fidelity: deriveForecastFidelity(contributingTurns),
  };
  const limitsDeps: LimitsDeps = {
    loadToken: async () => 'tok',
    fetchUsage: async () => usageFixture,
    now: fakeNow,
    loadForecast: async () => forecastResult,
    loadPlanStatuses: async () => [],
  };
  const { stdout } = await captureStdout(() => runLimits(args(), limitsDeps));
  // Forecast is still rendered with both burn rate and projection.
  assert.match(stdout, /burn rate 5\.0k tok\/min/);
  assert.match(stdout, /projected 80% at reset/);
  // And a low-confidence notice is appended naming the count.
  assert.match(
    stdout,
    /forecast: low-confidence \(1 of 3 contributing turns lack per-turn token data\)/,
  );
});

it('--json forecast block carries a fidelity sub-object with confidence + summary', async () => {
  // Acceptance criteria #105: --json emits a fidelity block with confidence
  // and the underlying FidelitySummary.
  const usageFixture: UsageResponse = {
    five_hour: { percent_used: 40, reset_at: '2026-04-24T14:00:00.000Z' },
  };
  // One full + one partial turn → summary totals 2, confidence 'low'.
  const forecastResult: ForecastResult = {
    input: {
      tokensSoFar: 600_000,
      elapsedMs: 2 * 60 * 60 * 1000,
      remainingMs: 2 * 60 * 60 * 1000,
    },
    fidelity: deriveForecastFidelity([
      { fidelity: FULL_FIDELITY },
      { fidelity: PARTIAL_FIDELITY },
    ]),
  };
  const limitsDeps: LimitsDeps = {
    loadToken: async () => 'tok',
    fetchUsage: async () => usageFixture,
    now: fakeNow,
    loadForecast: async () => forecastResult,
    loadPlanStatuses: async () => [],
  };
  const { stdout } = await captureStdout(() => runLimits(args({ json: true }), limitsDeps));
  const report = JSON.parse(stdout);
  assert.ok(report.forecast.fidelity, 'forecast.fidelity present');
  const fidelity = report.forecast.fidelity;
  assert.equal(fidelity.confidence, 'low');
  assert.equal(fidelity.summary.total, 2);
  assert.equal(fidelity.summary.byClass.full, 1);
  assert.equal(fidelity.summary.byClass.partial, 1);
  assert.equal(fidelity.summary.unknown, 0);
});

it('--json forecast fidelity reports high confidence when every turn is full', async () => {
  // Default `tokenDeps` wraps the forecast via `highConfidence`, i.e. a
  // single full-fidelity contributing turn — so the JSON block must report
  // confidence 'high' with a one-turn, all-full summary.
  const usageFixture: UsageResponse = {
    five_hour: { percent_used: 40, reset_at: '2026-04-24T14:00:00.000Z' },
  };
  const forecastFixture: ForecastInput = {
    tokensSoFar: 600_000,
    elapsedMs: 2 * 60 * 60 * 1000,
    remainingMs: 2 * 60 * 60 * 1000,
  };
  const { stdout } = await captureStdout(() =>
    runLimits(args({ json: true }), tokenDeps(usageFixture, forecastFixture)),
  );
  const report = JSON.parse(stdout);
  assert.equal(report.forecast.fidelity.confidence, 'high');
  assert.equal(report.forecast.fidelity.summary.total, 1);
  assert.equal(report.forecast.fidelity.summary.byClass.full, 1);
});

it('--watch re-evaluates confidence each tick (low → high as full turns arrive)', async () => {
  // Acceptance criteria #105: --watch re-evaluates confidence on each tick.
  // We exercise renderOnce indirectly by toggling the loadForecast result
  // between calls and checking that runLimits picks up the change. (We
  // don't actually run the watch loop here — the loop just calls
  // renderOnce repeatedly, which is what we test below.)
  const usageFixture: UsageResponse = {
    five_hour: { percent_used: 40, reset_at: '2026-04-24T14:00:00.000Z' },
  };
  const forecastFixture: ForecastInput = {
    tokensSoFar: 600_000,
    elapsedMs: 2 * 60 * 60 * 1000,
    remainingMs: 2 * 60 * 60 * 1000,
  };
  // First call sees a mixed window (low confidence); every later call sees
  // an all-full window (high confidence).
  const turnsByCall = [
    [{ fidelity: FULL_FIDELITY }, { fidelity: PARTIAL_FIDELITY }],
    [{ fidelity: FULL_FIDELITY }, { fidelity: FULL_FIDELITY }],
  ];
  let callCount = 0;
  const limitsDeps: LimitsDeps = {
    loadToken: async () => 'tok',
    fetchUsage: async () => usageFixture,
    now: fakeNow,
    loadForecast: async () => {
      const turns = turnsByCall[Math.min(callCount, turnsByCall.length - 1)];
      callCount += 1;
      return { input: forecastFixture, fidelity: deriveForecastFidelity(turns) };
    },
    loadPlanStatuses: async () => [],
  };
  const { stdout: first } = await captureStdout(() => runLimits(args(), limitsDeps));
  assert.match(first, /low-confidence/);
  const { stdout: second } = await captureStdout(() => runLimits(args(), limitsDeps));
  assert.doesNotMatch(second, /low-confidence/);
});

it('renders very-low projected % without double-normalizing back to 0..1', async () => {
// Regression: projectFromOauth returns a value already on the 0..100 scale
// (and capped at 100). If the renderer pipes that through the same
Expand Down
Loading