Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/cli/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- **`burn` ingest runs cross-file Claude relationship reconciliation at end of pass** ([#112](https://github.com/AgentWorkforce/burn/issues/112)). After parsing every Claude session file in an ingest pass, the CLI now feeds the per-file evidence through `reconcileClaudeSessionRelationships` and appends any resulting `fork` / `continuation` rows. Idempotent — the writer's `relationshipIdHash` dedup folds repeats on subsequent runs.

## [0.21.0] - 2026-04-26

### Added
Expand Down
31 changes: 30 additions & 1 deletion packages/cli/src/ingest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import {
parseClaudeSessionIncremental,
parseCodexSessionIncremental,
parseOpencodeSessionIncremental,
reconcileClaudeSessionRelationships,
} from '@relayburn/reader';
import type {
CodexResumeState,
ContentRecord,
ContentStoreMode,
ReconcileClaudeRelationshipsInput,
TurnRecord,
} from '@relayburn/reader';
import {
Expand Down Expand Up @@ -209,6 +211,11 @@ async function ingestClaudeInto(
gap: GapStats,
): Promise<void> {
const projects = await listDirs(claudeProjectsDir());
// Cross-file relationship reconciliation (#112). Collect per-file evidence
// from every successful parse this pass and run one reconciliation step at
// the end so fork / continuation rows that need cross-file knowledge get
// emitted alongside the per-file `root` / `subagent` / `/resume` rows.
const reconcileInputs: ReconcileClaudeRelationshipsInput[] = [];
for (const projectDir of projects) {
const files = await listJsonlFiles(projectDir);
for (const file of files) {
Expand All @@ -225,7 +232,14 @@ async function ingestClaudeInto(
const startOffset = rotated ? 0 : priorClaude.offsetBytes;

if (!rotated && startOffset >= st.size) {
// nothing new; refresh mtime bookkeeping
// Nothing new; refresh mtime bookkeeping and skip reconciliation
// evidence — the file's relationships were emitted on the pass
// that last touched it, and the writer's `relationshipIdHash`
// dedup keeps subsequent passes idempotent. Cross-file detection
// for an unchanged-vs-changed pair runs on the changed file's
// pass when both happen to be active in the same window; one-off
// late-arriving relationships rely on a future modification of
// either file (or an explicit re-scan) to surface.
priorClaude.mtimeMs = st.mtimeMs;
continue;
}
Expand All @@ -246,6 +260,7 @@ async function ingestClaudeInto(
userTurns,
endOffset,
lastUserText,
evidence,
} = await parseClaudeSessionIncremental(file, parseOpts);
if (turns.length > 0) {
await appendTurns(turns);
Expand Down Expand Up @@ -274,6 +289,13 @@ async function ingestClaudeInto(
if (userTurns.length > 0) {
await appendUserTurns(userTurns);
}
// Evidence completeness: the incremental call reports evidence only for the
// bytes it just read, but the prescan state it carries forward
// (`firstParentUuid` / `seenUuids`) is already populated whenever
// startOffset > 0, so the returned `evidence` reflects the whole file —
// no separate re-derivation pass over the prefix is needed.
reconcileInputs.push({ evidence });
const next: ClaudeCursor = {
kind: 'claude',
inode: st.ino,
Expand All @@ -288,6 +310,13 @@ async function ingestClaudeInto(
}
}
}
// Cross-file reconciliation (#112). Emits `fork` / `continuation` rows
// beyond what each file's own parse pass could surface. The append writer's
// `relationshipIdHash` dedup handles re-runs with identical inputs.
if (reconcileInputs.length > 0) {
const reconciled = reconcileClaudeSessionRelationships(reconcileInputs);
if (reconciled.length > 0) await appendRelationships(reconciled);
}
}

async function ingestCodexInto(
Expand Down
4 changes: 4 additions & 0 deletions packages/reader/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- **Claude parser emits `fork` and `continuation` `SessionRelationshipRecord` rows** ([#112](https://github.com/AgentWorkforce/burn/issues/112)). Closes the deferred-work item from #77/#42: the Claude passive reader now populates the full `RelationshipType` lattice instead of only `root` / `subagent`. Per-file evidence — in-log `sessionId` mismatches against the on-disk filename, the first user line's `parentUuid`, the first non-empty `version` field, all in-file uuids, and `/resume` / `/continue` slash-command markers — is collected during the existing parse pass and surfaced as a new `evidence: ClaudeRelationshipEvidence` field on `ParseResult` / `ParseIncrementalResult`. A `/resume` marker emits a local `continuation` row with `relatedSessionId` set to the resumed-from id; a new exported `reconcileClaudeSessionRelationships(inputs)` helper takes per-file evidence from a multi-file pass and emits the cross-file `fork` / `continuation` rows that single-file parsers can't surface. Existing `root` / `subagent` rows are stamped with `sourceSessionId` (foreign in-log id) and `sourceVersion` whenever the file carries them. Reconciliation strategy is **append, not mutate**: a prior `root` row and a later `continuation` / `fork` row for the same session id produce different `relationshipIdHash` values, so both rows coexist on disk and consumers prefer the more specific row when both are present. Re-ingesting a session is idempotent — the writer's existing dedup folds duplicates. New `ParseOptions.fileSessionId` lets callers pin the canonical session id explicitly; when omitted but `sessionPath` is set, the parser derives it from the `.jsonl` basename.

## [0.19.0] - 2026-04-26

### Added
Expand Down
215 changes: 214 additions & 1 deletion packages/reader/src/claude.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ import { fileURLToPath } from 'node:url';
import * as path from 'node:path';
import { afterEach, beforeEach, describe, it } from 'node:test';

import { parseClaudeSession, parseClaudeSessionIncremental } from './claude.js';
import {
parseClaudeSession,
parseClaudeSessionIncremental,
reconcileClaudeSessionRelationships,
} from './claude.js';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const FIXTURES = path.resolve(__dirname, '..', '..', '..', 'tests', 'fixtures', 'claude');
Expand Down Expand Up @@ -823,3 +827,212 @@ describe('parseClaudeSessionIncremental', () => {
assert.equal(sub2_1!.subagent!.parentToolUseId, 'toolu_inner');
});
});

// ---------------------------------------------------------------------------
// Fork / continuation relationships (#112).
// ---------------------------------------------------------------------------

describe('parseClaudeSession fork / continuation relationships (#112)', () => {
  // Every expectation in this suite is pinned to the contents of the fixture
  // files under tests/fixtures/claude/ — update fixtures and assertions
  // together when either changes.

  it('emits a continuation row from a /resume marker, with relatedSessionId set to the resumed-from id', async () => {
    const file = path.join(FIXTURES, 'resume-marker.jsonl');
    const { relationships } = await parseClaudeSessionIncremental(file, {
      sessionPath: file,
    });
    const cont = relationships.find((r) => r.relationshipType === 'continuation');
    assert.ok(cont, '/resume marker must produce a continuation row');
    // The on-disk filename's session id is what consumers join on; relatedSessionId
    // is the id named in the slash-command argument.
    assert.equal(cont!.sessionId, 'resume-marker');
    assert.equal(cont!.relatedSessionId, '11111111-1111-1111-1111-111111111111');
    // Provenance: the line carries an in-log `sessionId` distinct from the file
    // basename (`99999999-...` vs `resume-marker`), so it surfaces as
    // `sourceSessionId`. `version` becomes `sourceVersion`.
    assert.equal(cont!.sourceSessionId, '99999999-9999-9999-9999-999999999999');
    assert.equal(cont!.sourceVersion, '2.1.97');
  });

  it('populates sourceSessionId and sourceVersion on existing root rows when the in-log id differs from the file id', async () => {
    // Same fixture as above, full (non-incremental) parse: the provenance
    // stamp must land on the root row too, not just the continuation row.
    const file = path.join(FIXTURES, 'resume-marker.jsonl');
    const { relationships } = await parseClaudeSession(file, { sessionPath: file });
    const root = relationships.find((r) => r.relationshipType === 'root');
    assert.ok(root, 'root row should still be emitted alongside the continuation row');
    assert.equal(root!.sessionId, 'resume-marker');
    assert.equal(root!.sourceSessionId, '99999999-9999-9999-9999-999999999999');
    assert.equal(root!.sourceVersion, '2.1.97');
  });

  it('captures firstParentUuid from the first non-sidechain user line even when a sidechain user line precedes it', async () => {
    // Sidechain (subagent) user lines must not claim the "first user line"
    // slot; evidence.firstParentUuid has to come from the main chain.
    const file = path.join(FIXTURES, 'sidechain-leading-then-main.jsonl');
    const { evidence } = await parseClaudeSession(file, { sessionPath: file });
    assert.equal(evidence.firstParentUuid, 'u-original-asst');
  });

  it('exposes per-file evidence so a cross-file pass can resolve fork / continuation', async () => {
    const file = path.join(FIXTURES, 'resume-marker.jsonl');
    const { evidence } = await parseClaudeSession(file, { sessionPath: file });
    assert.equal(evidence.fileSessionId, 'resume-marker');
    assert.equal(evidence.sourceVersion, '2.1.97');
    assert.equal(evidence.hasResumeMarker, true);
    assert.equal(evidence.resumeTargetSessionId, '11111111-1111-1111-1111-111111111111');
    // The first non-sidechain line's parentUuid is null in this fixture, so
    // the parser leaves firstParentUuid undefined.
    assert.equal(evidence.firstParentUuid, undefined);
    // Both line uuids show up (assistant + user).
    assert.ok(evidence.seenUuids.includes('u-resume-1'));
    assert.ok(evidence.seenUuids.includes('u-asst-r'));
  });

  it('reconcileClaudeSessionRelationships emits a continuation row when one file\'s first parentUuid lives in another file', async () => {
    // Cross-file case: file B's first parentUuid points at a uuid that only
    // exists in file A, which no single-file parse can see.
    const originalFile = path.join(FIXTURES, 'original-session.jsonl');
    const crossFile = path.join(FIXTURES, 'cross-file-parent.jsonl');
    const { evidence: originalEv } = await parseClaudeSession(originalFile, {
      sessionPath: originalFile,
    });
    const { evidence: crossEv, relationships: crossRows } = await parseClaudeSession(
      crossFile,
      { sessionPath: crossFile },
    );

    // Sanity: cross-file evidence carries the original's last assistant uuid.
    assert.equal(crossEv.firstParentUuid, 'u-original-asst');
    // The local pass alone produced no continuation row (no /resume marker).
    assert.equal(
      crossRows.find((r) => r.relationshipType === 'continuation'),
      undefined,
    );

    const reconciled = reconcileClaudeSessionRelationships([
      { evidence: originalEv },
      { evidence: crossEv },
    ]);
    const cont = reconciled.find((r) => r.relationshipType === 'continuation');
    assert.ok(cont, 'cross-file parentUuid match must produce a continuation row');
    assert.equal(cont!.sessionId, 'cross-file-parent');
    assert.equal(cont!.relatedSessionId, 'original-session');
    assert.equal(cont!.sourceVersion, '2.1.97');
  });

  it('reconcileClaudeSessionRelationships emits fork rows when two files share a sourceSessionId', async () => {
    const branchA = path.join(FIXTURES, 'fork-branch-a.jsonl');
    const branchB = path.join(FIXTURES, 'fork-branch-b.jsonl');
    const { evidence: evA, relationships: rowsA } = await parseClaudeSession(branchA, {
      sessionPath: branchA,
    });
    const { evidence: evB, relationships: rowsB } = await parseClaudeSession(branchB, {
      sessionPath: branchB,
    });

    // Each branch has a root row keyed on its own filename, with the shared
    // in-log id surfaced as sourceSessionId.
    const rootA = rowsA.find((r) => r.relationshipType === 'root');
    const rootB = rowsB.find((r) => r.relationshipType === 'root');
    assert.equal(rootA!.sessionId, 'fork-branch-a');
    assert.equal(rootB!.sessionId, 'fork-branch-b');
    assert.equal(rootA!.sourceSessionId, '00000000-0000-0000-0000-000000000fff');
    assert.equal(rootB!.sourceSessionId, '00000000-0000-0000-0000-000000000fff');

    const reconciled = reconcileClaudeSessionRelationships([
      { evidence: evA },
      { evidence: evB },
    ]);
    const forks = reconciled.filter((r) => r.relationshipType === 'fork');
    assert.equal(forks.length, 2, 'each branch should get a fork row');
    const sids = forks.map((r) => r.sessionId).sort();
    assert.deepEqual(sids, ['fork-branch-a', 'fork-branch-b']);
    // Both fork rows point back at the shared in-log id as the related session.
    for (const f of forks) {
      assert.equal(f.relatedSessionId, '00000000-0000-0000-0000-000000000fff');
      assert.equal(f.sourceSessionId, '00000000-0000-0000-0000-000000000fff');
      assert.equal(f.sourceVersion, '2.1.97');
    }
  });

  it('reconcileClaudeSessionRelationships does not emit a fork row when one file is a strict continuation of the other', async () => {
    // Two files share a sourceSessionId but file B's firstParentUuid lives in
    // file A — that's a continuation, not a fork. Reconciliation should emit
    // exactly one continuation row and zero fork rows.
    const fileA = path.join(FIXTURES, 'original-session.jsonl');
    const fileB = path.join(FIXTURES, 'cross-file-parent.jsonl');
    const { evidence: evA } = await parseClaudeSession(fileA, { sessionPath: fileA });
    const { evidence: evB } = await parseClaudeSession(fileB, { sessionPath: fileB });
    const reconciled = reconcileClaudeSessionRelationships([
      { evidence: evA },
      { evidence: evB },
    ]);
    assert.equal(
      reconciled.filter((r) => r.relationshipType === 'fork').length,
      0,
      'strict continuation must not also be classified as a fork',
    );
    assert.equal(reconciled.filter((r) => r.relationshipType === 'continuation').length, 1);
  });

  it('re-parsing the same session produces relationship rows with stable hashes (dedup target)', async () => {
    // Acceptance: re-ingesting the same session does not create duplicate
    // relationship rows. The on-disk dedup is keyed by `relationshipIdHash`
    // (source + sessionId + relationshipType + relatedSessionId + agentId +
    // parentToolUseId), so the parser must produce equivalent rows on both
    // passes for the writer's existing dedup to fold them.
    const { relationshipIdHash } = await import('@relayburn/ledger');
    const file = path.join(FIXTURES, 'resume-marker.jsonl');
    const a = await parseClaudeSession(file, { sessionPath: file });
    const b = await parseClaudeSession(file, { sessionPath: file });
    const idsA = new Set(a.relationships.map(relationshipIdHash));
    const idsB = new Set(b.relationships.map(relationshipIdHash));
    // No intra-pass hash collisions, and both passes agree exactly.
    assert.equal(idsA.size, a.relationships.length);
    assert.deepEqual([...idsA].sort(), [...idsB].sort());
  });

  it('reconciliation skips a duplicate continuation when the local /resume already named the same parent', async () => {
    // Local /resume + cross-file parentUuid pointing at the same parent should
    // dedup at the reconciliation layer — we don't want two continuation rows
    // for the same edge with identical hashes.
    // Construct an in-memory evidence pair that matches the resume target
    // exactly.
    // NOTE(review): these literals assume the minimal required shape of
    // ClaudeRelationshipEvidence — confirm against the type if fields change.
    const parentEvidence = {
      fileSessionId: '11111111-1111-1111-1111-111111111111',
      inLogSessionIds: ['11111111-1111-1111-1111-111111111111'],
      seenUuids: ['u-original-asst'],
      hasResumeMarker: false,
    };
    const childEvidence = {
      fileSessionId: 'resume-marker',
      inLogSessionIds: ['99999999-9999-9999-9999-999999999999'],
      seenUuids: [],
      hasResumeMarker: true,
      resumeTargetSessionId: '11111111-1111-1111-1111-111111111111',
      firstParentUuid: 'u-original-asst',
      sourceVersion: '2.1.97',
    };
    const reconciled = reconcileClaudeSessionRelationships([
      { evidence: parentEvidence },
      { evidence: childEvidence },
    ]);
    // The local parse already emitted a continuation for (resume-marker ->
    // 11111111…); reconciliation should not add a duplicate edge here.
    const continuations = reconciled.filter(
      (r) =>
        r.relationshipType === 'continuation' &&
        r.sessionId === 'resume-marker' &&
        r.relatedSessionId === '11111111-1111-1111-1111-111111111111',
    );
    assert.equal(continuations.length, 0);
  });

  it('preserves sourceSessionId / sourceVersion on subagent rows when the in-log id differs from the file basename', async () => {
    // sub-rows need the same provenance stamp as roots so cross-source joins
    // can group all rows from one log under a common version banner. We
    // deliberately use a tmp filename whose basename differs from the
    // in-log session id (`55555555-…`) so the mismatch surfaces as
    // sourceSessionId on the subagent row.
    const dir = await mkdtemp(path.join(tmpdir(), 'claude-sub-'));
    const tmpFile = path.join(dir, 'session.jsonl');
    const subSrc = path.join(FIXTURES, 'nested-subagent.jsonl');
    await copyFile(subSrc, tmpFile);
    const { relationships } = await parseClaudeSession(tmpFile, { sessionPath: tmpFile });
    const sub = relationships.find((r) => r.relationshipType === 'subagent');
    assert.ok(sub, 'fixture has subagent rows');
    assert.equal(sub!.sourceSessionId, '55555555-5555-5555-5555-555555555555');
    await rm(dir, { recursive: true, force: true });
  });
});

Loading