From 1c54b709dfb21a1c8f6d0a39ffaaacf669f788da Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Tue, 2 Jun 2026 13:27:49 -0700
Subject: [PATCH 1/5] feat: parse OpenAI harmony channel tokens to record
 gpt-oss tool calls

---
 src/__tests__/harmony-boundary.test.ts | 568 +++++++++++++++++++
 src/harmony.ts                         | 731 +++++++++++++++++++++++++
 2 files changed, 1299 insertions(+)
 create mode 100644 src/__tests__/harmony-boundary.test.ts
 create mode 100644 src/harmony.ts

diff --git a/src/__tests__/harmony-boundary.test.ts b/src/__tests__/harmony-boundary.test.ts
new file mode 100644
index 0000000..e2f0254
--- /dev/null
+++ b/src/__tests__/harmony-boundary.test.ts
@@ -0,0 +1,568 @@
+import { describe, it, expect } from "vitest";
+import { parseHarmonyContent } from "../harmony.js";
+import { collapseOpenAISSE } from "../stream-collapse.js";
+
+// ===========================================================================
+// Harmony body fail-safe — STRUCTURAL (not per-token/per-exit) regressions.
+//
+// A 7-agent review found the per-branch guard (`absorbedTerminatorLiteral`,
+// checked only on the EOF exit) leaked control tokens on OTHER exit paths. The
+// fix makes the non-tool-body fail-safe STRUCTURAL: a terminator-shaped literal
+// (END/RETURN/CALL) or a START/CONSTRAIN may only be absorbed as embedded prose
+// when its immediate follower is real prose text; a literal immediately
+// followed by another control token or by EOF is NOT legitimately embedded and
+// fails the WHOLE input safe (verbatim + harmonyUnparsed). This pins the
+// verified-bad leak shapes the review found, and proves legitimate harmony
+// (including bodies that quote tokens as prose, multi-message streams, and tool
+// calls with whitespace-padded args) still parses.
+//
+// SSE-body idiom mirrors stream-collapse.test.ts:
+//   data: ${JSON.stringify({ choices: [{ delta: { content: "..." } }] })}
+// joined by "\n".
+// ===========================================================================
+
+/**
+ * Frame each harmony-bearing chunk as an SSE `data:` event, then append `[DONE]`.
+ */
+function openAIHarmonyBody(chunks: string[], id = "chatcmpl-hb"): string {
+  const lines: string[] = [];
+  for (const content of chunks) {
+    lines.push(`data: ${JSON.stringify({ id, choices: [{ delta: { content } }] })}`, "");
+  }
+  lines.push("data: [DONE]", "");
+  return lines.join("\n");
+}
+
+describe("harmony body fail-safe — structural (no control-token literal reaches routed output)", () => {
+  // RED 1: a final body terminated by <|return|> immediately followed by a
+  // SECOND <|return|> (then EOF). The first <|return|> is a terminator-shaped
+  // literal whose follower is a control token (not prose) — NOT legitimately
+  // embedded. The OLD code absorbed it and routed "A<|return|>" to content via
+  // the `terminated` exit. Correct: uniform fail-safe (verbatim + signal).
+  it("RED1: final<|message|>A<|return|><|return|> fails safe (no <|return|> leak via terminated exit)", () => {
+    const raw = "<|channel|>final<|message|>A<|return|><|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    // On failure the ORIGINAL bytes are preserved verbatim (the tokens are NOT
+    // stripped) — the no-leak guarantee is that a token never reaches a
+    // SUCCESSFUL routed body. Here the OLD code routed "A<|return|>"; now it
+    // fails safe, so nothing is routed at all.
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  // RED 2: an analysis body terminated by <|end|> immediately followed by a
+  // SECOND <|end|> (then EOF). The OLD code absorbed the first <|end|> and
+  // routed "A<|end|>" to reasoning via the `terminated` exit (the EOF-only
+  // guard never fired because the loop exited via `terminated`). Correct:
+  // uniform fail-safe — no <|end|> may reach reasoning.
+  it("RED2: analysis<|message|>A<|end|><|end|> fails safe (no <|end|> leak into reasoning)", () => {
+    const raw = "<|channel|>analysis<|message|>A<|end|><|end|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.reasoning).not.toContain("<|end|>");
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  // RED 3: a final body whose trailing text absorbs a <|start|> that runs
+  // straight to EOF (no following message, no terminator). The OLD code did not
+  // track START absorption, so "answer <|start|>" leaked into content via the
+  // EOF exit. Correct: a START absorbed with no real boundary after it fails.
+  it("RED3: final<|message|>answer <|start|> fails safe (absorbed <|start|> at EOF does not leak)", () => {
+    const raw = "<|channel|>final<|message|>answer <|start|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    // Verbatim on failure (nothing routed); the OLD code leaked "answer <|start|>".
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+  });
+
+  // RED 1b: variant of RED 1 where the token following the first <|return|> is
+  // a DIFFERENT control token (<|call|>) rather than a second <|return|>.
+  it("RED1b: final<|message|>A<|return|> followed only by a control token fails safe", () => {
+    // <|return|> then <|call|> then EOF — first terminator's follower is a
+    // control token, so it cannot be absorbed as prose.
+    const raw = "<|channel|>final<|message|>A<|return|><|call|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    // Verbatim on failure; the embedded <|return|> is never routed to content.
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  // RED 4: a final body that QUOTES a complete well-formed message which is then
+  // followed by trailing prose + a final terminator
+  // (`...hello<|return|> and then stop<|return|>`). The OLD code split it into
+  // two final messages and routed the quoted body "hello<|return|> and then
+  // stop" — MANGLED, leaking <|return|>. The quoted-split message's body must
+  // not absorb an embedded control literal, so the WHOLE input fails safe
+  // verbatim (never mangled). This is the harmony.ts "verbatim-or-clean, never
+  // mangled" contract at the quoted-message edge.
+  it("RED4: quoted message + trailing junk + final terminator fails safe (no <|return|> mangle)", () => {
+    const raw =
+      "<|channel|>final<|message|>To emit write " +
+      "<|start|>assistant<|channel|>final<|message|>hello<|return|> and then stop<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.content).not.toBe("To emit write hello<|return|> and then stop");
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  // GENERIC SAFETY INVARIANT across all four verified-bad inputs: whatever the
+  // outcome, the parse must NEVER leak a raw control-token literal — a success
+  // carries zero literals in routed output; a failure preserves bytes verbatim.
+  it("never leaks a raw control token: clean-or-verbatim across all four RED inputs", () => {
+    const inputs = [
+      "<|channel|>final<|message|>A<|return|><|return|>",
+      "<|channel|>analysis<|message|>A<|end|><|end|>",
+      "<|channel|>final<|message|>answer <|start|>",
+      "<|channel|>final<|message|>To emit write <|start|>assistant<|channel|>final<|message|>hello<|return|> and then stop<|return|>",
+    ];
+    for (const raw of inputs) {
+      const r = parseHarmonyContent(raw);
+      if (!r.failed) {
+        // A clean success must carry zero control-token literals in output.
+        expect(r.content).not.toMatch(/<\|(start|end|return|call|channel|message|constrain)\|>/);
+        expect(r.reasoning).not.toMatch(/<\|(start|end|return|call|channel|message|constrain)\|>/);
+      } else {
+        // A failure preserves the original bytes verbatim.
+        expect(r.content).toBe(raw);
+      }
+    }
+  });
+});
+
+describe("harmony bare-<|message|>-at-message-position fail-safe (silent-corruption gap)", () => {
+  // A bare <|message|> token at MESSAGE POSITION — with no preceding <|start|>
+  // or <|channel|> introducing it — is a grammar deviation, not a channel-less
+  // message. The OLD code accepted it, silently stripping control tokens and
+  // gluing bodies together. Correct: uniform fail-safe (verbatim +
+  // harmonyUnparsed), matching the parser's all-or-nothing contract and the
+  // isHarmonyContent gate. A legitimate message ALWAYS has START or CHANNEL
+  // before MESSAGE, so these failures cannot touch any valid harmony.
+
+  // BARE-RED 1: a valid final message followed by a SECOND message that begins
+  // with a bare <|message|> (no <|start|>/<|channel|>). The OLD code consumed
+  // the bare <|message|> as a channel-less message and glued the two bodies
+  // -> content "realinjected". Correct: fail safe verbatim.
+  it("BARE-RED1: final<|message|>real<|return|><|message|>injected<|return|> fails safe (no body glue)", () => {
+    const raw = "<|channel|>final<|message|>real<|return|><|message|>injected<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.content).not.toBe("realinjected");
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  // BARE-RED 2: the FIRST message itself begins with a bare <|message|> (no
+  // <|start|>/<|channel|>), followed by another bare <|message|>. The OLD code
+  // glued both bare bodies -> content "onetwo". Correct: parseHarmonyContent
+  // fails safe verbatim. At the collapse layer the content has NO real header
+  // (no <|channel|>/<|start|> before <|message|>), so it does not even trip the
+  // cheap `isHarmonyContent` gate (which requires channel-then-message or
+  // start-then-message ordering): the collapser leaves it VERBATIM with no glue
+  // and does not flag harmonyUnparsed. The corruption (body glue) is fixed
+  // either way; the bytes are preserved untouched.
+  it("BARE-RED2: <|message|>one<|end|><|message|>two<|return|> fails safe (no body glue)", () => {
+    const raw = "<|message|>one<|end|><|message|>two<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.content).not.toBe("onetwo");
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+
+    // Verbatim no-op at the collapse layer (no channel/start header -> not
+    // recognized as harmony structure, so content is preserved untouched).
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.content).not.toBe("onetwo");
+    expect(result.harmonyUnparsed).toBeUndefined();
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  // BARE-RED 3: leading prose text followed immediately by a bare <|message|>
+  // (no <|start|>/<|channel|>). The OLD code treated the leading text as a
+  // channel-less preamble and consumed the bare <|message|>, gluing them ->
+  // content "preamblebody". Correct: parseHarmonyContent fails safe verbatim. At
+  // the collapse layer there is again no real header before <|message|>, so it
+  // does not trip the `isHarmonyContent` gate and is left VERBATIM with no glue.
+  it("BARE-RED3: preamble<|message|>body<|return|> fails safe (no leading-text glue)", () => {
+    const raw = "preamble<|message|>body<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.content).not.toBe("preamblebody");
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+
+    // Verbatim no-op at the collapse layer (no channel/start header).
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.content).not.toBe("preamblebody");
+    expect(result.harmonyUnparsed).toBeUndefined();
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  // POSITIVE pin: leading channel-less TEXT that IS followed by a real
+  // <|channel|>-introduced message stays a valid preamble, prepended to content
+  // (the leading-text branch must still fire for START/CHANNEL, not MESSAGE).
+  it("leading text followed by a real <|channel|> message still parses (preamble preserved)", () => {
+    const raw = "preamble <|channel|>final<|message|>body<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe("preamble body");
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  // POSITIVE pin: leading channel-less TEXT followed by a real <|start|> message
+  // stays a valid preamble likewise.
+  it("leading text followed by a real <|start|> message still parses (preamble preserved)", () => {
+    const raw = "preamble <|start|>assistant<|channel|>final<|message|>body<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe("preamble body");
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+  });
+});
+
+describe("harmony body fail-safe — legitimate prose-quoted tokens still parse (guard against over-failing)", () => {
+  // A final body that QUOTES <|end|>/<|return|> as inline-code prose, each
+  // followed by real text and closed by a REAL <|return|> at EOF, is the
+  // documented "embedded literal" case. It must still parse cleanly (each literal
+  // is bracketed by prose — its follower here is the closing backtick).
+  it("final body quoting <|end|>/<|return|> as prose keeps the full sentence", () => {
+    const body = "See `<|end|>` for the end token and `<|return|>` too.";
+    const raw = `<|channel|>final<|message|>${body}<|return|>`;
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe(body);
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  // An analysis body quoting <|call|>/<|start|> as prose, closed by a real
+  // <|end|> before a real next message, must keep the full reasoning body.
+  it("analysis body quoting <|call|>/<|start|> as prose keeps the full body", () => {
+    const body = "Consider the `<|call|>` and `<|start|>` markers carefully.";
+    const raw = `<|channel|>analysis<|message|>${body}<|end|><|start|>assistant<|channel|>final<|message|>Done.<|return|>`;
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.reasoning).toBe(body);
+    expect(direct.content).toBe("Done.");
+  });
+});
+
+describe("harmony boundary — KNOWN_CHANNELS tightening of looksLikeMessageStart", () => {
+  // A lookahead <|start|>...<|channel|>X<|message|> whose channel X is NOT a
+  // known harmony channel (analysis/commentary/final) is NOT a real message
+  // boundary. Inside a final body, such a <|start|> is therefore embedded prose
+  // — but here it is immediately followed by control tokens / runs without real
+  // prose bracketing, so the structural body rule fails it safe rather than
+  // splitting on a bogus channel. NOTE: the test below accepts EITHER outcome
+  // and pins only clean-or-verbatim (no token leak), not the split itself.
+  it("a <|start|>...<|channel|>UNKNOWN<|message|> lookahead is not treated as a real boundary", () => {
+    const raw =
+      "<|channel|>final<|message|>body <|start|>assistant<|channel|>bogus<|message|>x<|return|>";
+    const direct = parseHarmonyContent(raw);
+    // Either outcome is accepted: clean on success, verbatim bytes on failure.
+    if (!direct.failed) {
+      expect(direct.content).not.toMatch(/<\|(start|channel|message|return)\|>/);
+    } else {
+      expect(direct.content).toBe(raw);
+    }
+  });
+
+  // Positive: a KNOWN-channel lookahead still terminates the body and starts the
+  // next message (no regression to legitimate multi-message splitting).
+  it("a KNOWN-channel <|start|> lookahead still terminates the current body", () => {
+    const raw =
+      "<|channel|>final<|message|>first answer<|start|>assistant<|channel|>final<|message|>second answer<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    // Two final messages concatenate into content; zero leaked tokens.
+    expect(direct.content).toBe("first answersecond answer");
+    expect(direct.content).not.toMatch(/<\|/);
+  });
+
+  // A directly-channel-less trailing message (<|start|>assistant<|message|>...,
+  // NO <|channel|>) is still a valid boundary — KNOWN_CHANNELS only gates a
+  // lookahead that actually carries a <|channel|> header.
+  it("a channel-less <|start|>...<|message|> trailing message is still a valid boundary", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"x"}<|call|>' +
+      "<|start|>assistant<|message|>The answer.<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    expect(direct.toolCalls[0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' });
+    expect(direct.content).toBe("The answer.");
+  });
+
+  // MULTI-CALL: the commentary tool-body scan picks the FIRST <|call|> whose
+  // accumulated preceding text is a COMPLETE JSON OBJECT and terminates the
+  // args there — it must NOT over-consume past that <|call|>. The body
+  // `{"a":1}` is already a complete object at the first CALL, so the args are
+  // exactly `{"a":1}`. This input carries a single <|call|> followed by a
+  // channel-less message; the clean trailing "done" content proves the CALL was
+  // the boundary (over-consumption would have mangled this into a single body).
+  it("MULTI-CALL: first valid-object <|call|> terminates the args (no over-consume)", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1}<|call|>' +
+      "<|start|>assistant<|message|>done<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    // Args terminated at the FIRST CALL — exactly the first complete object.
+    expect(direct.toolCalls[0]).toEqual({ name: "t", arguments: '{"a":1}' });
+    expect(direct.content).toBe("done");
+  });
+
+  // MULTI-CALL fail-safe: the same first-CALL selection holds when a bare second
+  // <|call|> immediately follows. The first CALL closes `{"a":1}` and is chosen
+  // as the terminator (not over-consumed across the second CALL); the stray
+  // trailing CALL then has no owning message, so the WHOLE parse fails safe
+  // verbatim rather than fabricating an over-consumed tool call.
+  it("MULTI-CALL: a stray trailing <|call|> after the first object fails safe verbatim", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1}<|call|><|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+  });
+});
+
+describe("harmony tool-arg whitespace canonicalization", () => {
+  // Leading whitespace in the captured JSON args (e.g. "<|message|> {\"a\":1}")
+  // must be trimmed so the recorded arguments are the canonical JSON value.
+  it("trims leading whitespace from tool-call JSON args", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|> {"a":1}<|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    expect(direct.toolCalls[0]).toEqual({ name: "t", arguments: '{"a":1}' });
+    expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow();
+  });
+
+  // Trailing whitespace (before the closing <|call|>) is likewise trimmed.
+  it("trims trailing whitespace from tool-call JSON args", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1} <|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    expect(direct.toolCalls[0].arguments).toBe('{"a":1}');
+  });
+
+  // Both-sides whitespace (including newlines) is trimmed to the canonical JSON.
+  it("trims surrounding whitespace/newlines from tool-call JSON args", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>\n  {"a":1}\n  <|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls[0].arguments).toBe('{"a":1}');
+  });
+
+  // Interior whitespace inside the JSON value is preserved (only leading/
+  // trailing is trimmed).
+  it("preserves interior whitespace inside the JSON args", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|> {"a": 1, "b": 2} <|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls[0].arguments).toBe('{"a": 1, "b": 2}');
+  });
+});
+
+describe("harmony tool-arg must be a JSON OBJECT (scalar/array/null are malformed)", () => {
+  // Harmony tool-call arguments are JSON OBJECTS. A commentary tool body that is
+  // a bare JSON SCALAR (number / boolean / string) parses as valid JSON but is
+  // NOT a valid tool-call argument object; the array and null cases below are
+  // rejected likewise. The OLD code accepted a scalar as a tool call with the
+  // scalar text as `arguments` (e.g. arguments "123"). Correct: uniform
+  // fail-safe (verbatim + harmonyUnparsed), no fabricated tool call.
+  it("a bare numeric scalar body (123) is NOT a tool call (fail-safe verbatim)", () => {
+    const raw = "<|channel|>commentary to=functions.f<|message|>123<|call|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  it("a bare boolean scalar body (true) is NOT a tool call (fail-safe verbatim)", () => {
+    const raw = "<|channel|>commentary to=functions.f<|message|>true<|call|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  it('a bare string scalar body ("str") is NOT a tool call (fail-safe verbatim)', () => {
+    const raw = '<|channel|>commentary to=functions.f<|message|>"str"<|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  it("a JSON array body ([1,2]) is NOT a tool call (arguments must be an object)", () => {
+    const raw = "<|channel|>commentary to=functions.f<|message|>[1,2]<|call|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  it("a JSON null body is NOT a tool call (arguments must be a non-null object)", () => {
+    const raw = "<|channel|>commentary to=functions.f<|message|>null<|call|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  // POSITIVE pin: a genuine JSON OBJECT body still parses as a tool call — the
+  // object requirement must not regress the happy path, including an empty {}.
+  it("an empty object body ({}) IS a valid tool call", () => {
+    const raw = "<|channel|>commentary to=functions.f<|message|>{}<|call|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toEqual([{ name: "f", arguments: "{}" }]);
+  });
+
+  // POSITIVE pin: matrix 13/14 — embedded control-token literals INSIDE a JSON
+  // OBJECT arg remain valid (the object requirement only rejects scalars/arrays,
+  // not objects whose string values happen to contain token-shaped substrings).
+  it("an object arg containing embedded <|call|> substrings stays a valid tool call (matrix 13)", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.say<|constrain|>json<|message|>{"text":"say <|call|> now"}<|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toEqual([{ name: "say", arguments: '{"text":"say <|call|> now"}' }]);
+  });
+});
+
+describe("harmony — recipient does NOT carry over across messages", () => {
+  // A prior analysis message carries `to=functions.x` (matrix 23: analysis +
+  // recipient is NOT a tool call). The NEXT message is a plain commentary
+  // message with NO recipient of its own. The recipient must NOT carry over —
+  // commentary-without-recipient is a preamble that routes to CONTENT, and no
+  // tool call named `x` may be fabricated.
+  it("analysis to=functions.x then plain commentary does not fabricate tool x", () => {
+    const raw =
+      "<|channel|>analysis to=functions.x<|message|>thinking<|end|>" +
+      "<|start|>assistant<|channel|>commentary<|message|>plain<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    // analysis body -> reasoning; commentary-without-recipient -> content.
+    expect(direct.reasoning).toBe("thinking");
+    expect(direct.content).toBe("plain");
+    // No tool call fabricated from the previous message's `to=functions.x`.
+    expect(direct.toolCalls).toEqual([]);
+  });
+});
+
+describe("harmony — cross-channel quoted-split routing (KNOWN LIMITATION)", () => {
+  // The quoted-whole-message ambiguity is channel-agnostic: a body that QUOTES a
+  // complete well-formed message of a DIFFERENT channel is structurally
+  // indistinguishable from two real messages, so it splits and routes each half
+  // by its (quoted) channel. We PIN the documented imperfect behavior so a future
+  // change to the split logic is a conscious decision, not an accident.
+
+  // (a) An analysis body that quotes a complete FINAL message: splits into an
+  // analysis half (-> reasoning, trailing space kept) and a final half (->
+  // content). The quoted control tokens are stripped, never leaked.
+  it("analysis body quoting a complete final message splits reasoning|content", () => {
+    const raw =
+      "<|channel|>analysis<|message|>note " +
+      "<|start|>assistant<|channel|>final<|message|>answer<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.reasoning).toBe("note ");
+    expect(direct.content).toBe("answer");
+    // Whatever the split, no raw control token leaks into routed output.
+    expect(direct.reasoning).not.toMatch(/<\|/);
+    expect(direct.content).not.toMatch(/<\|/);
+  });
+
+  // (b) A final body that quotes a complete commentary TOOL message: the quoted
+  // <|start|>...commentary to=functions.X...<|call|> is a well-formed message
+  // boundary, so it splits — the final half routes to content and the quoted
+  // commentary-tool half materializes as a real tool call (the known limitation:
+  // a quoted tool message is indistinguishable from a real one). No leak.
+  it("final body quoting a complete commentary-tool message splits content|toolCall", () => {
+    const raw =
+      "<|channel|>final<|message|>see " +
+      '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1}<|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe("see ");
+    expect(direct.toolCalls).toEqual([{ name: "t", arguments: '{"a":1}' }]);
+    expect(direct.content).not.toMatch(/<\|/);
+  });
+});
+
+describe("harmony — legitimate multi-message + tool calls (positive end-to-end)", () => {
+  // analysis -> reasoning, commentary tool call (whitespace-padded args), final
+  // -> content, with "\n" separator chunks between messages that must not reach
+  // content. Proves the structural fail-safe does not regress the happy path.
+  it("parses analysis + commentary tool (padded args) + final with separators", () => {
+    const chunks = [
+      "<|channel|>analysis<|message|>Plan the call.<|end|>",
+      "\n",
+      '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|> {"q":"x"} <|call|>',
+      "\n",
+      "<|start|>assistant<|channel|>final<|message|>Here is the result.<|return|>",
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.reasoning).toBe("Plan the call.");
+    expect(result.content).toBe("Here is the result.");
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' });
+    expect(result.content).not.toMatch(/<\|/);
+    expect(result.content).not.toContain("\n");
+  });
+});
diff --git a/src/harmony.ts b/src/harmony.ts
new file mode 100644
index 0000000..8e0416c
--- /dev/null
+++ b/src/harmony.ts
@@ -0,0 +1,731 @@
+/**
+ * OpenAI harmony channel parsing for open-weight gpt-oss models.
+ *
+ * Hosted api.openai.com pre-parses harmony output into structured
+ * `tool_calls` / `message.content`, but open-weight gpt-oss models served via
+ * Ollama / vLLM / OpenRouter (i.e. whenever OPENAI_BASE_URL points at a
+ * local/open-weights backend) stream tool calls as RAW harmony channel tokens
+ * INSIDE `delta.content`. Without parsing, the recorded fixture leaks the
+ * tool-call routing marker (`to=functions.NAME`) and its args JSON as plain
+ * text content instead of capturing a structured tool call.
+ *
+ * Harmony grammar (authoritative, from OpenAI's harmony spec):
+ *   Special tokens: <|start|> <|end|> <|message|> <|channel|> <|constrain|>
+ *                   <|return|> <|call|>
+ *   A message is laid out as:
+ *     <|start|>{role/recipient header}<|channel|>{channel header}<|message|>{body}{terminator}
+ *   where the leading <|start|> and/or <|channel|> may be absent on the very
+ *   first message of a stream, and the channel header carries the channel name
+ *   plus optional `to=functions.NAME` routing and `<|constrain|>json`.
+ *   Channels:
+ *     - analysis    chain-of-thought  -> reasoning
+ *     - commentary  function/tool calls + preambles
+ *     - final       user-facing answer -> content
+ *   A tool call is a `commentary`-channel message whose header (role segment OR
+ *   channel header) carries recipient routing `to=functions.NAME`; its args are
+ *   the JSON body after `<|message|>`, terminated by `<|call|>`. Example:
+ *     <|channel|>analysis<|message|>Need to call the tool.<|end|>
+ *     <|start|>assistant<|channel|>commentary to=functions.generate_a2ui
+ *       <|constrain|>json<|message|>{"component":"card","props":{}}<|call|>
+ *     <|start|>assistant<|channel|>final<|message|>Here you go.<|return|>
+ *
+ * Implementation: a TWO-PHASE parser, NOT an indexOf scanner.
+ *
+ *   Phase 1 — LEXER ({@link lex}). One left-to-right pass over the accumulated
+ *   content producing an ordered {@link Token}[]: each element is either a
+ *   CONTROL token (matched by exact prefix at the cursor) or a TEXT span (the
+ *   literal run between control tokens). Once bytes are consumed into a TEXT
+ *   span they are NEVER re-scanned for control tokens. The lexer is context-
+ *   free: a literal "<|call|>"/"<|channel|>" inside a JSON string or prose IS
+ *   lexed as a CONTROL token, and Phase 2 decides whether to re-materialize it
+ *   as body text. The lexer NEVER throws; it always returns a complete stream.
+ *
+ *   Phase 2 — STATE MACHINE ({@link parseTokens}). Walks the token stream
+ *   against the harmony grammar:
+ *     Stream      := TEXT? Message+ blank-TEXT?
+ *     Message     := START? Header? MESSAGE Body Terminator
+ *     Header      := role-TEXT? CHANNEL header-TEXT?
+ *     Body        := the token span following MESSAGE up to its real Terminator
+ *     Terminator  := END | RETURN | CALL | (lookahead START) | EOF (last message only)
+ *   The Terminator is located over the TOKEN STREAM, never via indexOf: a body
+ *   may re-materialize embedded control-token literals as prose (e.g. a final
+ *   answer that quotes "`<|end|>`"), so the real terminator is the first
+ *   END/RETURN/CALL (or a START that begins a well-formed next message) whose
+ *   follower is a real message boundary — EOF or a parseable next header. A
+ *   commentary tool body additionally requires Terminator==CALL AND a body that
+ *   parses as JSON; the "first CALL whose preceding TEXT parses as valid JSON,
+ *   else fail-safe" rule is preserved but operates over the token stream. A
+ *   CHANNEL header must name a known channel (analysis/commentary/final) and a
+ *   dangling CHANNEL/MESSAGE inside a body is a grammar deviation (fail-safe).
+ *
+ * Fail-safe contract: parsing is UNIFORM all-or-nothing. {@link
+ * parseHarmonyContent} returns `failed:true` with `content` set to the ORIGINAL
+ * raw input VERBATIM on ANY grammar deviation (TEXT-only / prose mention with
+ * no Message, CHANNEL with no following MESSAGE, a tool body that is not valid
+ * JSON or not CALL-terminated, an unterminated non-final body, a body
+ * terminator followed by trailing non-message junk, or any leftover unexpected
+ * token). There is EXACTLY ONE success path that strips tokens; it never
+ * partial-strips and never leaks a control token into content/reasoning.
+ * Harmony-present-but-unparseable is NOT transport loss — the caller preserves
+ * the bytes verbatim and surfaces a distinct `harmonyUnparsed` signal rather
+ * than `droppedChunks`/`truncated`.
+ *
+ * KNOWN LIMITATION — quoted whole-message ambiguity. Harmony tokens arrive as
+ * detokenized TEXT, so a body that QUOTES a COMPLETE, well-formed harmony
+ * message is structurally indistinguishable from two real messages. Example:
+ *   <|channel|>final<|message|>To emit write <|start|>assistant<|channel|>
+ *     final<|message|>hello<|return|>
+ * The lexer cannot know the inner `<|start|>...<|message|>hello<|return|>` is a
+ * quotation rather than a real second message, so this parses as TWO final
+ * messages and the quoted control tokens are stripped (content "To emit write
+ * hello"). This is the irreducible quoted-vs-real ambiguity; the parser does
+ * NOT over-engineer a guess. The fail-safe contract still holds at its edges:
+ * the split is only accepted when it yields cleanly well-formed messages — if
+ * any resulting message is malformed (e.g. the quoted message is followed by
+ * trailing junk, "...hello<|return|> and then stop"), the body terminator /
+ * trailing-junk rule fails the WHOLE input safe (verbatim) rather than emit a
+ * mangled middle. So the behavior is always verbatim-or-clean, never mangled.
+ */
+
+import type { ToolCall } from "./types.js";
+
+// Harmony special tokens (the exact literals matched by the lexer).
+const START_TOKEN = "<|start|>";
+const END_TOKEN = "<|end|>";
+const RETURN_TOKEN = "<|return|>";
+const CALL_TOKEN = "<|call|>";
+const CHANNEL_TOKEN = "<|channel|>";
+const MESSAGE_TOKEN = "<|message|>";
+const CONSTRAIN_TOKEN = "<|constrain|>";
+
+/** The seven harmony control-token kinds. */
+type ControlType = "START" | "END" | "RETURN" | "CALL" | "CHANNEL" | "MESSAGE" | "CONSTRAIN";
+
+// Control tokens tried, in order, by exact prefix match at the lexer cursor.
+// No literal is a prefix of another, so at most one can match at any position
+// and the order is irrelevant for correctness.
+const CONTROL_TOKENS: ReadonlyArray<{ type: ControlType; literal: string }> = [
+  { type: "START", literal: START_TOKEN },
+  { type: "END", literal: END_TOKEN },
+  { type: "RETURN", literal: RETURN_TOKEN },
+  { type: "CALL", literal: CALL_TOKEN },
+  { type: "CHANNEL", literal: CHANNEL_TOKEN },
+  { type: "MESSAGE", literal: MESSAGE_TOKEN },
+  { type: "CONSTRAIN", literal: CONSTRAIN_TOKEN },
+];
+
+// Reverse map: control-token kind -> its literal. Used by the state machine to
+// re-materialize a lexed control token as literal text when it turns out to be
+// body data (inside a JSON tool-call argument or quoted in a prose body).
+const CONTROL_LITERAL: Record<ControlType, string> = {
+  START: START_TOKEN,
+  END: END_TOKEN,
+  RETURN: RETURN_TOKEN,
+  CALL: CALL_TOKEN,
+  CHANNEL: CHANNEL_TOKEN,
+  MESSAGE: MESSAGE_TOKEN,
+  CONSTRAIN: CONSTRAIN_TOKEN,
+};
+
+// Recipient routing marker carried by the role segment or the channel header.
+// Capture group 1 is the function name: it must START with a letter or
+// underscore (so `to=functions.-` / `to=functions.` are NOT recipients), then
+// may continue with word chars, dots, and dashes. Unanchored: matches anywhere.
+const RECIPIENT_RE = /to=functions\.([A-Za-z_][\w.-]*)/;
+
+/**
+ * Fast-path gate: reports whether `content` LOOKS like it carries harmony
+ * structure, so callers only attempt a parse when one is plausible.
+ *
+ * The content qualifies when either opener token is eventually followed by a
+ * `<|message|>`:
+ *   - the first `<|channel|>` with a `<|message|>` anywhere after it, OR
+ *   - the first `<|start|>` with a `<|message|>` anywhere after it.
+ *
+ * This is NOT the authority on well-formedness — the state machine behind
+ * {@link parseHarmonyContent} decides that and is itself fully fail-safe.
+ * Demanding the token PAIRING keeps hosted/structured answers that merely
+ * mention a single token as prose out of the parser entirely.
+ *
+ * @param content accumulated assistant content to inspect
+ * @returns true when a channel/start token is followed by a message token
+ */
+export function isHarmonyContent(content: string): boolean {
+  // True when `opener` occurs and a <|message|> occurs somewhere after its FIRST hit.
+  const messageFollows = (opener: string): boolean => {
+    const at = content.indexOf(opener);
+    return at >= 0 && content.includes(MESSAGE_TOKEN, at + opener.length);
+  };
+  return messageFollows(CHANNEL_TOKEN) || messageFollows(START_TOKEN);
+}
+
+export interface HarmonyParseResult {
+  /** Content-routed text on success; the ORIGINAL raw input VERBATIM when `failed`. */
+  content: string;
+  /** Concatenated `analysis`-channel bodies; "" when `failed`. */
+  reasoning: string;
+  toolCalls: ToolCall[]; // commentary tool calls in stream order; [] when `failed`
+  /**
+   * True when the input was NOT valid harmony and was returned VERBATIM. This
+   * is not transport loss: signal `harmonyUnparsed`, not `droppedChunks`/`truncated`.
+   */
+  failed: boolean;
+}
+
+// ---------------------------------------------------------------------------
+// Phase 1: Lexer
+// ---------------------------------------------------------------------------
+
+/** A lexed control token. Only its kind is stored; see CONTROL_LITERAL for the text. */
+interface ControlToken {
+  kind: "control";
+  type: ControlType;
+}
+
+/** A literal text span between control tokens. Never empty (the lexer skips empty runs). */
+interface TextToken {
+  kind: "text";
+  value: string;
+}
+
+type Token = ControlToken | TextToken; // discriminated on `kind`
+
+/**
+ * Phase 1 lexer: split the accumulated content into an ordered token stream
+ * in one left-to-right pass.
+ *
+ * Every control literal opens with "<|", so the scan hops from one "<|" to the
+ * next; at each hit it tries the seven control literals by exact prefix. A hit
+ * closes the pending TEXT run (if non-empty) and emits a CONTROL token; a miss
+ * leaves the "<|" inside the TEXT run and the scan resumes one char later.
+ *
+ * Characters that land in a TEXT span are NEVER re-scanned for control tokens.
+ * Note the lexer is context-free: a control-shaped literal inside a JSON string
+ * or prose IS emitted as a CONTROL token; the state machine decides later
+ * whether to re-materialize it as body text.
+ *
+ * @param raw accumulated assistant content
+ * @returns the complete token stream; TEXT tokens are never empty. Never throws.
+ */
+function lex(raw: string): Token[] {
+  const out: Token[] = [];
+  // Start of the TEXT run currently being accumulated.
+  let runStart = 0;
+  // Next position to examine for a "<|" opener.
+  let pos = 0;
+
+  while (pos < raw.length) {
+    // Every control literal opens with "<|", so hop straight to the next one.
+    const opener = raw.indexOf("<|", pos);
+    if (opener === -1) break;
+
+    const hit = CONTROL_TOKENS.find((ct) => raw.startsWith(ct.literal, opener));
+    if (hit === undefined) {
+      // A "<|" that is not a control token stays inside the TEXT run.
+      pos = opener + 1;
+      continue;
+    }
+
+    if (opener > runStart) out.push({ kind: "text", value: raw.slice(runStart, opener) });
+    out.push({ kind: "control", type: hit.type });
+    pos = opener + hit.literal.length;
+    runStart = pos;
+  }
+
+  // Trailing TEXT after the last control token (or the whole input if none).
+  if (raw.length > runStart) out.push({ kind: "text", value: raw.slice(runStart) });
+  return out;
+}
+
+// ---------------------------------------------------------------------------
+// Phase 2: State machine
+// ---------------------------------------------------------------------------
+
+/** Whether `s` has no non-whitespace characters (the empty string included). */
+function isBlank(s: string): boolean {
+  return s.trim() === "";
+}
+
+/**
+ * Decide whether `s` is JSON text whose top-level value is a plain OBJECT
+ * (`{...}`): not null, not an array, not a scalar.
+ *
+ * Harmony tool-call arguments are JSON objects, so this check is what lets a
+ * commentary `<|call|>` terminate a tool call. Text that parses to a scalar
+ * (number / boolean / string / null) or an array is valid JSON yet still
+ * rejected, which sends the caller down the verbatim fail-safe path. Only the
+ * top-level shape is inspected — token-shaped substrings inside the object's
+ * string values are ordinary data.
+ */
+function isToolArgsObject(s: string): boolean {
+  try {
+    const parsed: unknown = JSON.parse(s);
+    // typeof null === "object" and arrays are objects too — exclude both.
+    return parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
+  } catch {
+    return false; // not JSON at all
+  }
+}
+
+/**
+ * Pull the channel name out of the header text that follows `<|channel|>`: its
+ * first whitespace-delimited word ("" when the header is blank). Anything after
+ * it is optional routing such as `to=functions.NAME`.
+ */
+function headerChannel(headerText: string): string {
+  const [name] = headerText.trim().split(/\s+/);
+  return name ?? "";
+}
+
+/** The harmony channels a real `<|channel|>` header may name. */
+const KNOWN_CHANNELS = new Set(["analysis", "commentary", "final"]);
+
+/**
+ * Lookahead: does the token at `idx` open a message header that reaches a
+ * `<|message|>`? Accepted prefix, every part optional:
+ *   START role-TEXT? | CHANNEL header-TEXT? (CONSTRAIN constraint-TEXT?)?
+ * Header TEXT after CHANNEL must name a KNOWN channel, else this is no boundary
+ * (a body quoting a bogus-channel pseudo-message is not split on it). Because
+ * every part is optional, a bare MESSAGE at `idx` also answers true.
+ */
+function looksLikeMessageStart(tokens: Token[], idx: number): boolean {
+  // Is there a control token of the given type at `at`? (false past either end)
+  const isControl = (at: number, type: ControlType): boolean => {
+    const t = tokens[at];
+    return t !== undefined && t.kind === "control" && t.type === type;
+  };
+  // Is there a TEXT span at `at`?
+  const isText = (at: number): boolean => tokens[at]?.kind === "text";
+
+  let k = idx;
+  if (isControl(k, "START")) {
+    k += 1;
+    if (isText(k)) k += 1; // optional role-TEXT
+  }
+  if (isControl(k, "CHANNEL")) {
+    k += 1;
+    const header = tokens[k];
+    if (header?.kind === "text") {
+      // An unknown channel name means this is not a true message boundary.
+      if (!KNOWN_CHANNELS.has(headerChannel(header.value))) return false;
+      k += 1; // header-TEXT
+    }
+    if (isControl(k, "CONSTRAIN")) {
+      k += 1;
+      if (isText(k)) k += 1; // optional constraint-name TEXT
+    }
+  }
+  return isControl(k, "MESSAGE");
+}
+
+/**
+ * Decide whether the terminator candidate (END/RETURN/CALL) sitting just before
+ * `idx` really closes its message. It does when, after skipping whitespace-only
+ * TEXT, the stream either ends (EOF) or continues with a well-formed message
+ * header. Otherwise the candidate is a literal token embedded in a prose body.
+ */
+function isRealBoundaryAfter(tokens: Token[], idx: number): boolean {
+  const isBlankText = (t: Token | undefined): boolean =>
+    t !== undefined && t.kind === "text" && isBlank(t.value);
+
+  let at = idx;
+  while (isBlankText(tokens[at])) at += 1; // skip inter-message / trailing whitespace
+
+  // Past the end => EOF boundary; otherwise a well-formed next header must start here.
+  return at >= tokens.length || looksLikeMessageStart(tokens, at);
+}
+
+/**
+ * Report whether the token at `idx` is a NON-BLANK TEXT span, i.e. real prose.
+ *
+ * This is the STRUCTURAL fail-safe test applied whenever a non-tool body is
+ * about to absorb a control-token literal. A quoted literal (as in "the
+ * `<|end|>` token") is inert body text only when real prose follows it. When
+ * the follower is another control token, whitespace-only text, or EOF, the
+ * literal is structure that would leak into routed content/reasoning, so the
+ * caller fails safe. Checking at absorption time covers every exit path of
+ * the body loop rather than one guard per exit.
+ */
+function hasProseFollower(tokens: Token[], idx: number): boolean {
+  const follower = tokens[idx];
+  // EOF or another control token: nothing prose-like follows.
+  if (follower === undefined || follower.kind !== "text") return false;
+  return follower.value.trim() !== "";
+}
+
+/** Sentinel thrown internally to unwind to the uniform fail-safe path. */
+const FAIL = Symbol("harmony-fail");
+
+/**
+ * Walk the token stream against the harmony grammar and route each message by
+ * channel: analysis -> reasoning, commentary tool call -> toolCalls, else content.
+ * @returns the fully-routed channels, only when EVERY message is well-formed.
+ * @throws the {@link FAIL} sentinel on ANY grammar deviation (caller goes verbatim).
+ */
+function parseTokens(tokens: Token[]): {
+  content: string;
+  reasoning: string;
+  toolCalls: ToolCall[];
+} {
+  let content = "";
+  let reasoning = "";
+  const toolCalls: ToolCall[] = [];
+
+  let i = 0; // cursor: index of the next unconsumed token
+  const peek = (): Token | undefined => tokens[i]; // token at the cursor (undefined at EOF)
+  const fail = (): never => {
+    throw FAIL;
+  };
+
+  // ----- Leading channel-less TEXT (before the first Message) -----
+  // Whitespace-only leading text is absorbed; non-whitespace leading text is
+  // channel-less content (a pre-channel preamble).
+  if (peek()?.kind === "text") {
+    const t = tokens[i] as TextToken;
+    // Only treat this as leading content when a real Message header actually
+    // follows (START / CHANNEL). A bare MESSAGE is NOT a message header — a
+    // legitimate message always opens with START or CHANNEL before MESSAGE — so
+    // leading text followed by a bare <|message|> is a grammar deviation, left
+    // for the main loop to fail safe (verbatim) rather than glued to the body.
+    // Otherwise the text is handled by the trailing / no-message rules below
+    // (which fail-safe when no message exists).
+    const next = tokens[i + 1];
+    const nextStartsMessage =
+      next !== undefined &&
+      next.kind === "control" &&
+      (next.type === "START" || next.type === "CHANNEL");
+    if (nextStartsMessage) {
+      if (!isBlank(t.value)) content += t.value;
+      i += 1;
+    }
+  }
+
+  // A well-formed stream has at least one Message.
+  let sawMessage = false;
+
+  // Set when the PREVIOUS body terminated on a START-lookahead — i.e. a body
+  // ran (without an intervening real terminator) into a `<|start|>...` that
+  // looks like a message header, so the parser SPLIT it off as a separate
+  // message. This is the irreducible quoted-whole-message ambiguity: in
+  // detokenized TEXT a body that QUOTES a complete well-formed message is
+  // indistinguishable from two real messages. The split is only accepted when
+  // BOTH resulting messages are cleanly well-formed (matrix-doc "verbatim-or-
+  // clean, never mangled"). A quoted-split message whose OWN body would have to
+  // absorb an embedded control literal (e.g. the quoted body
+  // "hello<|return|> and then stop") is NOT clean — absorbing it would leak the
+  // token into routed content/reasoning — so it fails the WHOLE input safe.
+  let nextIsQuotedSplit = false;
+
+  while (i < tokens.length) {
+    const tok = peek();
+    if (tok === undefined) break;
+
+    // Absorb whitespace-only inter-message / trailing TEXT spans. A non-blank
+    // stray TEXT span at message position is a grammar deviation.
+    if (tok.kind === "text") {
+      if (isBlank(tok.value)) {
+        i += 1;
+        continue;
+      }
+      // Non-blank text where a message (or EOF) was expected: this is leftover,
+      // unexpected token content — fail safe.
+      fail();
+    }
+
+    // Capture-and-reset the quoted-split marker for THIS message.
+    const fromQuotedSplit = nextIsQuotedSplit;
+    nextIsQuotedSplit = false;
+
+    // tok is a control token: the start of a Message.
+    let recipient: string | undefined;
+    let channel = "";
+    // A well-formed message ALWAYS opens with a real header (START and/or
+    // CHANNEL) before <|message|>. Track whether such a header was seen so a
+    // bare <|message|> at message position (no preceding START/CHANNEL) fails
+    // safe instead of being silently accepted as a channel-less message (which
+    // would strip control tokens and glue bodies together).
+    let sawHeader = false;
+
+    // ----- optional START + role-TEXT -----
+    if (tok.kind === "control" && tok.type === "START") {
+      sawHeader = true;
+      i += 1;
+      // Optional role header text carrying `to=functions.NAME`.
+      if (peek()?.kind === "text") {
+        const roleText = (tokens[i] as TextToken).value;
+        recipient = roleText.match(RECIPIENT_RE)?.[1];
+        i += 1;
+      }
+    }
+
+    // ----- optional CHANNEL + header-TEXT (+ optional CONSTRAIN) -----
+    if (peek()?.kind === "control" && (peek() as ControlToken).type === "CHANNEL") {
+      sawHeader = true;
+      i += 1;
+      // Optional header text carrying the channel name + optional routing.
+      if (peek()?.kind === "text") {
+        const headerText = (tokens[i] as TextToken).value;
+        channel = headerChannel(headerText);
+        const headerRecipient = headerText.match(RECIPIENT_RE)?.[1];
+        if (headerRecipient !== undefined) recipient = headerRecipient;
+        i += 1;
+      }
+      // An optional <|constrain|> token (e.g. <|constrain|>json) may sit
+      // between the channel header and <|message|>. It carries a constraint
+      // hint only — consume it and any following constraint-name text. It does
+      // NOT make a body <|call|>-terminated on its own (only a commentary
+      // recipient does).
+      if (peek()?.kind === "control" && (peek() as ControlToken).type === "CONSTRAIN") {
+        i += 1;
+        if (peek()?.kind === "text") {
+          // e.g. "json" — discard; it is a constraint hint, not body content.
+          i += 1;
+        }
+      }
+      // A real <|channel|> header names a KNOWN channel (analysis / commentary
+      // / final). If it does not, this is not harmony structure — it is a prose
+      // mention of the literal token (e.g. "use `<|channel|>` to pick a
+      // channel"). Fail safe so the original bytes are preserved verbatim.
+      if (!KNOWN_CHANNELS.has(channel)) fail();
+    }
+
+    // ----- mandatory MESSAGE -----
+    // A message must be introduced by a real header (START and/or CHANNEL)
+    // before <|message|> is consumed. A bare <|message|> at message position
+    // — with no preceding START/CHANNEL in this message — is a grammar
+    // deviation (not a channel-less message): accepting it would silently strip
+    // control tokens and glue bodies together. Fail safe (uniform verbatim),
+    // mirroring the bare CHANNEL/MESSAGE-inside-a-non-tool-body rule below.
+    if (!sawHeader) fail();
+    if (!(peek()?.kind === "control" && (peek() as ControlToken).type === "MESSAGE")) {
+      // A header (START and/or CHANNEL) with no following <|message|> is an
+      // incomplete message — fail safe.
+      fail();
+    }
+    i += 1; // consume MESSAGE
+    const bodyStart = i; // token index of the first body token
+
+    const isCommentaryToolCall = recipient !== undefined && channel === "commentary";
+
+    if (isCommentaryToolCall) {
+      // A commentary tool-call body is a JSON object terminated by <|call|>. The
+      // literal substring "<|call|>" can legitimately appear INSIDE a JSON
+      // string, and the lexer will have tokenized it as a CALL control token.
+      // So scan CALL tokens left-to-right, re-materializing the body text from
+      // tokens between <|message|> and each CALL, and pick the FIRST CALL whose
+      // accumulated preceding text parses as a COMPLETE JSON OBJECT (A2). A bare
+      // JSON SCALAR/array (e.g. `123`, `true`, `[1,2]`, `null`) is valid JSON but
+      // is NOT a valid tool-call argument, so it does NOT terminate the call.
+      // If no CALL closes a valid JSON object, fail safe.
+      let acc = "";
+      let j = bodyStart;
+      let parsed: string | undefined;
+      for (; j < tokens.length; j++) {
+        const t = tokens[j];
+        if (t.kind === "control" && t.type === "CALL") {
+          // Canonicalize the captured args: leading/trailing whitespace around
+          // the JSON value is not part of the value (e.g. "<|message|> {...} ")
+          // — trim it so the recorded arguments are the canonical JSON. Interior
+          // whitespace inside the JSON is preserved. JSON.parse already tolerates
+          // surrounding whitespace, so validate the TRIMMED form to pick the
+          // terminator correctly.
+          const candidate = acc.trim();
+          if (isToolArgsObject(candidate)) {
+            parsed = candidate;
+            break;
+          }
+          // Not a complete JSON object yet (incomplete, or a scalar/array that is
+          // not a valid tool-call argument) — the embedded "<|call|>" is part of
+          // the JSON string; keep accumulating.
+          acc += CONTROL_LITERAL.CALL;
+          continue;
+        }
+        if (t.kind === "control") {
+          acc += CONTROL_LITERAL[t.type];
+        } else {
+          acc += t.value;
+        }
+      }
+      if (parsed === undefined) fail(); // no CALL closed a valid JSON object
+      i = j + 1; // consume body tokens + the terminating CALL
+      toolCalls.push({ name: recipient!, arguments: parsed! });
+      sawMessage = true;
+      continue;
+    }
+
+    // ----- Non-tool Body + Terminator -----
+    // The body runs from MESSAGE to its REAL terminator. A literal control
+    // token can legitimately appear in a prose body (e.g. "the `<|end|>`
+    // token"), and the lexer will have tokenized it. So scan forward,
+    // re-materializing control literals into the body text, and stop at the
+    // FIRST END/RETURN/CALL whose follower is a real message boundary — i.e.
+    // EOF (optionally preceded by whitespace-only TEXT) or the start of a
+    // well-formed next message ({@link looksLikeMessageStart}). A bare START
+    // that begins a parseable message is also a (lookahead) terminator.
+    //
+    // STRUCTURAL FAIL-SAFE INVARIANT. A control-token literal may only be
+    // ABSORBED into a routed (content/reasoning) body when it is genuinely
+    // embedded prose — i.e. its immediate follower is real text ({@link
+    // hasProseFollower}), as in a final answer quoting "the `<|end|>` token"
+    // (matrix 10-12). When an embedded terminator-shaped literal (END/RETURN/
+    // CALL), a non-boundary START, or a stray CONSTRAIN is followed by another
+    // control token or by EOF (i.e. NOT bracketed by prose), it is not embedded
+    // prose — it is a control token that would LEAK into routed output. Rather
+    // than a per-exit guard (which the old code only applied on the EOF exit,
+    // leaking on the `terminated` exit), the check fires HERE, at absorption
+    // time, so a leak-shaped body fails safe uniformly no matter how its loop
+    // ends. This single invariant subsumes the terminated-exit leak
+    // (`A<|return|><|return|>`), the trailing `<|start|>` absorption leak
+    // (`answer <|start|>`), and the stray-CONSTRAIN re-materialization. Tool
+    // (commentary+recipient) bodies are handled separately above and are NOT
+    // subject to this rule (embedded tokens inside a JSON string arg are valid
+    // data validated by JSON.parse). `absorbedControlLiteral` records that a
+    // literal was legitimately absorbed mid-prose so the EOF branch can reject a
+    // body that runs past such a token straight to EOF with no real terminator.
+    let body = "";
+    let terminated = false;
+    let reachedEof = false;
+    let absorbedControlLiteral = false;
+    let j = i;
+    for (; j < tokens.length; j++) {
+      const t = tokens[j];
+      if (t.kind === "control" && (t.type === "END" || t.type === "RETURN" || t.type === "CALL")) {
+        if (isRealBoundaryAfter(tokens, j + 1)) {
+          terminated = true;
+          break;
+        }
+        // Embedded terminator-shaped literal. It is inert body prose ONLY when
+        // bracketed by real text AND this message is not itself a quoted split
+        // (a quoted-split body that must absorb a literal is not clean — it
+        // would leak the token); otherwise fail safe.
+        if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();
+        absorbedControlLiteral = true;
+        body += CONTROL_LITERAL[t.type];
+        continue;
+      }
+      if (t.kind === "control" && t.type === "START") {
+        if (looksLikeMessageStart(tokens, j)) {
+          // Lookahead terminator: the NEXT message begins here. Do NOT consume.
+          // The next message is a quoted-message split (see nextIsQuotedSplit).
+          terminated = true;
+          break;
+        }
+        // Embedded <|start|> inside prose — inert body text ONLY when bracketed
+        // by real text and not a quoted split; a START with no prose after it
+        // (e.g. trailing "answer <|start|>" or "<|start|>" before another
+        // control token) would leak — fail safe.
+        if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();
+        absorbedControlLiteral = true;
+        body += CONTROL_LITERAL.START;
+        continue;
+      }
+      if (t.kind === "control" && (t.type === "CHANNEL" || t.type === "MESSAGE")) {
+        // A bare CHANNEL / MESSAGE inside a non-tool body is structural, not
+        // prose: in a well-formed stream the next message's CHANNEL is always
+        // introduced by a START (caught above as a real boundary), so a
+        // dangling CHANNEL/MESSAGE here means the structure is malformed (e.g.
+        // "<|message|>body<|channel|>analysis"). Fail safe.
+        fail();
+      }
+      if (t.kind === "control") {
+        // A stray CONSTRAIN inside a body is an inert hint, but its literal
+        // would leak unless it is bracketed by prose (and not a quoted split) —
+        // fail safe otherwise.
+        if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();
+        absorbedControlLiteral = true;
+        body += CONTROL_LITERAL[t.type];
+        continue;
+      }
+      // TEXT span — part of the body.
+      body += t.value;
+    }
+    if (j >= tokens.length) reachedEof = true; // loop ran out of tokens, no terminator
+
+    if (terminated) {
+      const term = tokens[j] as ControlToken;
+      if (term.type === "START") {
+        // Lookahead: leave START in place for the next loop iteration, and flag
+        // that the next message is a quoted-message split (the current body ran
+        // into a START without a real terminator of its own).
+        i = j;
+        nextIsQuotedSplit = true;
+      } else {
+        i = j + 1; // consume END/RETURN/CALL
+      }
+      routeBody(channel, body);
+      sawMessage = true;
+      continue;
+    }
+    if (reachedEof) {
+      // EOF terminates the LAST message only (last in the stream, not "the final
+      // channel"). A content-routing body (final / commentary preamble without
+      // recipient / channel-less) may legitimately run to EOF with no explicit
+      // terminator, so it is accepted as-is. But:
+      //   - An `analysis` body is a terminator-expecting reasoning body
+      //     (closed by <|end|>); an UNTERMINATED analysis body at EOF is a
+      //     grammar deviation (B-A3) — fail safe rather than surface dangling
+      //     reasoning.
+      //   - If the body legitimately absorbed a mid-prose control literal and
+      //     then ran to EOF, the message was never properly terminated and the
+      //     control token would leak into the output (B-A1) — fail safe rather
+      //     than mangle.
+      if (channel === "analysis" || absorbedControlLiteral) fail();
+      i = j;
+      routeBody(channel, body);
+      sawMessage = true;
+      break;
+    }
+    // Unreachable in practice (loop only exits via terminator or EOF), but keep
+    // the uniform fail-safe for any unexpected fallthrough.
+    fail();
+  }
+
+  if (!sawMessage) fail();
+
+  return { content, reasoning, toolCalls };
+
+  // Route a non-tool body by channel. Declared after the `return`; function
+  // declarations are hoisted, so the calls above resolve. Only two shapes
+  // reach it: `analysis` (-> reasoning) and `final` / commentary-without-
+  // recipient (preamble) / channel-less (-> content). An UNKNOWN <|channel|>
+  // name never gets here — header parsing fail-safes first at the
+  // `if (!KNOWN_CHANNELS.has(channel)) fail()` guard.
+  function routeBody(ch: string, body: string): void {
+    if (ch === "analysis") {
+      reasoning += body;
+    } else {
+      // final, commentary-without-recipient (preamble), and channel-less bodies
+      // all surface as user-facing content.
+      content += body;
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Public entry point
+// ---------------------------------------------------------------------------
+
+/**
+ * Public entry point: split an accumulated assistant `content` string carrying
+ * harmony channel tokens into final-channel content, analysis-channel
+ * reasoning, and commentary-channel tool calls. Pure function — no I/O.
+ *
+ * Gate calls behind {@link isHarmonyContent} so ordinary (already-structured)
+ * streams are never touched. The function is nevertheless UNIFORM
+ * all-or-nothing fail-safe on its own: ANY structural/validation failure
+ * yields `{ content: raw, reasoning: "", toolCalls: [], failed: true }`, i.e.
+ * the original content VERBATIM, which the caller reports through a distinct
+ * `harmonyUnparsed` signal (NOT as a dropped/truncated chunk).
+ *
+ * @param raw accumulated assistant content, possibly containing harmony tokens
+ * @returns routed channels with `failed: false`, or the verbatim fail-safe
+ */
+export function parseHarmonyContent(raw: string): HarmonyParseResult {
+  const tokens = lex(raw); // lexing stays outside the try, as the lexer is documented not to throw
+  try {
+    return { ...parseTokens(tokens), failed: false };
+  } catch {
+    // Both the FAIL sentinel (grammar deviation) and any unexpected error land
+    // here: hand the original bytes back verbatim instead of throwing.
+    return { content: raw, reasoning: "", toolCalls: [], failed: true };
+  }
+}

From 1d744104b3b630d5dd9f3ddbb7dbce481157fef0 Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Tue, 2 Jun 2026 13:27:53 -0700
Subject: [PATCH 2/5] fix: harden stream collapsers and wire harmony fallback
 for recording

---
 src/__tests__/collapser-robustness.test.ts |  786 ++++++++++++
 src/__tests__/stream-collapse.test.ts      | 1291 ++++++++++++++++++++
 src/stream-collapse.ts                     |  410 ++++++-
 3 files changed, 2436 insertions(+), 51 deletions(-)
 create mode 100644 src/__tests__/collapser-robustness.test.ts

diff --git a/src/__tests__/collapser-robustness.test.ts b/src/__tests__/collapser-robustness.test.ts
new file mode 100644
index 0000000..2717c73
--- /dev/null
+++ b/src/__tests__/collapser-robustness.test.ts
@@ -0,0 +1,786 @@
+import { describe, it, expect } from "vitest";
+import { crc32 } from "node:zlib";
+import {
+  collapseOpenAISSE,
+  collapseGeminiSSE,
+  collapseCohereSSE,
+  collapseAnthropicSSE,
+  collapseBedrockEventStream,
+  collapseGeminiInteractionsSSE,
+} from "../stream-collapse.js";
+import { encodeEventStreamMessage } from "../aws-event-stream.js";
+
+// ===========================================================================
+// Robustness hardening for the per-provider stream collapsers.
+//
+// Each `describe` block targets one pre-existing defect found by review. The
+// assertions encode the intended graceful behavior; they fail (RED) against
+// the unfixed collapsers and pass (GREEN) once the fix lands.
+// ===========================================================================
+
+// ---------------------------------------------------------------------------
+// 1. Bedrock EventStream header bounds (decodeEventStreamFrames)
+//
+// `headersLength` / per-header `nameLen` / `valueLen` are read without
+// bounds-checking. A frame with a VALID prelude CRC but a `headersLength`
+// that overruns the payload throws an uncaught RangeError instead of the
+// intended graceful `{ frames, truncated: true }`.
+// ---------------------------------------------------------------------------
+
+/**
+ * Build a single AWS EventStream frame whose prelude CRC is VALID but whose
+ * declared headers length is the `headersLength` argument, regardless of the
+ * payload. `totalLength` is sized so the whole frame fits inside the buffer
+ * (so the existing total-length bounds check passes) and the message CRC is
+ * computed correctly — the ONLY corruption is the oversized headers length,
+ * which must be caught by header bounds validation.
+ */
+function buildFrameWithHeadersLength(headersLength: number, payload: Buffer): Buffer {
+  // total_length (4) + headers_length (4) + prelude_crc (4) + payload + message_crc (4).
+  // We intentionally allocate NO real header bytes — headersLength lies.
+  const totalLength = 4 + 4 + 4 + payload.length + 4;
+  const frame = Buffer.alloc(totalLength);
+  let offset = 0;
+
+  frame.writeUInt32BE(totalLength, offset);
+  offset += 4;
+  frame.writeUInt32BE(headersLength, offset); // bogus, oversized
+  offset += 4;
+
+  // Valid prelude CRC over the first 8 bytes (passes the prelude check).
+  const preludeCrc = crc32(frame.subarray(0, 8));
+  frame.writeUInt32BE(preludeCrc >>> 0, offset);
+  offset += 4;
+
+  payload.copy(frame, offset);
+  offset += payload.length;
+
+  // Valid message CRC over everything but the last 4 bytes.
+  const messageCrc = crc32(frame.subarray(0, totalLength - 4));
+  frame.writeUInt32BE(messageCrc >>> 0, offset);
+
+  return frame;
+}
+
+describe("collapseBedrockEventStream malformed header bounds", () => {
+  it("returns truncated (not a RangeError) when headersLength overruns the frame", () => {
+    const payload = Buffer.from(JSON.stringify({ contentBlockDelta: { delta: { text: "Hi" } } }));
+    // headersLength=1000 far exceeds the tiny payload region.
+    const frame = buildFrameWithHeadersLength(1000, payload);
+
+    let result: ReturnType<typeof collapseBedrockEventStream>;
+    expect(() => {
+      result = collapseBedrockEventStream(frame);
+    }).not.toThrow();
+
+    expect(result!.truncated).toBe(true);
+  });
+
+  it("processes valid earlier frames, then truncates on a malformed-header frame", () => {
+    const goodFrame = encodeEventStreamMessage("contentBlockDelta", {
+      contentBlockDelta: { delta: { text: "Good" } },
+    });
+    const payload = Buffer.from(JSON.stringify({ contentBlockDelta: { delta: { text: "Bad" } } }));
+    const badFrame = buildFrameWithHeadersLength(5000, payload);
+    const buf = Buffer.concat([goodFrame, badFrame]);
+
+    let result: ReturnType<typeof collapseBedrockEventStream>;
+    expect(() => {
+      result = collapseBedrockEventStream(buf);
+    }).not.toThrow();
+
+    expect(result!.content).toBe("Good");
+    expect(result!.truncated).toBe(true);
+  });
+
+  it("returns truncated when a per-header value length overruns the headers region", () => {
+    // headersLength=4 leaves room for a 1-byte nameLen + 1-byte name + 1-byte
+    // type, but only ONE byte remains for the 2-byte valueLen, so reading it
+    // would run past the end of the headers region. Build the bytes by hand.
+    //
+    // Layout inside the headers region (4 bytes): nameLen=1, name="x", type=7,
+    // then a single 0x00 where the 2-byte valueLen should be → must be caught.
+    const headerBytes = Buffer.from([
+      0x01, // nameLen = 1
+      0x78, // "x"
+      0x07, // type = STRING
+      0x00, // first byte of a valueLen that runs off the end of the region
+    ]);
+    const headersLength = headerBytes.length;
+    const payload = Buffer.from(JSON.stringify({ contentBlockDelta: { delta: { text: "Z" } } }));
+    const totalLength = 4 + 4 + 4 + headersLength + payload.length + 4;
+    const frame = Buffer.alloc(totalLength);
+    let offset = 0;
+    frame.writeUInt32BE(totalLength, offset);
+    offset += 4;
+    frame.writeUInt32BE(headersLength, offset);
+    offset += 4;
+    const preludeCrc = crc32(frame.subarray(0, 8));
+    frame.writeUInt32BE(preludeCrc >>> 0, offset);
+    offset += 4;
+    headerBytes.copy(frame, offset);
+    offset += headersLength;
+    payload.copy(frame, offset);
+    offset += payload.length;
+    const messageCrc = crc32(frame.subarray(0, totalLength - 4));
+    frame.writeUInt32BE(messageCrc >>> 0, offset);
+
+    let result: ReturnType<typeof collapseBedrockEventStream>;
+    expect(() => {
+      result = collapseBedrockEventStream(frame);
+    }).not.toThrow();
+    expect(result!.truncated).toBe(true);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 2. Missing tool_call index — OpenAI SSE and Cohere SSE
+//
+// `const index = tc.index as number` assumes index present. If a delta omits
+// `index`, every index-less delta collapses under a single map key, merging
+// distinct tool calls and corrupting arguments. Distinct calls must stay
+// distinct.
+// ---------------------------------------------------------------------------
+
+describe("collapseOpenAISSE missing tool_call index", () => {
+  it("keeps two index-less tool_call deltas as two distinct tool calls", () => {
+    const body = [
+      `data: ${JSON.stringify({
+        id: "chatcmpl-noidx",
+        choices: [
+          {
+            delta: {
+              tool_calls: [
+                {
+                  id: "call_a",
+                  type: "function",
+                  function: { name: "func_a", arguments: '{"x":1}' },
+                },
+              ],
+            },
+          },
+        ],
+      })}`,
+      "",
+      `data: ${JSON.stringify({
+        id: "chatcmpl-noidx",
+        choices: [
+          {
+            delta: {
+              tool_calls: [
+                {
+                  id: "call_b",
+                  type: "function",
+                  function: { name: "func_b", arguments: '{"y":2}' },
+                },
+              ],
+            },
+          },
+        ],
+      })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(2);
+    const names = result.toolCalls!.map((tc) => tc.name).sort();
+    expect(names).toEqual(["func_a", "func_b"]);
+    // Arguments must not be cross-contaminated into one entry.
+    const byName = Object.fromEntries(result.toolCalls!.map((tc) => [tc.name, tc.arguments]));
+    expect(byName.func_a).toBe('{"x":1}');
+    expect(byName.func_b).toBe('{"y":2}');
+  });
+
+  it("still merges streamed argument fragments that DO carry an index", () => {
+    const body = [
+      `data: ${JSON.stringify({
+        id: "chatcmpl-idx",
+        choices: [
+          {
+            delta: {
+              tool_calls: [{ index: 0, id: "call_x", function: { name: "fn", arguments: '{"a' } }],
+            },
+          },
+        ],
+      })}`,
+      "",
+      `data: ${JSON.stringify({
+        id: "chatcmpl-idx",
+        choices: [{ delta: { tool_calls: [{ index: 0, function: { arguments: '":1}' } }] } }],
+      })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("fn");
+    expect(result.toolCalls![0].arguments).toBe('{"a":1}');
+  });
+});
+
+describe("collapseCohereSSE missing tool_call index", () => {
+  it("keeps two index-less tool-call-start events as two distinct tool calls", () => {
+    const body = [
+      `event: tool-call-start`,
+      `data: ${JSON.stringify({
+        type: "tool-call-start",
+        delta: {
+          message: {
+            tool_calls: {
+              id: "call_a",
+              type: "function",
+              function: { name: "func_a", arguments: "" },
+            },
+          },
+        },
+      })}`,
+      "",
+      `event: tool-call-delta`,
+      `data: ${JSON.stringify({
+        type: "tool-call-delta",
+        delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } },
+      })}`,
+      "",
+      `event: tool-call-start`,
+      `data: ${JSON.stringify({
+        type: "tool-call-start",
+        delta: {
+          message: {
+            tool_calls: {
+              id: "call_b",
+              type: "function",
+              function: { name: "func_b", arguments: "" },
+            },
+          },
+        },
+      })}`,
+      "",
+      `event: tool-call-delta`,
+      `data: ${JSON.stringify({
+        type: "tool-call-delta",
+        delta: { message: { tool_calls: { function: { arguments: '{"y":2}' } } } },
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseCohereSSE(body);
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(2);
+    const names = result.toolCalls!.map((tc) => tc.name).sort();
+    expect(names).toEqual(["func_a", "func_b"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 3. Gemini SSE tool args default — JSON.stringify(undefined) === undefined
+//
+// `JSON.stringify(fc.args)` returns the VALUE undefined when args is omitted,
+// violating the ToolCall.arguments:string contract. Should be "{}".
+// ---------------------------------------------------------------------------
+
+describe("collapseGeminiSSE functionCall with no args", () => {
+  it("defaults missing args to the JSON object string '{}'", () => {
+    const body = [
+      `data: ${JSON.stringify({
+        candidates: [{ content: { role: "model", parts: [{ functionCall: { name: "ping" } }] } }],
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseGeminiSSE(body);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].arguments).toBe("{}");
+    expect(typeof result.toolCalls![0].arguments).toBe("string");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 4. Gemini SSE audio branch drops accumulated tool calls / content / reasoning
+//
+// When audioB64 is present the early return silently discards any tool calls,
+// content, and reasoning accumulated earlier in the same stream.
+// ---------------------------------------------------------------------------
+
+describe("collapseGeminiSSE audio branch preserves accumulated data", () => {
+  it("returns BOTH audio and a tool call when the stream has both", () => {
+    const body = [
+      `data: ${JSON.stringify({
+        candidates: [
+          {
+            content: {
+              role: "model",
+              parts: [{ functionCall: { name: "get_weather", args: { city: "Paris" } } }],
+            },
+          },
+        ],
+      })}`,
+      "",
+      `data: ${JSON.stringify({
+        candidates: [
+          {
+            content: {
+              role: "model",
+              parts: [{ inlineData: { mimeType: "audio/pcm", data: "QUJD" } }],
+            },
+          },
+        ],
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseGeminiSSE(body);
+    expect(result.audioB64).toBe("QUJD");
+    expect(result.audioMimeType).toBe("audio/pcm");
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("get_weather");
+  });
+
+  it("returns BOTH audio and accumulated content + reasoning", () => {
+    const body = [
+      `data: ${JSON.stringify({
+        candidates: [
+          {
+            content: {
+              role: "model",
+              parts: [{ text: "thinking", thought: true }, { text: "visible answer" }],
+            },
+          },
+        ],
+      })}`,
+      "",
+      `data: ${JSON.stringify({
+        candidates: [
+          {
+            content: {
+              role: "model",
+              parts: [{ inlineData: { mimeType: "audio/pcm", data: "WFla" } }],
+            },
+          },
+        ],
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseGeminiSSE(body);
+    expect(result.audioB64).toBe("WFla");
+    expect(result.content).toBe("visible answer");
+    expect(result.reasoning).toBe("thinking");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 5. SSE multi-line `data:` fields — only the first data: line per event read
+//
+// Per the SSE spec a single event may carry multiple `data:` lines that are
+// joined with "\n" to form one payload. The collapsers `.find` only the first.
+// ---------------------------------------------------------------------------
+
+/**
+ * Emit a single SSE event whose JSON payload is spread across MULTIPLE
+ * `data:` lines, the way a server splits a value at structural boundaries.
+ *
+ * Pretty-printing the object embeds newlines only between JSON tokens (where
+ * whitespace is legal), so prefixing each resulting line with `data:` and
+ * letting the collapser rejoin them with "\n" reconstructs valid JSON. This
+ * is the realistic multi-`data:` case; a mid-token split would be malformed
+ * SSE, not something a collapser should silently accept.
+ */
+function multiLineDataEvent(obj: unknown, eventLine?: string): string {
+  const dataLines = JSON.stringify(obj, null, 2)
+    .split("\n")
+    .map((l) => `data: ${l}`);
+  const parts = eventLine ? [eventLine, ...dataLines] : dataLines;
+  return parts.join("\n");
+}
+
+describe("multi-line SSE data fields", () => {
+  it("collapseOpenAISSE joins multiple data: lines into one JSON payload", () => {
+    const event = multiLineDataEvent({ choices: [{ delta: { content: "Hello multiline" } }] });
+    const body = [event, "", "data: [DONE]", ""].join("\n");
+
+    const result = collapseOpenAISSE(body);
+    expect(result.content).toBe("Hello multiline");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+
+  it("collapseAnthropicSSE joins multiple data: lines into one JSON payload", () => {
+    const event = multiLineDataEvent(
+      { index: 0, delta: { type: "text_delta", text: "Split text" } },
+      "event: content_block_delta",
+    );
+    const body = [event, ""].join("\n");
+
+    const result = collapseAnthropicSSE(body);
+    expect(result.content).toBe("Split text");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+
+  it("collapseGeminiSSE joins multiple data: lines into one JSON payload", () => {
+    const event = multiLineDataEvent({
+      candidates: [{ content: { parts: [{ text: "Gemini split" }] } }],
+    });
+    const body = [event, ""].join("\n");
+
+    const result = collapseGeminiSSE(body);
+    expect(result.content).toBe("Gemini split");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+
+  it("collapseGeminiInteractionsSSE joins multiple data: lines into one JSON payload", () => {
+    const event = multiLineDataEvent({
+      event_type: "content.delta",
+      index: 0,
+      delta: { type: "text", text: "Interactions split" },
+    });
+    const body = [event, ""].join("\n");
+
+    const result = collapseGeminiInteractionsSSE(body);
+    expect(result.content).toBe("Interactions split");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 6. webSearches dropped from collapseOpenAISSE tool-call return branch
+//
+// The text-only return includes `webSearches`, but the tool-call return branch
+// omits it. A Responses-API stream with both a web_search_call AND a tool_call
+// loses the web searches.
+// ---------------------------------------------------------------------------
+
+describe("collapseOpenAISSE webSearches with tool calls", () => {
+  it("returns BOTH toolCalls and webSearches when the stream has both", () => {
+    const body = [
+      `data: ${JSON.stringify({
+        type: "response.output_item.done",
+        item: { type: "web_search_call", status: "completed", action: { query: "weather paris" } },
+      })}`,
+      "",
+      `data: ${JSON.stringify({
+        choices: [
+          {
+            delta: {
+              tool_calls: [
+                {
+                  index: 0,
+                  id: "call_ws",
+                  type: "function",
+                  function: { name: "get_weather", arguments: '{"city":"Paris"}' },
+                },
+              ],
+            },
+          },
+        ],
+      })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("get_weather");
+    expect(result.webSearches).toEqual(["weather paris"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 7. Anthropic SSE missing content_block index
+//
+// `const index = parsed.index as number` is unguarded. When two tool_use
+// content_block_start events both OMIT `index`, they collapse under the single
+// `undefined` key and merge into one tool call. The OpenAI/Cohere/Bedrock
+// collapsers already guard this; Anthropic must too.
+// ---------------------------------------------------------------------------
+
+describe("collapseAnthropicSSE missing content_block index", () => {
+  it("keeps two index-less tool_use blocks as two distinct tool calls", () => {
+    const body = [
+      `event: content_block_start`,
+      `data: ${JSON.stringify({
+        type: "content_block_start",
+        content_block: { type: "tool_use", id: "toolu_a", name: "func_a" },
+      })}`,
+      "",
+      `event: content_block_delta`,
+      `data: ${JSON.stringify({
+        type: "content_block_delta",
+        delta: { type: "input_json_delta", partial_json: '{"x":1}' },
+      })}`,
+      "",
+      `event: content_block_start`,
+      `data: ${JSON.stringify({
+        type: "content_block_start",
+        content_block: { type: "tool_use", id: "toolu_b", name: "func_b" },
+      })}`,
+      "",
+      `event: content_block_delta`,
+      `data: ${JSON.stringify({
+        type: "content_block_delta",
+        delta: { type: "input_json_delta", partial_json: '{"y":2}' },
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseAnthropicSSE(body);
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(2);
+    const names = result.toolCalls!.map((tc) => tc.name).sort();
+    expect(names).toEqual(["func_a", "func_b"]);
+    // Arguments must land on the block they followed, not cross-contaminate.
+    const byName = Object.fromEntries(result.toolCalls!.map((tc) => [tc.name, tc.arguments]));
+    expect(byName.func_a).toBe('{"x":1}');
+    expect(byName.func_b).toBe('{"y":2}');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 8. Cohere SSE mixed-key delta correlation + uncorrelated-delta accounting
+//
+// `lastSyntheticIndex` was only set for index-LESS starts, so a real-indexed
+// start followed by an index-less delta fell back to a stale/undefined key and
+// silently dropped the args. The most-recent start key must be tracked
+// regardless of how it was keyed. And a delta that cannot correlate to any
+// known start must increment droppedChunks rather than vanish.
+// ---------------------------------------------------------------------------
+
+describe("collapseCohereSSE mixed-key delta correlation", () => {
+  it("lands an index-less delta on the most recent REAL-indexed start", () => {
+    const body = [
+      `event: tool-call-start`,
+      `data: ${JSON.stringify({
+        type: "tool-call-start",
+        index: 0,
+        delta: {
+          message: {
+            tool_calls: {
+              id: "call_a",
+              type: "function",
+              function: { name: "func_a", arguments: "" },
+            },
+          },
+        },
+      })}`,
+      "",
+      `event: tool-call-delta`,
+      `data: ${JSON.stringify({
+        type: "tool-call-delta",
+        delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } },
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseCohereSSE(body);
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("func_a");
+    expect(result.toolCalls![0].arguments).toBe('{"x":1}');
+  });
+
+  it("counts an index-less delta with no prior start as a dropped chunk", () => {
+    const body = [
+      `event: tool-call-delta`,
+      `data: ${JSON.stringify({
+        type: "tool-call-delta",
+        delta: { message: { tool_calls: { function: { arguments: '{"orphan":true}' } } } },
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseCohereSSE(body);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.droppedChunks).toBe(1);
+    expect(result.firstDroppedSample).toBeDefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 9. CRLF-delimited SSE streams
+//
+// Real HTTP/SSE streams use CRLF (`\r\n`) line endings and `\r\n\r\n` between
+// events. Splitting events on `\n\n` and data lines on `\n` leaves a trailing
+// `\r` on each data line, so the final `data: [DONE]\r` mis-parses and earlier
+// payloads carry a stray `\r`, corrupting JSON.parse.
+// ---------------------------------------------------------------------------
+
+describe("CRLF-delimited SSE streams", () => {
+  it("collapseOpenAISSE parses a CRLF stream (content + [DONE])", () => {
+    const body = [
+      `data: ${JSON.stringify({ choices: [{ delta: { content: "Hello CRLF" } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\r\n");
+
+    const result = collapseOpenAISSE(body);
+    expect(result.content).toBe("Hello CRLF");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+
+  it("collapseAnthropicSSE parses a multi-event CRLF stream", () => {
+    const body = [
+      "event: content_block_delta",
+      `data: ${JSON.stringify({ index: 0, delta: { type: "text_delta", text: "CRLF " } })}`,
+      "",
+      "event: content_block_delta",
+      `data: ${JSON.stringify({ index: 0, delta: { type: "text_delta", text: "text" } })}`,
+      "",
+    ].join("\r\n");
+
+    const result = collapseAnthropicSSE(body);
+    expect(result.content).toBe("CRLF text");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+
+  it("collapseGeminiSSE parses a multi-event CRLF stream", () => {
+    const body = [
+      `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "Gemini " }] } }] })}`,
+      "",
+      `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "CRLF" }] } }] })}`,
+      "",
+    ].join("\r\n");
+
+    const result = collapseGeminiSSE(body);
+    expect(result.content).toBe("Gemini CRLF");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+
+  it("collapseGeminiInteractionsSSE parses a multi-event CRLF stream", () => {
+    const body = [
+      `data: ${JSON.stringify({
+        event_type: "content.delta",
+        index: 0,
+        delta: { type: "text", text: "Interactions " },
+      })}`,
+      "",
+      `data: ${JSON.stringify({
+        event_type: "content.delta",
+        index: 0,
+        delta: { type: "text", text: "CRLF" },
+      })}`,
+      "",
+    ].join("\r\n");
+
+    const result = collapseGeminiInteractionsSSE(body);
+    expect(result.content).toBe("Interactions CRLF");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+
+  it("collapseCohereSSE parses a multi-event CRLF stream", () => {
+    const body = [
+      "event: content-delta",
+      `data: ${JSON.stringify({
+        type: "content-delta",
+        delta: { message: { content: { text: "Cohere " } } },
+      })}`,
+      "",
+      "event: content-delta",
+      `data: ${JSON.stringify({
+        type: "content-delta",
+        delta: { message: { content: { text: "CRLF" } } },
+      })}`,
+      "",
+    ].join("\r\n");
+
+    const result = collapseCohereSSE(body);
+    expect(result.content).toBe("Cohere CRLF");
+    expect(result.droppedChunks).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 10. Uncorrelated tool-arg deltas — Anthropic SSE + Bedrock EventStream
+//
+// The Cohere collapser already accounts for a tool-call-delta that has no
+// correlating start as a dropped chunk (droppedChunks++ / firstDroppedSample).
+// The Anthropic `input_json_delta` path and both Bedrock arg-delta paths
+// (Messages `input_json_delta` and Converse `toolUse.input`) silently dropped
+// the analogous uncorrelated delta. They must mirror Cohere. AND the Anthropic
+// `lastSyntheticIndex` fallback must still let a real-indexed start correlate
+// to a following index-less delta (positive coverage).
+// ---------------------------------------------------------------------------
+
+describe("collapseAnthropicSSE uncorrelated input_json_delta", () => {
+  it("counts an input_json_delta with no correlating tool_use start as a dropped chunk", () => {
+    const body = [
+      `event: content_block_delta`,
+      `data: ${JSON.stringify({
+        type: "content_block_delta",
+        delta: { type: "input_json_delta", partial_json: '{"orphan":true}' },
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseAnthropicSSE(body);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.droppedChunks).toBe(1);
+    // The sample carries the raw SSE payload (the orphan partial_json is
+    // JSON-escaped inside it), so assert the orphan token survives — stronger
+    // than the bare `.toBeDefined()` it would otherwise be.
+    expect(result.firstDroppedSample).toContain("no correlating tool_use start");
+    expect(result.firstDroppedSample).toContain('orphan\\":true');
+  });
+
+  it("lands an index-less delta on the most recent REAL-indexed tool_use start", () => {
+    const body = [
+      `event: content_block_start`,
+      `data: ${JSON.stringify({
+        type: "content_block_start",
+        index: 3,
+        content_block: { type: "tool_use", id: "toolu_real", name: "func_real" },
+      })}`,
+      "",
+      `event: content_block_delta`,
+      `data: ${JSON.stringify({
+        type: "content_block_delta",
+        delta: { type: "input_json_delta", partial_json: '{"k":9}' },
+      })}`,
+      "",
+    ].join("\n");
+
+    const result = collapseAnthropicSSE(body);
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("func_real");
+    expect(result.toolCalls![0].arguments).toBe('{"k":9}');
+    expect(result.droppedChunks).toBeUndefined();
+  });
+});
+
+describe("collapseBedrockEventStream uncorrelated tool-arg deltas", () => {
+  it("counts a Messages input_json_delta with no correlating start as a dropped chunk", () => {
+    const frame = encodeEventStreamMessage("contentBlockDelta", {
+      type: "content_block_delta",
+      index: 0,
+      delta: { type: "input_json_delta", partial_json: '{"orphanMsg":1}' },
+    });
+
+    const result = collapseBedrockEventStream(frame);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.droppedChunks).toBe(1);
+    expect(result.firstDroppedSample).toContain("no correlating tool_use start");
+    expect(result.firstDroppedSample).toContain('orphanMsg\\":1');
+  });
+
+  it("counts a Converse toolUse.input with no correlating start as a dropped chunk", () => {
+    const frame = encodeEventStreamMessage("contentBlockDelta", {
+      contentBlockIndex: 0,
+      contentBlockDelta: {
+        delta: { toolUse: { input: '{"orphanConverse":2}' } },
+      },
+    });
+
+    const result = collapseBedrockEventStream(frame);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.droppedChunks).toBe(1);
+    expect(result.firstDroppedSample).toContain("no correlating tool_use start");
+    expect(result.firstDroppedSample).toContain('orphanConverse\\":2');
+  });
+});
diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts
index d9f27fb..8bd88e0 100644
--- a/src/__tests__/stream-collapse.test.ts
+++ b/src/__tests__/stream-collapse.test.ts
@@ -9,6 +9,7 @@ import {
   collapseStreamingResponse,
 } from "../stream-collapse.js";
 import { encodeEventStreamMessage, encodeEventStreamFrame } from "../aws-event-stream.js";
+import { parseHarmonyContent } from "../harmony.js";
 
 // ---------------------------------------------------------------------------
 // 1. OpenAI SSE
@@ -1834,3 +1835,1293 @@ describe("collapseOpenAISSE with chat completions reasoning_content", () => {
     expect(result.content).toBe("");
   });
 });
+
+// ---------------------------------------------------------------------------
+// collapseOpenAISSE — OpenAI harmony channel tokens (open-weight gpt-oss)
+//
+// Open-weight gpt-oss models served via Ollama / vLLM / OpenRouter stream
+// tool calls as RAW harmony channel tokens inside `delta.content`, not in
+// `delta.tool_calls`. aimock must parse those channels so the recorded
+// fixture captures a structured tool call instead of leaking the raw
+// `to=functions...` marker as plain text content.
+//
+// Harmony grammar (authoritative, from OpenAI's harmony spec):
+//   <|channel|>analysis<|message|>...<|end|>            -> reasoning
+//   <|start|>assistant<|channel|>commentary to=functions.NAME
+//     <|constrain|>json<|message|>{...args...}<|call|>  -> tool call
+//   <|channel|>final<|message|>...<|return|>            -> content
+// ---------------------------------------------------------------------------
+
+describe("collapseOpenAISSE harmony channel tokens", () => {
+  it("parses a harmony tool call streamed as raw tokens inside delta.content", () => {
+    // The canonical harmony sequence, chunked across SSE deltas as a streaming server
+    // might emit it (splits fall mid-header / mid-body, never inside a `<|...|>` token).
+    const harmonyChunks = [
+      "<|channel|>analysis<|message|>Need to call the ",
+      "tool to render the card.<|end|>",
+      "<|start|>assistant<|channel|>commentary to=functions.generate_a2ui ",
+      '<|constrain|>json<|message|>{"component":"card",',
+      '"props":{"title":"Hi"}}<|call|>',
+    ];
+
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-harm", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    // Desired end state: a structured tool call is emitted...
+    expect(result.toolCalls).toBeDefined();
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("generate_a2ui");
+    expect(result.toolCalls![0].arguments).toBe('{"component":"card","props":{"title":"Hi"}}');
+
+    // ...the analysis channel becomes reasoning...
+    expect(result.reasoning).toBe("Need to call the tool to render the card.");
+
+    // ...and NO harmony control tokens or routing markers leak into content.
+    const leak = result.content ?? "";
+    expect(leak).not.toContain("<|channel|>");
+    expect(leak).not.toContain("<|message|>");
+    expect(leak).not.toContain("<|constrain|>");
+    expect(leak).not.toContain("<|call|>");
+    expect(leak).not.toContain("to=generate_a2ui");
+    expect(leak).not.toContain("to=functions.generate_a2ui");
+    expect(leak).not.toContain("functions.generate_a2ui");
+  });
+
+  it("captures the final channel as content and analysis as reasoning", () => {
+    const harmonyChunks = [
+      "<|channel|>analysis<|message|>The user said hello.<|end|>",
+      "<|start|>assistant<|channel|>final<|message|>Hello! How can ",
+      "I help you today?<|return|>",
+    ];
+
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-harm2", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.content).toBe("Hello! How can I help you today?");
+    expect(result.reasoning).toBe("The user said hello.");
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.content).not.toContain("<|channel|>");
+    expect(result.content).not.toContain("<|return|>");
+  });
+
+  it("parses multiple interleaved harmony tool calls", () => {
+    const harmonyChunks = [
+      "<|channel|>analysis<|message|>Call two tools.<|end|>",
+      "<|start|>assistant<|channel|>commentary to=functions.first ",
+      '<|constrain|>json<|message|>{"a":1}<|call|>',
+      "<|start|>assistant<|channel|>commentary to=functions.second ",
+      '<|constrain|>json<|message|>{"b":2}<|call|>',
+    ];
+
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-harm3", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.toolCalls).toHaveLength(2);
+    expect(result.toolCalls![0].name).toBe("first");
+    expect(result.toolCalls![0].arguments).toBe('{"a":1}');
+    expect(result.toolCalls![1].name).toBe("second");
+    expect(result.toolCalls![1].arguments).toBe('{"b":2}');
+    expect(result.content ?? "").not.toContain("functions.first");
+  });
+
+  it("is a no-op for normal (non-harmony) structured streams", () => {
+    // A plain text stream with no harmony control tokens must be untouched.
+    const body = [
+      `data: ${JSON.stringify({ id: "chatcmpl-plain", choices: [{ delta: { content: "Just " } }] })}`,
+      "",
+      `data: ${JSON.stringify({ id: "chatcmpl-plain", choices: [{ delta: { content: "text." } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+    expect(result.content).toBe("Just text.");
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.reasoning).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// collapseOpenAISSE harmony channel — fail-safe, token-aware, observable
+//
+// Regression coverage for the CR findings on the harmony parser. The shared
+// root cause was a naive indexOf-scan over detokenized text that could
+// DESTROY valid hosted-OpenAI answers (mere prose mention of a token),
+// TRUNCATE tool-call JSON containing a literal token substring, DROP
+// pre-channel / trailing-message text, and MISROUTE analysis-channel
+// recipients into tool calls — all silently. The fix makes parsing fail-safe
+// (return original content on any incomplete/invalid structure), terminate
+// json bodies at their spec terminator with JSON validation, anchor
+// pre/trailing text, gate recipient routing to the commentary channel, and
+// surface drops/truncations via droppedChunks/truncated.
+// ---------------------------------------------------------------------------
+
+describe("collapseOpenAISSE harmony fail-safe + token-aware", () => {
+  // A1 — content destruction: a hosted/structured answer that merely MENTIONS
+  // the token must NOT be mangled into empty content.
+  it("preserves a final answer that merely mentions <|channel|> as prose (no destruction)", () => {
+    const prose = "The special token is <|channel|> and it routes model output to channels.";
+    const body = [
+      `data: ${JSON.stringify({ id: "chatcmpl-prose", choices: [{ delta: { content: prose } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    // The whole answer must survive verbatim — never collapsed to "".
+    expect(result.content).toBe(prose);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.reasoning).toBeUndefined();
+  });
+
+  // A1 — explicit no-op guard: a genuinely structured tool_calls stream whose
+  // text content happens to mention the token must keep its content verbatim
+  // and fabricate NO harmony tool calls / reasoning.
+  it("is a no-op for a structured tool_calls stream whose content mentions <|channel|> as prose", () => {
+    const prose = "I will call a tool. Note: <|channel|> is a harmony marker.";
+    const body = [
+      `data: ${JSON.stringify({
+        id: "chatcmpl-struct",
+        choices: [
+          {
+            delta: {
+              content: prose,
+              tool_calls: [
+                {
+                  index: 0,
+                  id: "call_1",
+                  function: { name: "get_weather", arguments: '{"city":"SF"}' },
+                },
+              ],
+            },
+          },
+        ],
+      })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    // Content preserved verbatim; only the genuine structured tool call present.
+    expect(result.content).toBe(prose);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("get_weather");
+    expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}');
+  });
+
+  // A2 — body termination: tool-call args JSON containing a literal token
+  // substring must NOT be truncated to invalid JSON.
+  it("does not truncate tool-call args JSON containing a literal token substring", () => {
+    // The args contain the text "<|call|>" inside a JSON string value — the
+    // body must run to the REAL <|call|> terminator, not the embedded one.
+    const harmonyChunks = [
+      "<|start|>assistant<|channel|>commentary to=functions.say ",
+      '<|constrain|>json<|message|>{"text":"say <|call|> now"}<|call|>',
+    ];
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-emb", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("say");
+    // Args must be the COMPLETE, valid JSON — not cut at the embedded token.
+    expect(result.toolCalls![0].arguments).toBe('{"text":"say <|call|> now"}');
+    expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow();
+  });
+
+  // A2 — channel/start must not truncate an open json body.
+  it("does not let <|channel|> inside tool-call args truncate the JSON body", () => {
+    const harmonyChunks = [
+      "<|start|>assistant<|channel|>commentary to=functions.render ",
+      '<|constrain|>json<|message|>{"markup":"<|channel|> tag in a2ui"}<|call|>',
+    ];
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-emb2", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].arguments).toBe('{"markup":"<|channel|> tag in a2ui"}');
+    expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow();
+  });
+
+  // A3 — anchoring: text BEFORE the first <|channel|> must be captured.
+  it("captures pre-channel text as content", () => {
+    const harmonyChunks = [
+      "Here is a preamble. ",
+      "<|channel|>final<|message|>The answer.<|return|>",
+    ];
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-pre", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.content).toBe("Here is a preamble. The answer.");
+    expect(result.content).not.toContain("<|channel|>");
+  });
+
+  // A3 — anchoring: a trailing <|start|>assistant<|message|> message that has
+  // NO <|channel|> (final-answer-after-tool-call) must be captured.
+  it("captures a trailing <|start|>...<|message|> final message that lacks <|channel|>", () => {
+    const harmonyChunks = [
+      "<|start|>assistant<|channel|>commentary to=functions.lookup ",
+      '<|constrain|>json<|message|>{"q":"weather"}<|call|>',
+      "<|start|>assistant<|message|>The weather is sunny.<|return|>",
+    ];
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-trail", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("lookup");
+    expect(result.content).toBe("The weather is sunny.");
+    expect(result.content).not.toContain("<|start|>");
+    expect(result.content).not.toContain("<|message|>");
+  });
+
+  // A5 — recipient routing: recipient on the ROLE line (before <|channel|>)
+  // must be recognized as a commentary tool call.
+  it("recognizes a recipient placed on the role segment before <|channel|>", () => {
+    const harmonyChunks = [
+      "<|start|>assistant to=functions.role_placed<|channel|>commentary ",
+      '<|constrain|>json<|message|>{"x":1}<|call|>',
+    ];
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-role", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("role_placed");
+    expect(result.toolCalls![0].arguments).toBe('{"x":1}');
+  });
+
+  // A5 — recipient routing: an analysis-channel header carrying a recipient
+  // must NOT be misrouted into a tool call (only commentary routes).
+  it("does not route an analysis-channel recipient into a tool call", () => {
+    const harmonyChunks = [
+      "<|channel|>analysis to=functions.should_not_fire<|message|>Thinking about it.<|end|>",
+      "<|start|>assistant<|channel|>final<|message|>Done.<|return|>",
+    ];
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-an", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    // No tool call fabricated from the analysis channel.
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.reasoning).toBe("Thinking about it.");
+    expect(result.content).toBe("Done.");
+  });
+
+  // A2/A5 edge: a <|constrain|>json marker on a NON-tool channel (no
+  // commentary recipient) must NOT trigger <|call|>-terminated parsing and
+  // swallow the following final message — it ends at its own <|end|>.
+  it("does not let a non-tool-call <|constrain|>json body swallow the next message", () => {
+    const harmonyChunks = [
+      "<|channel|>analysis<|constrain|>json<|message|>Thinking.<|end|>",
+      "<|start|>assistant<|channel|>final<|message|>The final answer.<|return|>",
+    ];
+    const body = [
+      ...harmonyChunks.flatMap((chunk) => [
+        `data: ${JSON.stringify({ id: "chatcmpl-cj", choices: [{ delta: { content: chunk } }] })}`,
+        "",
+      ]),
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.reasoning).toBe("Thinking.");
+    expect(result.content).toBe("The final answer.");
+    expect(result.toolCalls).toBeUndefined();
+  });
+
+  // A6 — observability: a malformed harmony structure must fail SAFE (content
+  // preserved VERBATIM). Because the bytes are NOT lost, this is NOT transport
+  // loss: it surfaces via the distinct `harmonyUnparsed` signal, NOT
+  // droppedChunks/truncated (those are reserved for genuine transport loss).
+  it("surfaces a malformed harmony structure via harmonyUnparsed (not droppedChunks/truncated)", () => {
+    // A <|channel|> + <|message|> opener whose tool-call body never yields
+    // valid JSON (no terminator, no closing brace) — unparseable.
+    const broken =
+      "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid json";
+    const body = [
+      `data: ${JSON.stringify({ id: "chatcmpl-broken", choices: [{ delta: { content: broken } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    // Fail-safe: original content preserved verbatim, no fabricated/empty loss.
+    expect(result.content).toBe(broken);
+    expect(result.toolCalls).toBeUndefined();
+    // Distinct signal — NOT a dropped/truncated chunk.
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// collapseOpenAISSE — A4: reasoning + webSearches parity on the tool-call
+// return branch (non-harmony structured streams).
+// ---------------------------------------------------------------------------
+
+describe("collapseOpenAISSE reasoning/webSearches parity with tool calls", () => {
+  // A4 — a NON-harmony structured stream with delta.reasoning_content +
+  // tool_calls must preserve reasoning (DeepSeek / OpenRouter shape).
+  it("preserves reasoning_content alongside structured tool_calls (no harmony)", () => {
+    const body = [
+      `data: ${JSON.stringify({ id: "chatcmpl-r", choices: [{ delta: { reasoning_content: "Let me think. " } }] })}`,
+      "",
+      `data: ${JSON.stringify({ id: "chatcmpl-r", choices: [{ delta: { reasoning_content: "I will call a tool." } }] })}`,
+      "",
+      `data: ${JSON.stringify({ id: "chatcmpl-r", choices: [{ delta: { tool_calls: [{ index: 0, id: "call_a", function: { name: "get_weather", arguments: '{"city":"SF"}' } }] } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("get_weather");
+    // reasoning must survive even though this is NOT a harmony stream.
+    expect(result.reasoning).toBe("Let me think. I will call a tool.");
+  });
+
+  // A4 — webSearches parity: a Responses-API web_search_call followed by a
+  // tool call must surface webSearches on the tool-call return branch too.
+  it("preserves webSearches alongside tool calls", () => {
+    const body = [
+      `data: ${JSON.stringify({ type: "response.output_item.done", item: { type: "web_search_call", action: { query: "weather SF" } } })}`,
+      "",
+      `data: ${JSON.stringify({ id: "chatcmpl-ws", choices: [{ delta: { tool_calls: [{ index: 0, id: "call_w", function: { name: "get_weather", arguments: "{}" } }] } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.webSearches).toEqual(["weather SF"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// collapseOllamaNDJSON — A7: harmony parsing parity for gpt-oss over Ollama
+// (NDJSON). gpt-oss served via Ollama streams harmony tokens inside
+// message.content; without parsing they leak as raw text.
+// ---------------------------------------------------------------------------
+
+describe("collapseOllamaNDJSON harmony channel tokens", () => {
+  it("parses a harmony tool call streamed as raw tokens inside message.content", () => {
+    const harmonyChunks = [
+      "<|channel|>analysis<|message|>Need to render a card.<|end|>",
+      "<|start|>assistant<|channel|>commentary to=functions.generate_a2ui ",
+      '<|constrain|>json<|message|>{"component":"card","props":{"title":"Hi"}}<|call|>',
+    ];
+    const body = [
+      ...harmonyChunks.map((chunk) =>
+        JSON.stringify({
+          model: "gpt-oss",
+          message: { role: "assistant", content: chunk },
+          done: false,
+        }),
+      ),
+      JSON.stringify({ model: "gpt-oss", message: { role: "assistant", content: "" }, done: true }),
+    ].join("\n");
+
+    const result = collapseOllamaNDJSON(body);
+
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("generate_a2ui");
+    expect(result.toolCalls![0].arguments).toBe('{"component":"card","props":{"title":"Hi"}}');
+    expect(result.reasoning).toBe("Need to render a card.");
+    const leak = result.content ?? "";
+    expect(leak).not.toContain("<|channel|>");
+    expect(leak).not.toContain("to=functions.generate_a2ui");
+  });
+
+  it("is a no-op for normal (non-harmony) Ollama content", () => {
+    const body = [
+      JSON.stringify({
+        model: "llama3",
+        message: { role: "assistant", content: "Just " },
+        done: false,
+      }),
+      JSON.stringify({
+        model: "llama3",
+        message: { role: "assistant", content: "text." },
+        done: true,
+      }),
+    ].join("\n");
+
+    const result = collapseOllamaNDJSON(body);
+    expect(result.content).toBe("Just text.");
+    expect(result.toolCalls).toBeUndefined();
+  });
+
+  // A7 — pre-existing bug: JSON.stringify(undefined) returns undefined (never a
+  // JSON string), so a tool_call without arguments must default to "{}".
+  it("defaults arguments to {} when a structured tool_call omits arguments", () => {
+    const body = [
+      JSON.stringify({
+        model: "llama3",
+        message: {
+          role: "assistant",
+          content: "",
+          tool_calls: [{ function: { name: "no_args" } }],
+        },
+        done: false,
+      }),
+      JSON.stringify({ model: "llama3", message: { role: "assistant", content: "" }, done: true }),
+    ].join("\n");
+
+    const result = collapseOllamaNDJSON(body);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("no_args");
+    expect(result.toolCalls![0].arguments).toBe("{}");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// collapseOpenAISSE — multibyte content across SSE event boundaries
+//
+// collapseOpenAISSE receives the FULL concatenated response body (decoded
+// once from the complete buffer at the recorder layer), so multibyte UTF-8
+// in delta.content is already safe regardless of how deltas were chunked.
+// This test pins that guarantee: CJK + emoji split across deltas must
+// round-trip with no U+FFFD replacement characters.
+// ---------------------------------------------------------------------------
+
+describe("collapseOpenAISSE multibyte content", () => {
+  it("preserves CJK and emoji content chunked across SSE events", () => {
+    const body = [
+      `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content: "访问 " } }] })}`,
+      "",
+      `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content: "官网群" } }] })}`,
+      "",
+      `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content: " 🎉" } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+
+    const result = collapseOpenAISSE(body);
+    expect(result.content).toBe("访问 官网群 🎉");
+    expect(result.content).not.toContain("�");
+  });
+});
+
+// ===========================================================================
+// Harmony lexer + state-machine rewrite — STRUCTURAL acceptance matrix
+//
+// The harmony parser is a two-phase lexer + state machine (src/harmony.ts):
+//   Phase 1 lexes the accumulated content into an ordered CONTROL/TEXT token
+//   stream (bytes consumed into a TEXT span are never re-scanned for control
+//   tokens); Phase 2 walks the stream against the harmony grammar with UNIFORM
+//   all-or-nothing fail-safe semantics.
+//
+// Contract under test:
+//   - GATE / FAIL-SAFE: any grammar deviation returns the ORIGINAL raw input
+//     verbatim with no toolCalls/reasoning.
+//   - OBSERVABILITY: a harmony failure preserves bytes verbatim, so it is NOT
+//     transport loss — it sets `harmonyUnparsed`, never droppedChunks/truncated.
+//   - WHITESPACE: inter-message / trailing whitespace-only TEXT is absorbed.
+//   - NON-TOOL EMBEDDED TOKENS: literal token substrings inside a body do not
+//     truncate it; the body runs to its real terminator.
+//   - FALLBACK-ONLY wiring: harmony is attempted ONLY when there are no
+//     structured delta.tool_calls; structured calls always win and harmony
+//     content is then treated as prose (no phantom, no truncated stamp).
+//   - ROUTING: analysis->reasoning, final->content, commentary+recipient->tool,
+//     commentary-without-recipient->content. Recipient identifiers only.
+// ===========================================================================
+
+/** Build an OpenAI SSE body: one blank-line-terminated `data:` event per delta, then `data: [DONE]`. */
+function openAISSEBody(deltas: Array<Record<string, unknown>>, id = "chatcmpl-mtx"): string {
+  return [
+    ...deltas.flatMap((delta) => [`data: ${JSON.stringify({ id, choices: [{ delta }] })}`, ""]),
+    "data: [DONE]",
+    "",
+  ].join("\n");
+}
+
+/** Build an OpenAI SSE body that streams each chunk as its own `delta.content` event (see openAISSEBody). */
+function openAIHarmonyBody(chunks: string[], id = "chatcmpl-mtx"): string {
+  return openAISSEBody(
+    chunks.map((content) => ({ content })),
+    id,
+  );
+}
+
+/** Build an Ollama /api/chat NDJSON body: one `done: false` line per content chunk, then an empty-content `done: true` line. */
+function ollamaHarmonyBody(chunks: string[], model = "gpt-oss"): string {
+  return [
+    ...chunks.map((content) =>
+      JSON.stringify({ model, message: { role: "assistant", content }, done: false }),
+    ),
+    JSON.stringify({ model, message: { role: "assistant", content: "" }, done: true }),
+  ].join("\n");
+}
+
+describe("harmony rewrite — GATE / FAIL-SAFE (verbatim no-op on non-structure)", () => {
+  // (1)* prose mentioning <|channel|>/<|message|> as inline code -> content
+  // VERBATIM, no toolCalls/reasoning, no truncated/droppedChunks.
+  it("(1) prose mentioning the tokens is content verbatim (no destruction)", () => {
+    const prose =
+      "Harmony uses `<|channel|>` to pick a channel and `<|message|>` to start the body";
+    const result = collapseOpenAISSE(openAIHarmonyBody([prose]));
+    expect(result.content).toBe(prose);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.reasoning).toBeUndefined();
+    expect(result.truncated).toBeUndefined();
+    expect(result.droppedChunks ?? 0).toBe(0);
+
+    // Direct parser unit check: a prose-only mention has no Message -> failed.
+    const direct = parseHarmonyContent(prose);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(prose);
+    expect(direct.toolCalls).toEqual([]);
+    expect(direct.reasoning).toBe("");
+  });
+
+  // (2) tokens in reverse order -> verbatim no-op. The cheap `isHarmonyContent`
+  // gate requires channel-then-message (or start-then-message) ordering, so a
+  // reversed stream does not even trip the gate: the collapse path leaves the
+  // content verbatim and never sets harmonyUnparsed. The parser itself, when
+  // called directly, still fails safe on the reversed structure.
+  it("(2) reversed token order is a verbatim no-op", () => {
+    const reversed = "<|message|>body<|channel|>analysis";
+    const result = collapseOpenAISSE(openAIHarmonyBody([reversed]));
+    expect(result.content).toBe(reversed);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.harmonyUnparsed).toBeUndefined();
+    expect(result.truncated).toBeUndefined();
+    expect(result.droppedChunks ?? 0).toBe(0);
+
+    const direct = parseHarmonyContent(reversed);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(reversed);
+  });
+
+  // (3) prose containing every literal token but no valid Message -> verbatim,
+  // accurate harmonyUnparsed signal.
+  it("(3) every literal token but no valid message -> verbatim + harmonyUnparsed", () => {
+    const prose =
+      "tokens: <|start|> <|end|> <|return|> <|call|> <|channel|> <|message|> <|constrain|> (all as prose)";
+    const result = collapseOpenAISSE(openAIHarmonyBody([prose]));
+    expect(result.content).toBe(prose);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.reasoning).toBeUndefined();
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  // (4) empty / whitespace-only -> unchanged no-op.
+  it("(4) empty and whitespace-only inputs are unchanged no-ops", () => {
+    const empty = collapseOpenAISSE(openAIHarmonyBody([""]));
+    expect(empty.content).toBe("");
+    expect(empty.toolCalls).toBeUndefined();
+    expect(empty.harmonyUnparsed).toBeUndefined();
+
+    const ws = collapseOpenAISSE(openAIHarmonyBody(["   \n  "]));
+    expect(ws.content).toBe("   \n  ");
+    expect(ws.toolCalls).toBeUndefined();
+    expect(ws.harmonyUnparsed).toBeUndefined();
+
+    // Direct: empty/whitespace are not harmony at all; parser returns failed
+    // (no message) with content preserved.
+    expect(parseHarmonyContent("").content).toBe("");
+    expect(parseHarmonyContent("   ").content).toBe("   ");
+  });
+});
+
+describe("harmony rewrite — WHITESPACE (the masked class)", () => {
+  // (5)* analysis<|end|> + "\n" + <|start|>...final<|return|> -> reasoning +
+  // content parsed, no leak of the inter-message newline.
+  it("(5) newline between analysis<|end|> and the next <|start|> is absorbed", () => {
+    const chunks = [
+      "<|channel|>analysis<|message|>Thinking it through.<|end|>",
+      "\n",
+      "<|start|>assistant<|channel|>final<|message|>The answer.<|return|>",
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.reasoning).toBe("Thinking it through.");
+    expect(result.content).toBe("The answer.");
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.content).not.toContain("\n");
+    expect(result.content).not.toContain("<|");
+  });
+
+  // (6) single space between two commentary tool calls -> 2 toolCalls.
+  it("(6) a single space between two commentary tool calls yields 2 tool calls", () => {
+    const chunks = [
+      '<|start|>assistant<|channel|>commentary to=functions.first <|constrain|>json<|message|>{"a":1}<|call|>',
+      " ",
+      '<|start|>assistant<|channel|>commentary to=functions.second <|constrain|>json<|message|>{"b":2}<|call|>',
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.toolCalls).toHaveLength(2);
+    expect(result.toolCalls![0]).toEqual({ name: "first", arguments: '{"a":1}' });
+    expect(result.toolCalls![1]).toEqual({ name: "second", arguments: '{"b":2}' });
+  });
+
+  // (7)* trailing "\n" after the final <|return|> -> parsed, newline absorbed.
+  it("(7) trailing newline after the final <|return|> is absorbed, not leaked", () => {
+    const chunks = ["<|channel|>final<|message|>All done.<|return|>", "\n"];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.content).toBe("All done.");
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.content).not.toContain("\n");
+
+    const direct = parseHarmonyContent("<|channel|>final<|message|>All done.<|return|>\n");
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe("All done.");
+  });
+
+  // (8) leading whitespace before the first <|channel|> -> absorbed (blank
+  // leading text is not content).
+  it("(8) leading whitespace before the first <|channel|> is absorbed", () => {
+    const direct = parseHarmonyContent("  \n <|channel|>final<|message|>Hi.<|return|>");
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe("Hi.");
+    expect(direct.reasoning).toBe("");
+  });
+
+  // (9) mixed " \n " between three messages -> all parsed.
+  it("(9) mixed whitespace between three messages is absorbed; all parse", () => {
+    const chunks = [
+      "<|channel|>analysis<|message|>Reason.<|end|>",
+      " \n ",
+      '<|start|>assistant<|channel|>commentary to=functions.tool <|constrain|>json<|message|>{"k":1}<|call|>',
+      "  ",
+      "<|start|>assistant<|channel|>final<|message|>Answer.<|return|>",
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.reasoning).toBe("Reason.");
+    expect(result.content).toBe("Answer.");
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0]).toEqual({ name: "tool", arguments: '{"k":1}' });
+  });
+});
+
+describe("harmony rewrite — NON-TOOL EMBEDDED TOKENS", () => {
+  // (10)* final body quoting <|end|>/<|return|> as prose, terminated by a real
+  // <|return|> -> content = full sentence (NOT truncated to "See ").
+  it("(10) final body quoting <|end|>/<|return|> keeps the full sentence", () => {
+    const body = "See `<|end|>` for the end token and `<|return|>` too.";
+    const raw = `<|channel|>final<|message|>${body}<|return|>`;
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe(body);
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(body);
+    expect(result.toolCalls).toBeUndefined();
+  });
+
+  // (11) analysis body quoting <|call|>/<|start|> as prose, terminated by
+  // <|end|> -> reasoning = full body.
+  it("(11) analysis body quoting <|call|>/<|start|> keeps the full body", () => {
+    const body = "Consider the `<|call|>` and `<|start|>` markers carefully.";
+    const raw = `<|channel|>analysis<|message|>${body}<|end|><|start|>assistant<|channel|>final<|message|>Done.<|return|>`;
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.reasoning).toBe(body);
+    expect(direct.content).toBe("Done.");
+  });
+
+  // (12) commentary-preamble (no recipient) body quoting <|end|> -> content,
+  // full body.
+  it("(12) commentary preamble (no recipient) quoting <|end|> keeps full content", () => {
+    const body = "Let me explain `<|end|>` before answering.";
+    const raw = `<|channel|>commentary<|message|>${body}<|end|><|start|>assistant<|channel|>final<|message|>Answer.<|return|>`;
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe(`${body}Answer.`);
+    expect(direct.toolCalls).toEqual([]);
+  });
+});
+
+describe("harmony rewrite — TOOL BODY (keep green)", () => {
+  // (13) args {"text":"say <|call|> now"}<|call|> -> 1 toolCall, exact args.
+  it("(13) embedded <|call|> inside tool args does not truncate the JSON", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.say<|constrain|>json<|message|>{"text":"say <|call|> now"}<|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    expect(direct.toolCalls[0]).toEqual({
+      name: "say",
+      arguments: '{"text":"say <|call|> now"}',
+    });
+    expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow();
+  });
+
+  // (14) args containing <|channel|>/<|message|> substrings -> exact args.
+  it("(14) embedded <|channel|>/<|message|> inside tool args are preserved exactly", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.render<|constrain|>json<|message|>{"markup":"<|channel|> and <|message|> tags"}<|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    expect(direct.toolCalls[0].arguments).toBe('{"markup":"<|channel|> and <|message|> tags"}');
+    expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow();
+  });
+
+  // (15) args invalid JSON, no terminator -> fail-safe verbatim + signal.
+  it("(15) invalid-JSON tool body with no terminator fails safe", () => {
+    const raw =
+      "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid json";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  // (16) args valid JSON but terminated by <|end|> not <|call|> -> fail-safe.
+  it("(16) valid-JSON tool body terminated by <|end|> (not <|call|>) fails safe", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.x<|constrain|>json<|message|>{"a":1}<|end|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+  });
+});
+
+describe("harmony rewrite — DUAL-SOURCE (fallback-only wiring)", () => {
+  // (17)* structured delta.tool_calls + content prose mentioning tokens ->
+  // content verbatim, exactly the structured toolCall(s), NO phantom, NO
+  // truncated/droppedChunks.
+  it("(17) structured tool_calls + prose mentioning tokens: only structured call, content verbatim", () => {
+    const prose = "I will call a tool. Note `<|channel|>` and `<|message|>` are harmony markers.";
+    const body = openAISSEBody([
+      {
+        content: prose,
+        tool_calls: [
+          { index: 0, id: "call_1", function: { name: "get_weather", arguments: '{"city":"SF"}' } },
+        ],
+      },
+    ]);
+    const result = collapseOpenAISSE(body);
+    expect(result.content).toBe(prose);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("get_weather");
+    expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}');
+    expect(result.truncated).toBeUndefined();
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.harmonyUnparsed).toBeUndefined();
+  });
+
+  // (18) structured tool_calls + content that IS well-formed harmony tool
+  // tokens -> only structured calls win (fallback-only), count == structured.
+  it("(18) structured tool_calls win over well-formed harmony content (fallback-only)", () => {
+    const harmony =
+      '<|start|>assistant<|channel|>commentary to=functions.harmony_tool<|constrain|>json<|message|>{"z":9}<|call|>';
+    const body = openAISSEBody([
+      {
+        content: harmony,
+        tool_calls: [
+          { index: 0, id: "call_s", function: { name: "structured_tool", arguments: '{"s":1}' } },
+        ],
+      },
+    ]);
+    const result = collapseOpenAISSE(body);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("structured_tool");
+    // The harmony content is treated as prose (never parsed into a 2nd call).
+    expect(result.toolCalls!.some((tc) => tc.name === "harmony_tool")).toBe(false);
+  });
+
+  // (19) structured tool_calls + harmony-looking content that would FAIL to
+  // parse -> no truncated stamp (content is prose, not a harmony failure).
+  it("(19) structured tool_calls + unparseable harmony content: no truncated stamp", () => {
+    const broken =
+      "<|start|>assistant<|channel|>commentary to=functions.broken<|message|>{not valid";
+    const body = openAISSEBody([
+      {
+        content: broken,
+        tool_calls: [
+          { index: 0, id: "call_s", function: { name: "structured_tool", arguments: "{}" } },
+        ],
+      },
+    ]);
+    const result = collapseOpenAISSE(body);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("structured_tool");
+    expect(result.truncated).toBeUndefined();
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.harmonyUnparsed).toBeUndefined();
+    // Content is preserved as-is (prose), never collapsed.
+    expect(result.content).toBe(broken);
+  });
+});
+
+describe("harmony rewrite — MULTI-MESSAGE REALISTIC", () => {
+  // (20) analysis->reasoning, "\n", commentary toolCall, "\n", final->content.
+  it("(20) analysis + commentary tool + final with separators: all correct, zero leak", () => {
+    const chunks = [
+      "<|channel|>analysis<|message|>Plan the call.<|end|>",
+      "\n",
+      '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"x"}<|call|>',
+      "\n",
+      "<|start|>assistant<|channel|>final<|message|>Here is the result.<|return|>",
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.reasoning).toBe("Plan the call.");
+    expect(result.content).toBe("Here is the result.");
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' });
+    expect(result.content).not.toContain("<|");
+    expect(result.content).not.toContain("\n");
+  });
+
+  // (21) analysis + final only with separators (no tool) -> reasoning+content.
+  it("(21) analysis + final only (no tool) -> reasoning + content, no toolCalls", () => {
+    const chunks = [
+      "<|channel|>analysis<|message|>Just reasoning.<|end|>",
+      " \n ",
+      "<|start|>assistant<|channel|>final<|message|>Just the answer.<|return|>",
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.reasoning).toBe("Just reasoning.");
+    expect(result.content).toBe("Just the answer.");
+    expect(result.toolCalls).toBeUndefined();
+  });
+
+  // (22) commentary preamble + commentary toolCall -> preamble->content, 1 call.
+  it("(22) commentary preamble + commentary tool call: preamble is content, 1 tool call", () => {
+    const chunks = [
+      "<|channel|>commentary<|message|>Let me look that up for you.<|end|>",
+      '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"y"}<|call|>',
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.content).toBe("Let me look that up for you.");
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"y"}' });
+  });
+});
+
+describe("harmony rewrite — ROUTING EDGES (keep green)", () => {
+  // (23) analysis header carrying to=functions.x -> NOT a tool call.
+  it("(23) analysis-channel recipient is NOT a tool call (reasoning only)", () => {
+    const chunks = [
+      "<|channel|>analysis to=functions.should_not_fire<|message|>Thinking.<|end|>",
+      "<|start|>assistant<|channel|>final<|message|>Done.<|return|>",
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.reasoning).toBe("Thinking.");
+    expect(result.content).toBe("Done.");
+  });
+
+  // (24) recipient on the role segment before <|channel|>commentary -> toolCall.
+  it("(24) recipient on the role segment before <|channel|>commentary -> tool call", () => {
+    const raw =
+      '<|start|>assistant to=functions.role_placed<|channel|>commentary<|constrain|>json<|message|>{"x":1}<|call|>';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    expect(direct.toolCalls[0]).toEqual({ name: "role_placed", arguments: '{"x":1}' });
+  });
+
+  // (25) <|constrain|>json on analysis does NOT trigger <|call|>-termination;
+  // body ends at <|end|>.
+  it("(25) <|constrain|>json on analysis does not trigger call-termination", () => {
+    const chunks = [
+      "<|channel|>analysis<|constrain|>json<|message|>Thinking.<|end|>",
+      "<|start|>assistant<|channel|>final<|message|>The final answer.<|return|>",
+    ];
+    const result = collapseOpenAISSE(openAIHarmonyBody(chunks));
+    expect(result.reasoning).toBe("Thinking.");
+    expect(result.content).toBe("The final answer.");
+    expect(result.toolCalls).toBeUndefined();
+  });
+
+  // (26) RECIPIENT_RE: to=functions.- or to=functions. -> NOT a recipient ->
+  // non-tool body (no {name:"-"} call). A commentary message without a valid
+  // recipient is a preamble -> content.
+  it("(26) to=functions.- / to=functions. are not recipients (no bogus tool call)", () => {
+    const dash =
+      "<|start|>assistant<|channel|>commentary to=functions.-<|message|>preamble dash<|end|>";
+    const directDash = parseHarmonyContent(dash);
+    expect(directDash.failed).toBe(false);
+    expect(directDash.toolCalls).toEqual([]);
+    expect(directDash.content).toBe("preamble dash");
+
+    const empty =
+      "<|start|>assistant<|channel|>commentary to=functions.<|message|>preamble empty<|end|>";
+    const directEmpty = parseHarmonyContent(empty);
+    expect(directEmpty.failed).toBe(false);
+    expect(directEmpty.toolCalls).toEqual([]);
+    expect(directEmpty.content).toBe("preamble empty");
+  });
+});
+
+describe("harmony rewrite — UNTERMINATED / MALFORMED", () => {
+  // (27) commentary to=functions.x message {"a":1} with NO <|call|> -> fail-safe
+  // verbatim + signal.
+  it("(27) commentary tool body with valid JSON but no <|call|> fails safe", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.x<|constrain|>json<|message|>{"a":1}';
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+  });
+
+  // (28) <|channel|> with no following <|message|> -> fail-safe verbatim.
+  it("(28) <|channel|> with no following <|message|> fails safe", () => {
+    const raw = "<|channel|>analysis no message here";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+  });
+
+  // (29) <|start|>assistant with no channel/message -> fail-safe verbatim.
+  it("(29) <|start|>assistant with no channel/message fails safe", () => {
+    const raw = "<|start|>assistant";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+  });
+});
+
+describe("harmony rewrite — OBSERVABILITY / ENCODING", () => {
+  // (30) a firstDroppedSample-style 200-unit sample whose boundary splits a
+  // surrogate pair must be valid UTF-16 (no lone surrogate). This pins the
+  // surrogate-safe slicing of diagnostic samples.
+  it("(30) a 200-unit diagnostic sample never ends on a lone surrogate", () => {
+    // The diagnostic note slices the FULL content (which begins with the
+    // harmony prefix), not just the filler — so the emoji must be positioned
+    // relative to the prefix length to land its HIGH surrogate exactly at the
+    // 200-unit slice boundary (UTF-16 index 199). Otherwise the slice boundary
+    // never splits the pair and the surrogate-trim branch is never exercised.
+    const prefix = "<|start|>assistant<|channel|>commentary to=functions.s<|message|>{bad ";
+    // filler length = 199 - prefix.length puts the emoji's high surrogate at
+    // UTF-16 index 199 and its low surrogate at index 200.
+    const filler = "x".repeat(199 - prefix.length);
+    const content = `${prefix}${filler}😀 trailing`;
+
+    // Sanity: the raw (un-trimmed) 200-unit slice MUST end on a lone high
+    // surrogate, proving this test actually exercises the trim branch and is
+    // not trivially green.
+    const rawSlice = content.slice(0, 200);
+    const rawLast = rawSlice.charCodeAt(rawSlice.length - 1);
+    expect(rawLast >= 0xd800 && rawLast <= 0xdbff).toBe(true);
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([content]));
+    // Harmony failed -> content preserved verbatim, signal set.
+    expect(result.content).toBe(content);
+    expect(result.harmonyUnparsed).toBe(true);
+
+    // The diagnostic note is always present alongside harmonyUnparsed, and its
+    // surrogate-safe slice must contain NO lone surrogate code unit anywhere.
+    expect(result.harmonyNote).toBeDefined();
+    const note = result.harmonyNote!;
+    for (let k = 0; k < note.length; k++) {
+      const unit = note.charCodeAt(k);
+      if (unit >= 0xd800 && unit <= 0xdbff) {
+        // High surrogate: the next unit MUST be a low surrogate.
+        const next = note.charCodeAt(k + 1);
+        expect(next >= 0xdc00 && next <= 0xdfff).toBe(true);
+        k++; // skip the paired low surrogate
+      } else if (unit >= 0xdc00 && unit <= 0xdfff) {
+        // A low surrogate not preceded by a high surrogate is unpaired.
+        throw new Error(`lone low surrogate at index ${k} in harmonyNote`);
+      }
+    }
+  });
+});
+
+describe("harmony rewrite — Ollama NDJSON parity", () => {
+  // Fallback-only + fail-safe + whitespace parity for the Ollama path.
+  it("parses analysis + commentary tool + final over Ollama with separators", () => {
+    const chunks = [
+      "<|channel|>analysis<|message|>Plan.<|end|>",
+      "\n",
+      '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"x"}<|call|>',
+      "\n",
+      "<|start|>assistant<|channel|>final<|message|>Result.<|return|>",
+    ];
+    const result = collapseOllamaNDJSON(ollamaHarmonyBody(chunks));
+    expect(result.reasoning).toBe("Plan.");
+    expect(result.content).toBe("Result.");
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' });
+  });
+
+  it("prose mention over Ollama is content verbatim (no destruction)", () => {
+    const prose = "Harmony uses `<|channel|>` then `<|message|>` for the body";
+    const result = collapseOllamaNDJSON(ollamaHarmonyBody([prose]));
+    expect(result.content).toBe(prose);
+    expect(result.toolCalls).toBeUndefined();
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  it("structured Ollama tool_calls win over harmony content (fallback-only)", () => {
+    const harmony =
+      '<|start|>assistant<|channel|>commentary to=functions.harmony_tool<|constrain|>json<|message|>{"z":9}<|call|>';
+    const body = [
+      JSON.stringify({
+        model: "gpt-oss",
+        message: {
+          role: "assistant",
+          content: harmony,
+          tool_calls: [{ function: { name: "structured_tool", arguments: '{"s":1}' } }],
+        },
+        done: false,
+      }),
+      JSON.stringify({ model: "gpt-oss", message: { role: "assistant", content: "" }, done: true }),
+    ].join("\n");
+    const result = collapseOllamaNDJSON(body);
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls![0].name).toBe("structured_tool");
+    expect(result.toolCalls!.some((tc) => tc.name === "harmony_tool")).toBe(false);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  it("unparseable harmony over Ollama fails safe via harmonyUnparsed", () => {
+    const raw =
+      "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid";
+    const result = collapseOllamaNDJSON(ollamaHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+});
+
+// ===========================================================================
+// Harmony fail-safe UNIFORMITY at body boundaries — regressions for the three
+// holes a 7-agent review found in the documented all-or-nothing contract:
+// "any grammar deviation -> return ORIGINAL content verbatim, failed:true,
+// never silently strip/mangle; never leak a control token into content/
+// reasoning." Each test below pins a boundary case where the old code accepted
+// (failed:false) while leaking a control token or dropping data.
+// ===========================================================================
+
+describe("harmony fail-safe — body terminator followed by trailing junk (B-A1)", () => {
+  // A real terminator followed by NON-whitespace text that is NOT a real
+  // message start is a grammar deviation: the stream neither cleanly ends nor
+  // continues with another message. The OLD code absorbed the terminator
+  // literal into the body and kept scanning to EOF, leaking "<|return|> junk"
+  // into content. Correct behavior: uniform fail-safe (verbatim + failed:true).
+  it("final<|return|> followed by trailing junk fails safe (no token leak)", () => {
+    const raw = "<|channel|>final<|message|>Answer.<|return|> junk";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+    expect(direct.reasoning).toBe("");
+
+    const result = collapseOpenAISSE(openAIHarmonyBody([raw]));
+    expect(result.content).toBe(raw);
+    expect(result.harmonyUnparsed).toBe(true);
+    expect(result.droppedChunks ?? 0).toBe(0);
+    expect(result.truncated).toBeUndefined();
+  });
+
+  it("analysis<|end|> followed by trailing junk fails safe (no token leak)", () => {
+    const raw = "<|channel|>analysis<|message|>thinking<|end|>junk";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+  });
+});
+
+describe("harmony fail-safe — unterminated NON-final body at EOF (B-A3)", () => {
+  // The grammar says "EOF terminates the FINAL message only." An unterminated
+  // analysis (reasoning) body at EOF is a grammar deviation — analysis bodies
+  // are terminator-expecting (<|end|>). The OLD code accepted it (failed:false)
+  // and surfaced dangling reasoning. Correct behavior: fail-safe verbatim.
+  it("unterminated analysis body at EOF fails safe", () => {
+    const raw = "<|channel|>analysis<|message|>dangling reasoning";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.reasoning).toBe("");
+    expect(direct.toolCalls).toEqual([]);
+  });
+
+  // A final body at EOF (no terminator) is still legitimately accepted — EOF
+  // terminates the final message. This guards against over-failing B-A3.
+  it("unterminated FINAL body at EOF is still accepted (EOF terminates final)", () => {
+    const raw = "<|channel|>final<|message|>the answer with no terminator";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe("the answer with no terminator");
+    expect(direct.reasoning).toBe("");
+  });
+
+  // A legitimate analysis-followed-by-final stream where the analysis body IS
+  // terminated (by <|end|>) and only the final trails to EOF must still parse —
+  // the analysis terminator is present, so B-A3 must not fire on it.
+  it("analysis<|end|> + final-to-EOF still parses (analysis is terminated)", () => {
+    const raw =
+      "<|channel|>analysis<|message|>reasoning here<|end|><|start|>assistant<|channel|>final<|message|>final answer";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.reasoning).toBe("reasoning here");
+    expect(direct.content).toBe("final answer");
+  });
+});
+
+describe("harmony fail-safe — commentary tool body vs message boundary (B-A2)", () => {
+  // (a) A tool arg that is a CLOSED JSON string which legitimately CONTAINS
+  // literal harmony tokens (<|start|>...<|message|>...) is valid data: the
+  // correct parse is ONE tool call whose argument is that string. This is the
+  // SAME mechanism that preserves embedded <|call|>/<|channel|> substrings in
+  // JSON args (matrix 13/14). PIN this as correct — guards against a wrong fix.
+  it("(a) closed JSON arg containing literal harmony tokens -> one tool call", () => {
+    const arg = JSON.stringify({
+      instruction:
+        "emit <|start|>assistant<|channel|>commentary to=functions.x<|message|>nested<|call|>",
+    });
+    const raw = `<|start|>assistant<|channel|>commentary to=functions.outer<|constrain|>json<|message|>${arg}<|call|>`;
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(false);
+    expect(direct.toolCalls).toHaveLength(1);
+    expect(direct.toolCalls[0].name).toBe("outer");
+    expect(direct.toolCalls[0].arguments).toBe(arg);
+    expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow();
+  });
+
+  // (b) An UNTERMINATED tool call: a valid-JSON tool body followed by a real
+  // next message but with NO closing <|call|> for the first tool call. This
+  // must NOT silently merge/drop — it must fail safe verbatim.
+  it("(b) tool body with no <|call|> before a real next message fails safe", () => {
+    const raw =
+      '<|start|>assistant<|channel|>commentary to=functions.first<|constrain|>json<|message|>{"a":1}' +
+      "<|start|>assistant<|channel|>final<|message|>answer<|return|>";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.toolCalls).toEqual([]);
+  });
+});
+
+describe("harmony fail-safe — quoted whole-message ambiguity (known limitation)", () => {
+  // A body that QUOTES a complete well-formed harmony message is structurally
+  // indistinguishable from two real messages in detokenized TEXT. When the
+  // resulting split yields cleanly well-formed messages, the parser accepts it
+  // (the quoted tokens are stripped) — this is the irreducible, documented
+  // KNOWN LIMITATION. We PIN the acknowledged-imperfect behavior here so it is
+  // a conscious choice, not a silent regression target.
+  it("quoting a clean complete message splits into well-formed messages (documented)", () => {
+    const raw =
+      "<|channel|>final<|message|>To emit write " +
+      "<|start|>assistant<|channel|>final<|message|>hello<|return|>";
+    const direct = parseHarmonyContent(raw);
+    // Acknowledged-imperfect: parsed as two final messages; quoted tokens gone.
+    expect(direct.failed).toBe(false);
+    expect(direct.content).toBe("To emit write hello");
+    // Whatever the outcome, no control token ever leaks into the output.
+    expect(direct.content).not.toContain("<|");
+    expect(direct.reasoning).not.toContain("<|");
+  });
+
+  // The fail-safe edge of the same ambiguity: when the quoted message is
+  // followed by trailing junk, the split would yield a MALFORMED message, so
+  // the WHOLE input fails safe verbatim rather than emitting a mangled middle.
+  // This guarantees the behavior is always verbatim-or-clean, never mangled.
+  it("quoting a message followed by trailing junk fails safe verbatim (no mangle)", () => {
+    const raw =
+      "<|channel|>final<|message|>To emit write " +
+      "<|start|>assistant<|channel|>final<|message|>hello<|return|> and then stop";
+    const direct = parseHarmonyContent(raw);
+    expect(direct.failed).toBe(true);
+    expect(direct.content).toBe(raw);
+    expect(direct.content).not.toBe("To emit write hello<|return|> and then stop");
+  });
+});
diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts
index 0cd1dcb..5ed64af 100644
--- a/src/stream-collapse.ts
+++ b/src/stream-collapse.ts
@@ -10,6 +10,7 @@
 import { crc32 } from "node:zlib";
 import type { RecordProviderKey, ToolCall } from "./types.js";
 import type { Logger } from "./logger.js";
+import { isHarmonyContent, parseHarmonyContent } from "./harmony.js";
 
 // ---------------------------------------------------------------------------
 // Result type shared by all collapse functions
@@ -25,6 +26,79 @@ export interface CollapseResult {
   truncated?: boolean;
   audioB64?: string;
   audioMimeType?: string;
+  /**
+   * Set when harmony channel tokens were present in the accumulated content but
+   * could NOT be parsed into a complete, valid harmony structure. The content
+   * is preserved VERBATIM, so this is NOT transport loss — it is distinct from
+   * `droppedChunks` / `truncated`, which are reserved for genuine transport loss
+   * (malformed SSE/NDJSON frames, CRC mismatch). The caller surfaces this as a
+   * dedicated warning rather than a dropped/truncated-chunk warning.
+   */
+  harmonyUnparsed?: true;
+  /** Short human-readable note accompanying {@link harmonyUnparsed}. */
+  harmonyNote?: string;
+}
+
+/**
+ * Return `s.slice(0, max)` for use as a diagnostic sample. Only the final code
+ * unit of that slice is inspected: if it is a high surrogate it is dropped, so
+ * the result can be one unit shorter than the slice but never ends mid-pair.
+ */
+function surrogateSafeSlice(s: string, max: number): string {
+  let out = s.slice(0, max);
+  if (out.length > 0) {
+    const last = out.charCodeAt(out.length - 1);
+    // High surrogate (U+D800..U+DBFF) at the end: its low half is missing here.
+    if (last >= 0xd800 && last <= 0xdbff) {
+      out = out.slice(0, -1);
+    }
+  }
+  return out;
+}
+
+/**
+ * Split a raw SSE body into per-event blocks.
+ *
+ * Events are delimited by a blank line. Servers may emit LF or CRLF (`\r\n`)
+ * line endings; with CRLF the inter-event delimiter is `\r\n\r\n`, which
+ * contains no `\n\n` substring, so a plain `"\n\n"` split would miss it.
+ * Splitting on `/\r?\n\r?\n/` handles LF, CRLF, and mixed streams; per-line
+ * `\r` trimming happens in {@link splitSSELines}. Whitespace-only blocks are dropped.
+ */
+function splitSSEEvents(body: string): string[] {
+  return body.split(/\r?\n\r?\n/).filter((block) => block.trim().length > 0);
+}
+
+/**
+ * Split one SSE event block on `\n` and strip a single trailing `\r` from each
+ * resulting line, so CRLF-terminated lines come out identical to LF ones.
+ */
+function splitSSELines(block: string): string[] {
+  return block.split("\n").map((line) => (line.endsWith("\r") ? line.slice(0, -1) : line));
+}
+
+/**
+ * Extract the SSE `data` field from a single event block's lines.
+ *
+ * Per the SSE spec a single event may carry MULTIPLE `data:` lines; the field
+ * value is every data line's content joined with "\n", so taking only the
+ * first `data:` line (e.g. via `.find`) would truncate a multi-line payload.
+ * Lines that do not start with `data:` are skipped. No `\r` handling happens
+ * here, so callers MUST pass lines produced by {@link splitSSELines}. Returns
+ * the joined payload (with the "data:" prefix and one optional leading space
+ * stripped per line), or `undefined` when no line starts with `data:`.
+ */
+function extractSSEData(lines: string[]): string | undefined {
+  const dataParts: string[] = [];
+  for (const line of lines) {
+    if (!line.startsWith("data:")) continue;
+    // Drop the 5-char "data:" prefix, then at most one leading space (SSE spec).
+    let part = line.slice(5);
+    if (part.startsWith(" ")) part = part.slice(1);
+    dataParts.push(part);
+  }
+  if (dataParts.length === 0) return undefined;
+  return dataParts.join("\n");
 }
 
 // ---------------------------------------------------------------------------
@@ -39,19 +113,30 @@ export interface CollapseResult {
  *   data: [DONE]\n\n
  */
 export function collapseOpenAISSE(body: string): CollapseResult {
-  const lines = body.split("\n\n").filter((l) => l.trim().length > 0);
+  const lines = splitSSEEvents(body);
   let content = "";
   let reasoning = "";
   const webSearchQueries: string[] = [];
   let droppedChunks = 0;
   let firstDroppedSample: string | undefined;
+  let harmonyUnparsed = false;
+  let harmonyNote: string | undefined;
   const toolCallMap = new Map<number, { id: string; name: string; arguments: string }>();
+  // Fallback keying for deltas that OMIT `index`. Without this, every
+  // index-less delta collapses under one `undefined`/NaN key, merging distinct
+  // tool calls and corrupting arguments. Index-less fragments that share an
+  // `id` correlate via `idKeyMap`; otherwise each gets a fresh synthetic key
+  // assigned from a counter kept above any real index so sort order is stable.
+  // The 1_000_000 sentinel assumes real provider tool-call indices stay below
+  // it (they are small per-stream counters), so synthetic keys never collide.
+  let nextSyntheticIndex = 1_000_000;
+  const idKeyMap = new Map<string, number>();
 
   for (const line of lines) {
-    const dataLine = line.split("\n").find((l) => l.startsWith("data:"));
-    if (!dataLine) continue;
+    const data = extractSSEData(splitSSELines(line));
+    if (data === undefined) continue;
 
-    const payload = dataLine.slice(5).trim();
+    const payload = data.trim();
     if (payload === "[DONE]") continue;
 
     let parsed: Record<string, unknown>;
@@ -61,7 +146,7 @@ export function collapseOpenAISSE(body: string): CollapseResult {
       droppedChunks++;
       if (droppedChunks === 1) {
         const msg = err instanceof Error ? err.message : "unknown";
-        firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`;
+        firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`;
       }
       continue;
     }
@@ -118,12 +203,30 @@ export function collapseOpenAISSE(body: string): CollapseResult {
     const toolCalls = delta.tool_calls as Array<Record<string, unknown>> | undefined;
     if (toolCalls) {
       for (const tc of toolCalls) {
-        const index = tc.index as number;
         const fn = tc.function as Record<string, unknown> | undefined;
+        const rawId = typeof tc.id === "string" ? tc.id : undefined;
+
+        // Resolve a stable map key. Prefer the streamed `index`; when it is
+        // absent, correlate by `id` if present, else mint a fresh synthetic
+        // key so distinct index-less calls never merge.
+        let index: number;
+        if (typeof tc.index === "number") {
+          index = tc.index;
+        } else if (rawId !== undefined) {
+          const existing = idKeyMap.get(rawId);
+          if (existing !== undefined) {
+            index = existing;
+          } else {
+            index = nextSyntheticIndex++;
+            idKeyMap.set(rawId, index);
+          }
+        } else {
+          index = nextSyntheticIndex++;
+        }
 
         if (!toolCallMap.has(index)) {
           toolCallMap.set(index, {
-            id: (tc.id as string) ?? "",
+            id: rawId ?? "",
             name: (fn?.name as string) ?? "",
             arguments: "",
           });
@@ -143,17 +246,54 @@ export function collapseOpenAISSE(body: string): CollapseResult {
     }
   }
 
-  if (toolCallMap.size > 0) {
+  // Open-weight gpt-oss models (Ollama / vLLM / OpenRouter) stream tool calls
+  // as raw harmony channel tokens inside delta.content rather than structured
+  // delta.tool_calls. Harmony parsing is FALLBACK-ONLY: attempt it ONLY when
+  // there are NO structured delta.tool_calls. If structured tool calls exist,
+  // any harmony-looking content is prose — never merged (no phantom tool call),
+  // never stamped as truncated/dropped. When harmony IS the only source, a
+  // successful parse routes channels (content/reasoning/toolCalls); a failure
+  // preserves content VERBATIM and surfaces the distinct `harmonyUnparsed`
+  // signal (NOT droppedChunks/truncated — the bytes are not lost).
+  const harmonyToolCalls: ToolCall[] = [];
+  if (toolCallMap.size === 0 && isHarmonyContent(content)) {
+    const parsed = parseHarmonyContent(content);
+    if (parsed.failed) {
+      harmonyUnparsed = true;
+      harmonyNote = `harmony tokens present but unparseable; content preserved verbatim: ${surrogateSafeSlice(content, 200)}`;
+    } else {
+      content = parsed.content;
+      if (parsed.reasoning) {
+        reasoning += parsed.reasoning;
+      }
+      harmonyToolCalls.push(...parsed.toolCalls);
+    }
+  }
+
+  if (toolCallMap.size > 0 || harmonyToolCalls.length > 0) {
     const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b);
     return {
       ...(content ? { content } : {}),
-      toolCalls: sorted.map(([, tc]) => ({
-        name: tc.name,
-        arguments: tc.arguments,
-        ...(tc.id ? { id: tc.id } : {}),
-      })),
+      // Fallback-only: harmonyToolCalls are populated ONLY in the
+      // no-structured-calls branch, so this is never a merge of both sources.
+      toolCalls: [
+        ...sorted.map(([, tc]) => ({
+          name: tc.name,
+          arguments: tc.arguments,
+          ...(tc.id ? { id: tc.id } : {}),
+        })),
+        ...harmonyToolCalls,
+      ],
+      // Reasoning is preserved alongside tool calls for ALL structured streams
+      // (DeepSeek/OpenRouter reasoning_content, harmony analysis channel), at
+      // parity with every other collapser and the non-streaming path.
+      ...(reasoning ? { reasoning } : {}),
+      // webSearches parity with the text-only return branch.
+      ...(webSearchQueries.length > 0 ? { webSearches: webSearchQueries } : {}),
       ...(droppedChunks > 0 ? { droppedChunks } : {}),
       ...(firstDroppedSample ? { firstDroppedSample } : {}),
+      ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}),
+      ...(harmonyNote ? { harmonyNote } : {}),
     };
   }
 
@@ -163,6 +303,8 @@ export function collapseOpenAISSE(body: string): CollapseResult {
     ...(webSearchQueries.length > 0 ? { webSearches: webSearchQueries } : {}),
     ...(droppedChunks > 0 ? { droppedChunks } : {}),
     ...(firstDroppedSample ? { firstDroppedSample } : {}),
+    ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}),
+    ...(harmonyNote ? { harmonyNote } : {}),
   };
 }
 
@@ -178,21 +320,32 @@ export function collapseOpenAISSE(body: string): CollapseResult {
  *   event: content_block_delta\ndata: {"delta":{"type":"text_delta","text":"Hello"}}\n\n
  */
 export function collapseAnthropicSSE(body: string): CollapseResult {
-  const blocks = body.split("\n\n").filter((b) => b.trim().length > 0);
+  const blocks = splitSSEEvents(body);
   let content = "";
   let reasoning = "";
   let droppedChunks = 0;
   let firstDroppedSample: string | undefined;
   const toolCallMap = new Map<number, { id: string; name: string; arguments: string }>();
+  // Fallback keying for content blocks that OMIT `index` (mirrors the OpenAI /
+  // Cohere / Bedrock guards). Without it, every index-less block collapses
+  // under one `undefined` key, merging distinct tool_use blocks. Index-less
+  // starts mint a fresh synthetic key (kept above any real index so sort order
+  // is stable). Despite its name, `lastSyntheticIndex` tracks whichever
+  // tool_use start most recently opened REGARDLESS of whether its index was
+  // real or synthetic (it is set on EVERY start), so an index-less delta
+  // correlates to the most-recent start — not just to the last synthetic one.
+  // The 1_000_000 sentinel assumes real provider indices stay below it.
+  let nextSyntheticIndex = 1_000_000;
+  let lastSyntheticIndex: number | undefined;
 
   for (const block of blocks) {
-    const lines = block.split("\n");
+    const lines = splitSSELines(block);
     const eventLine = lines.find((l) => l.startsWith("event:"));
-    const dataLine = lines.find((l) => l.startsWith("data:"));
-    if (!dataLine) continue;
+    const data = extractSSEData(lines);
+    if (data === undefined) continue;
 
     const eventType = eventLine ? eventLine.slice(6).trim() : "";
-    const payload = dataLine.slice(5).trim();
+    const payload = data.trim();
 
     let parsed: Record<string, unknown>;
     try {
@@ -201,15 +354,23 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
       droppedChunks++;
       if (droppedChunks === 1) {
         const msg = err instanceof Error ? err.message : "unknown";
-        firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`;
+        firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`;
       }
       continue;
     }
 
     if (eventType === "content_block_start") {
-      const index = parsed.index as number;
       const contentBlock = parsed.content_block as Record<string, unknown> | undefined;
       if (contentBlock?.type === "tool_use") {
+        // Prefer the streamed `index`; when absent, mint a fresh synthetic key
+        // so distinct index-less tool_use blocks never merge.
+        let index: number;
+        if (typeof parsed.index === "number") {
+          index = parsed.index;
+        } else {
+          index = nextSyntheticIndex++;
+        }
+        lastSyntheticIndex = index;
         toolCallMap.set(index, {
           id: (contentBlock.id as string) ?? "",
           name: (contentBlock.name as string) ?? "",
@@ -219,7 +380,6 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
     }
 
     if (eventType === "content_block_delta") {
-      const index = parsed.index as number;
       const delta = parsed.delta as Record<string, unknown> | undefined;
       if (!delta) continue;
 
@@ -232,9 +392,24 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
       }
 
       if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") {
-        const entry = toolCallMap.get(index);
+        // Use the streamed `index` when present; otherwise correlate to the
+        // most recent tool_use start (mirrors the start-side fallback).
+        const index = typeof parsed.index === "number" ? parsed.index : lastSyntheticIndex;
+        // A delta that cannot correlate to any known start (no streamed index
+        // AND no prior start, or a stale index with no entry) would otherwise
+        // silently lose its args. Account for it as a dropped chunk instead of
+        // vanishing (mirrors the Cohere uncorrelated-delta path).
+        const entry = index !== undefined ? toolCallMap.get(index) : undefined;
         if (entry) {
           entry.arguments += delta.partial_json;
+        } else {
+          droppedChunks++;
+          if (droppedChunks === 1) {
+            firstDroppedSample = `input_json_delta with no correlating tool_use start: ${surrogateSafeSlice(
+              payload,
+              200,
+            )}`;
+          }
         }
       }
     }
@@ -274,7 +449,7 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
  *   data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}]}\n\n
  */
 export function collapseGeminiSSE(body: string): CollapseResult {
-  const lines = body.split("\n\n").filter((l) => l.trim().length > 0);
+  const lines = splitSSEEvents(body);
   let content = "";
   let reasoning = "";
   let droppedChunks = 0;
@@ -284,10 +459,10 @@ export function collapseGeminiSSE(body: string): CollapseResult {
   const toolCalls: ToolCall[] = [];
 
   for (const line of lines) {
-    const dataLine = line.split("\n").find((l) => l.startsWith("data:"));
-    if (!dataLine) continue;
+    const data = extractSSEData(splitSSELines(line));
+    if (data === undefined) continue;
 
-    const payload = dataLine.slice(5).trim();
+    const payload = data.trim();
 
     let parsed: Record<string, unknown>;
     try {
@@ -296,7 +471,7 @@ export function collapseGeminiSSE(body: string): CollapseResult {
       droppedChunks++;
       if (droppedChunks === 1) {
         const msg = err instanceof Error ? err.message : "unknown";
-        firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`;
+        firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`;
       }
       continue;
     }
@@ -315,7 +490,12 @@ export function collapseGeminiSSE(body: string): CollapseResult {
         const fc = part.functionCall as Record<string, unknown>;
         toolCalls.push({
           name: String(fc.name ?? ""),
-          arguments: typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args),
+          // Default undefined/object args to a JSON object string (matches
+          // collapseGeminiInteractionsSSE / Ollama). JSON.stringify(undefined)
+          // would otherwise yield the VALUE undefined, violating the
+          // ToolCall.arguments:string contract.
+          arguments:
+            typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args ?? {}),
         });
       } else if (
         part.inlineData &&
@@ -340,9 +520,15 @@ export function collapseGeminiSSE(body: string): CollapseResult {
   }
 
   if (audioB64) {
+    // Preserve any content / reasoning / tool calls accumulated in the same
+    // stream — a Gemini turn can interleave audio with text and functionCall
+    // parts, and the early return must not silently drop them.
     return {
       audioB64,
       audioMimeType,
+      ...(content ? { content } : {}),
+      ...(reasoning ? { reasoning } : {}),
+      ...(toolCalls.length > 0 ? { toolCalls } : {}),
       ...(droppedChunks > 0 ? { droppedChunks } : {}),
       ...(firstDroppedSample ? { firstDroppedSample } : {}),
     };
@@ -378,12 +564,20 @@ export function collapseGeminiSSE(body: string): CollapseResult {
  *
  * /api/generate format:
  *   {"model":"llama3","response":"Hello","done":false}\n
+ *
+ * Open-weight gpt-oss served via Ollama streams harmony channel tokens inside
+ * `message.content` (just like the OpenAI SSE path), so after accumulation the
+ * content is run through the same fail-safe {@link parseHarmonyContent} gate to
+ * capture structured tool calls / reasoning instead of leaking raw tokens.
  */
 export function collapseOllamaNDJSON(body: string): CollapseResult {
   const lines = body.split("\n").filter((l) => l.trim().length > 0);
   let content = "";
+  let reasoning = "";
   let droppedChunks = 0;
   let firstDroppedSample: string | undefined;
+  let harmonyUnparsed = false;
+  let harmonyNote: string | undefined;
   const toolCalls: ToolCall[] = [];
 
   for (const line of lines) {
@@ -394,7 +588,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
       droppedChunks++;
       if (droppedChunks === 1) {
         const msg = err instanceof Error ? err.message : "unknown";
-        firstDroppedSample = `parse failed (${msg}): ${line.trim().slice(0, 200)}`;
+        firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(line.trim(), 200)}`;
       }
       continue;
     }
@@ -413,8 +607,13 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
           if (fn) {
             toolCalls.push({
               name: String(fn.name ?? ""),
+              // Default undefined/object args to a JSON object (matching
+              // collapseGeminiInteractionsSSE) — JSON.stringify(undefined)
+              // would otherwise yield the VALUE undefined, not a string.
               arguments:
-                typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments),
+                typeof fn.arguments === "string"
+                  ? fn.arguments
+                  : JSON.stringify(fn.arguments ?? {}),
             });
           }
         }
@@ -427,19 +626,46 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
     }
   }
 
+  // Open-weight gpt-oss served via Ollama streams harmony channel tokens inside
+  // message.content (same as the OpenAI SSE path). Harmony parsing is
+  // FALLBACK-ONLY: attempt it ONLY when there are NO structured message
+  // tool_calls. If structured tool calls exist, harmony-looking content is
+  // prose — never merged (no phantom), never stamped truncated/dropped. On a
+  // harmony failure the content is preserved VERBATIM and surfaced via the
+  // distinct `harmonyUnparsed` signal (NOT droppedChunks/truncated).
+  if (toolCalls.length === 0 && isHarmonyContent(content)) {
+    const parsedHarmony = parseHarmonyContent(content);
+    if (parsedHarmony.failed) {
+      harmonyUnparsed = true;
+      harmonyNote = `harmony tokens present but unparseable; content preserved verbatim: ${surrogateSafeSlice(content, 200)}`;
+    } else {
+      content = parsedHarmony.content;
+      if (parsedHarmony.reasoning) {
+        reasoning += parsedHarmony.reasoning;
+      }
+      toolCalls.push(...parsedHarmony.toolCalls);
+    }
+  }
+
   if (toolCalls.length > 0) {
     return {
       ...(content ? { content } : {}),
       toolCalls,
+      ...(reasoning ? { reasoning } : {}),
       ...(droppedChunks > 0 ? { droppedChunks } : {}),
       ...(firstDroppedSample ? { firstDroppedSample } : {}),
+      ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}),
+      ...(harmonyNote ? { harmonyNote } : {}),
     };
   }
 
   return {
     content,
+    ...(reasoning ? { reasoning } : {}),
     ...(droppedChunks > 0 ? { droppedChunks } : {}),
     ...(firstDroppedSample ? { firstDroppedSample } : {}),
+    ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}),
+    ...(harmonyNote ? { harmonyNote } : {}),
   };
 }
 
@@ -454,20 +680,30 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
  *   event: content-delta\ndata: {"type":"content-delta","delta":{"message":{"content":{"text":"Hello"}}}}\n\n
  */
 export function collapseCohereSSE(body: string): CollapseResult {
-  const blocks = body.split("\n\n").filter((b) => b.trim().length > 0);
+  const blocks = splitSSEEvents(body);
   let content = "";
   let droppedChunks = 0;
   let firstDroppedSample: string | undefined;
   const toolCallMap = new Map<number, { id: string; name: string; arguments: string }>();
+  // Fallback keying for tool-call events that OMIT `index` (mirrors the
+  // OpenAI guard). Without it, every index-less tool-call-start collapses
+  // under one `undefined`/NaN key, merging distinct calls. Index-less starts
+  // mint a fresh synthetic key. `lastStartKey` tracks the most-recent
+  // tool-call-start key REGARDLESS of whether it was real or synthetic, so an
+  // index-less tool-call-delta correlates to whichever start most recently
+  // opened — not just to the last synthetic one. The 1_000_000 sentinel
+  // assumes real provider indices stay below it.
+  let nextSyntheticIndex = 1_000_000;
+  let lastStartKey: number | undefined;
 
   for (const block of blocks) {
-    const lines = block.split("\n");
+    const lines = splitSSELines(block);
     const eventLine = lines.find((l) => l.startsWith("event:"));
-    const dataLine = lines.find((l) => l.startsWith("data:"));
-    if (!dataLine) continue;
+    const data = extractSSEData(lines);
+    if (data === undefined) continue;
 
     const eventType = eventLine ? eventLine.slice(6).trim() : "";
-    const payload = dataLine.slice(5).trim();
+    const payload = data.trim();
 
     let parsed: Record<string, unknown>;
     try {
@@ -476,7 +712,7 @@ export function collapseCohereSSE(body: string): CollapseResult {
       droppedChunks++;
       if (droppedChunks === 1) {
         const msg = err instanceof Error ? err.message : "unknown";
-        firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`;
+        firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`;
       }
       continue;
     }
@@ -491,7 +727,15 @@ export function collapseCohereSSE(body: string): CollapseResult {
     }
 
     if (eventType === "tool-call-start") {
-      const index = parsed.index as number;
+      let index: number;
+      if (typeof parsed.index === "number") {
+        index = parsed.index;
+      } else {
+        index = nextSyntheticIndex++;
+      }
+      // Track the most-recent start key (real OR synthetic) so a following
+      // index-less delta correlates to whichever call just opened.
+      lastStartKey = index;
       const delta = parsed.delta as Record<string, unknown> | undefined;
       const message = delta?.message as Record<string, unknown> | undefined;
       const toolCalls = message?.tool_calls as Record<string, unknown> | undefined;
@@ -506,16 +750,29 @@ export function collapseCohereSSE(body: string): CollapseResult {
     }
 
     if (eventType === "tool-call-delta") {
-      const index = parsed.index as number;
+      // Use the streamed `index` when present; otherwise correlate to the most
+      // recent tool-call-start (real or synthetic key).
+      const index = typeof parsed.index === "number" ? parsed.index : lastStartKey;
       const delta = parsed.delta as Record<string, unknown> | undefined;
       const message = delta?.message as Record<string, unknown> | undefined;
       const toolCalls = message?.tool_calls as Record<string, unknown> | undefined;
       if (toolCalls) {
         const fn = toolCalls.function as Record<string, unknown> | undefined;
         if (fn && typeof fn.arguments === "string") {
-          const entry = toolCallMap.get(index);
+          // A delta that cannot correlate to any known start (no streamed
+          // index AND no prior start, or an index with no map entry) would
+          // otherwise silently lose its args. Count it as a dropped chunk.
+          const entry = index !== undefined ? toolCallMap.get(index) : undefined;
           if (entry) {
             entry.arguments += fn.arguments;
+          } else {
+            droppedChunks++;
+            if (droppedChunks === 1) {
+              firstDroppedSample = `tool-call-delta with no correlating start: ${surrogateSafeSlice(
+                payload,
+                200,
+              )}`;
+            }
           }
         }
       }
@@ -586,26 +843,55 @@ function decodeEventStreamFrames(buf: Buffer): {
     // Parse headers
     const headersStart = offset + 12;
     const headersEnd = headersStart + headersLength;
+    const payloadEnd = offset + totalLength - 4; // minus message CRC
+
+    // Validate the headers region fits inside the frame. A frame can carry a
+    // valid prelude CRC yet declare a `headersLength` that overruns the payload
+    // region (the prelude CRC only covers total/headers length, not the body).
+    // Without this guard a per-header read walks off the buffer and throws an
+    // uncaught RangeError; treat it as truncation instead.
+    if (headersEnd > payloadEnd || headersEnd > buf.length) {
+      return { frames, truncated: true };
+    }
+
     const headers: Record<string, string> = {};
     let hOffset = headersStart;
+    let headerOverrun = false;
 
     while (hOffset < headersEnd) {
+      // Each read must stay within the declared headers region. Bail out
+      // (truncated) on any overrun rather than reading past the boundary.
+      if (hOffset + 1 > headersEnd) {
+        headerOverrun = true;
+        break;
+      }
       const nameLen = buf.readUInt8(hOffset);
       hOffset += 1;
+      if (hOffset + nameLen + 1 + 2 > headersEnd) {
+        headerOverrun = true;
+        break;
+      }
       const name = buf.subarray(hOffset, hOffset + nameLen).toString("utf8");
       hOffset += nameLen;
       // Skip header type byte (type 7 = STRING)
       hOffset += 1;
       const valueLen = buf.readUInt16BE(hOffset);
       hOffset += 2;
+      if (hOffset + valueLen > headersEnd) {
+        headerOverrun = true;
+        break;
+      }
       const value = buf.subarray(hOffset, hOffset + valueLen).toString("utf8");
       hOffset += valueLen;
       headers[name] = value;
     }
 
+    if (headerOverrun) {
+      return { frames, truncated: true };
+    }
+
     // Extract payload
     const payloadStart = headersEnd;
-    const payloadEnd = offset + totalLength - 4; // minus message CRC
     const payload = buf.subarray(payloadStart, payloadEnd);
 
     // Validate message CRC (covers entire frame minus last 4 bytes)
@@ -644,7 +930,7 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
       droppedChunks++;
       if (droppedChunks === 1) {
         const msg = err instanceof Error ? err.message : "unknown";
-        firstDroppedSample = `parse failed (${msg}): ${frameStr.slice(0, 200)}`;
+        firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(frameStr, 200)}`;
       }
       continue;
     }
@@ -657,9 +943,20 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
       }
       if (delta?.type === "input_json_delta" && typeof delta.partial_json === "string") {
         const index = parsed.index as number | undefined;
-        if (index !== undefined) {
-          const entry = toolCallMap.get(index);
-          if (entry) entry.arguments += delta.partial_json;
+        // An arg delta that cannot correlate to a known tool_use start would
+        // otherwise silently lose its args. Account for it as a dropped chunk
+        // instead of vanishing (mirrors the Cohere uncorrelated-delta path).
+        const entry = index !== undefined ? toolCallMap.get(index) : undefined;
+        if (entry) {
+          entry.arguments += delta.partial_json;
+        } else {
+          droppedChunks++;
+          if (droppedChunks === 1) {
+            firstDroppedSample = `input_json_delta with no correlating tool_use start: ${surrogateSafeSlice(
+              frameStr,
+              200,
+            )}`;
+          }
         }
       }
       continue;
@@ -712,10 +1009,21 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
       // Tool use input JSON delta
       if (typeof delta.toolUse === "object" && delta.toolUse !== null) {
         const toolUseDelta = delta.toolUse as Record<string, unknown>;
-        if (typeof toolUseDelta.input === "string" && index !== undefined) {
-          const entry = toolCallMap.get(index);
+        if (typeof toolUseDelta.input === "string") {
+          // An arg delta that cannot correlate to a known tool_use start would
+          // otherwise silently lose its args. Account for it as a dropped chunk
+          // instead of vanishing (mirrors the Cohere uncorrelated-delta path).
+          const entry = index !== undefined ? toolCallMap.get(index) : undefined;
           if (entry) {
             entry.arguments += toolUseDelta.input;
+          } else {
+            droppedChunks++;
+            if (droppedChunks === 1) {
+              firstDroppedSample = `toolUse.input delta with no correlating tool_use start: ${surrogateSafeSlice(
+                frameStr,
+                200,
+              )}`;
+            }
           }
         }
       }
@@ -756,7 +1064,7 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult {
  *   data: {"event_type":"interaction.complete","interaction":{"id":"...","usage":{...}}}\n\n
  */
 export function collapseGeminiInteractionsSSE(body: string): CollapseResult {
-  const lines = body.split("\n\n").filter((l) => l.trim().length > 0);
+  const lines = splitSSEEvents(body);
   let content = "";
   let reasoning = "";
   let droppedChunks = 0;
@@ -764,10 +1072,10 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult {
   const toolCalls: ToolCall[] = [];
 
   for (const line of lines) {
-    const dataLine = line.split("\n").find((l) => l.startsWith("data:"));
-    if (!dataLine) continue;
+    const data = extractSSEData(splitSSELines(line));
+    if (data === undefined) continue;
 
-    const payload = dataLine.slice(5).trim();
+    const payload = data.trim();
 
     let parsed: Record<string, unknown>;
     try {
@@ -776,7 +1084,7 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult {
       droppedChunks++;
       if (droppedChunks === 1) {
         const msg = err instanceof Error ? err.message : "unknown";
-        firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`;
+        firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`;
       }
       continue;
     }

From b3a1d803ce3657415c2b81fb865de0cd032297dc Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Tue, 2 Jun 2026 13:27:56 -0700
Subject: [PATCH 3/5] fix: decode multibyte UTF-8 incrementally, harden
 recorder frame timing, and record audio companions

---
 .../recorder-multibyte-stream.test.ts         | 175 ++++++
 src/__tests__/recorder.test.ts                | 592 +++++++++++++++++-
 src/recorder.ts                               |  98 ++-
 src/types.ts                                  |  14 +
 4 files changed, 867 insertions(+), 12 deletions(-)
 create mode 100644 src/__tests__/recorder-multibyte-stream.test.ts

diff --git a/src/__tests__/recorder-multibyte-stream.test.ts b/src/__tests__/recorder-multibyte-stream.test.ts
new file mode 100644
index 0000000..3455ab4
--- /dev/null
+++ b/src/__tests__/recorder-multibyte-stream.test.ts
@@ -0,0 +1,175 @@
+import { describe, it, expect } from "vitest";
+import { StreamingFrameDecoder } from "../recorder.js";
+import { collapseOpenAISSE } from "../stream-collapse.js";
+
+// ---------------------------------------------------------------------------
+// Multibyte UTF-8 streaming decode — regression for fixture garbling.
+//
+// When aimock proxies a streamed upstream LLM response, makeUpstreamRequest
+// decodes each TCP chunk to text so it can split the byte stream into SSE /
+// NDJSON frames. A multibyte UTF-8 character (CJK, emoji, ...) can have its
+// bytes split across a TCP chunk boundary. Decoding each chunk independently
+// with Buffer#toString() turns the partial sequence into U+FFFD replacement
+// characters, corrupting the decoded frame text (a user reported text such as
+// "官网群" coming out garbled in a recorded fixture).
+//
+// There are TWO decode paths and this file covers BOTH:
+//   1. The frame-TIMING path (StreamingFrameDecoder) — used when capturing
+//      per-frame arrival timestamps; it decodes incrementally as TCP chunks
+//      arrive, so it must buffer partial multibyte sequences across chunk
+//      boundaries itself. The first describe-block drives that decoder
+//      directly (the exact code makeUpstreamRequest uses), not a reimpl.
+//   2. The recorded-BODY path — the full upstream byte stream is buffered via
+//      Buffer.concat and decoded ONCE with rawBuffer.toString() before being
+//      handed to the collapse functions. The final describe-block drives that
+//      path end-to-end (split-chunk bytes -> concat -> toString -> collapse)
+//      to pin that the body a fixture is built from is also U+FFFD-free.
+// ---------------------------------------------------------------------------
+
+/**
+ * Encode `text` as UTF-8 and split it one byte past its first non-ASCII byte,
+ * so the cut lands inside a multibyte sequence (naive per-chunk decode mangles it).
+ */
+function splitMidCharacter(text: string): { first: Buffer; second: Buffer } {
+  const full = Buffer.from(text, "utf8");
+  // Locate the first non-ASCII (multibyte lead) byte and cut one byte past it,
+  // so the split lands inside the multibyte sequence.
+  let i = 0;
+  while (i < full.length && full[i] < 0x80) i++;
+  // Guard against ASCII-only misuse: without a multibyte lead byte the cut
+  // would not straddle a character and the test would be a degenerate no-op.
+  if (i >= full.length) {
+    throw new Error("splitMidCharacter: no multibyte lead byte found (ASCII-only input)");
+  }
+  const cut = i + 1; // one byte into the multibyte sequence
+  return { first: full.subarray(0, cut), second: full.subarray(cut) };
+}
+
+describe("StreamingFrameDecoder", () => {
+  it("reassembles a CJK character split across two chunks without U+FFFD", () => {
+    const original = 'data: {"delta":"官网群"}\n\n';
+    const { first, second } = splitMidCharacter(original);
+
+    // Sanity: the split really does straddle a multibyte boundary, so a naive
+    // per-chunk decode would corrupt it. This pins WHY the test is meaningful.
+    expect(first.toString() + second.toString()).toContain("�");
+
+    const decoder = new StreamingFrameDecoder();
+    let out = "";
+    out += decoder.write(first);
+    out += decoder.write(second);
+    out += decoder.end();
+
+    expect(out).toBe(original);
+    expect(out).not.toContain("�");
+  });
+
+  it("reassembles a 4-byte emoji split across two chunks without U+FFFD", () => {
+    const original = "data: 🎉🎉\n\n";
+    const { first, second } = splitMidCharacter(original);
+
+    expect(first.toString() + second.toString()).toContain("�");
+
+    const decoder = new StreamingFrameDecoder();
+    let out = "";
+    out += decoder.write(first);
+    out += decoder.write(second);
+    out += decoder.end();
+
+    expect(out).toBe(original);
+    expect(out).not.toContain("�");
+  });
+
+  it("handles a multibyte character split byte-by-byte across many chunks", () => {
+    const original = "官"; // 3 bytes: E5 AE 98
+    const full = Buffer.from(original, "utf8");
+    const decoder = new StreamingFrameDecoder();
+    let out = "";
+    for (const byte of full) {
+      out += decoder.write(Buffer.from([byte]));
+    }
+    out += decoder.end();
+    expect(out).toBe(original);
+    expect(out).not.toContain("�");
+  });
+
+  it("passes ASCII-only frames through unchanged", () => {
+    const decoder = new StreamingFrameDecoder();
+    let out = "";
+    out += decoder.write(Buffer.from("data: hello\n\n", "utf8"));
+    out += decoder.end();
+    expect(out).toBe("data: hello\n\n");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Recorded-BODY multibyte path — regression for fixture garbling on the
+// collapse path (NOT the frame-timing decoder above).
+//
+// On the non-timing capture path, makeUpstreamRequest accumulates the raw
+// upstream bytes and decodes the COMPLETE buffer once (Buffer.concat then
+// rawBuffer.toString()) before handing the text to a collapse function. Even
+// when a multibyte UTF-8 character is split across TCP chunk boundaries, the
+// concat-then-decode order means the body the fixture is built from must be
+// U+FFFD-free. This test drives that exact order through collapseOpenAISSE.
+// ---------------------------------------------------------------------------
+
+describe("recorded-body multibyte decode (Buffer.concat -> toString -> collapse)", () => {
+  /**
+   * Build an OpenAI SSE body containing the given content, then return its raw
+   * UTF-8 bytes split into two chunks at an offset that lands INSIDE a
+   * multibyte sequence (so a naive per-chunk decode would mangle it).
+   */
+  function sseBytesSplitMidCharacter(content: string): { first: Buffer; second: Buffer } {
+    const body = [
+      `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content } }] })}`,
+      "",
+      "data: [DONE]",
+      "",
+    ].join("\n");
+    const full = Buffer.from(body, "utf8");
+    // Cut one byte into the first multibyte (non-ASCII) sequence.
+    let i = 0;
+    while (i < full.length && full[i] < 0x80) i++;
+    // Guard against ASCII-only misuse: without a multibyte lead byte the cut
+    // would not straddle a character and the test would be a degenerate no-op.
+    if (i >= full.length) {
+      throw new Error(
+        "sseBytesSplitMidCharacter: no multibyte lead byte found (ASCII-only content)",
+      );
+    }
+    const cut = i + 1;
+    return { first: full.subarray(0, cut), second: full.subarray(cut) };
+  }
+
+  it("decodes a CJK body split across chunks with no U+FFFD via the collapse path", () => {
+    const content = "官网群 says hello 🎉";
+    const { first, second } = sseBytesSplitMidCharacter(content);
+
+    // Sanity: decoding the chunks INDEPENDENTLY would corrupt the text — this
+    // pins that the split really straddles a multibyte boundary (so the test
+    // is meaningful and not trivially green).
+    expect(first.toString() + second.toString()).toContain("�");
+
+    // The recorder's actual order: buffer all bytes, decode once, then collapse.
+    const rawBuffer = Buffer.concat([first, second]);
+    const decoded = rawBuffer.toString("utf8");
+    const result = collapseOpenAISSE(decoded);
+
+    expect(result.content).toBe(content);
+    expect(result.content).not.toContain("�");
+  });
+
+  it("decodes an emoji-only body split across chunks with no U+FFFD via the collapse path", () => {
+    const content = "🎉🎉🎉";
+    const { first, second } = sseBytesSplitMidCharacter(content);
+
+    expect(first.toString() + second.toString()).toContain("�");
+
+    const rawBuffer = Buffer.concat([first, second]);
+    const result = collapseOpenAISSE(rawBuffer.toString("utf8"));
+
+    expect(result.content).toBe(content);
+    expect(result.content).not.toContain("�");
+  });
+});
diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts
index 25ae6e6..4d777cd 100644
--- a/src/__tests__/recorder.test.ts
+++ b/src/__tests__/recorder.test.ts
@@ -5006,9 +5006,8 @@ describe("multi-call fixture disambiguation (issue #185)", () => {
     // The haiku fixture has systemHash metadata (from whichever call won)
     const haikuFixture = fixtures.find((f) => f.match.model === "claude-3-5-haiku")!;
     expect(haikuFixture).toBeDefined();
-    expect((haikuFixture as Record<string, unknown>).metadata).toBeDefined();
-    const meta = (haikuFixture as Record<string, unknown>).metadata as Record<string, unknown>;
-    expect(meta.systemHash).toMatch(/^[a-f0-9]{8}$/);
+    expect(haikuFixture.metadata).toBeDefined();
+    expect(haikuFixture.metadata!.systemHash).toMatch(/^[a-f0-9]{8}$/);
 
     // Cleanup
     await new Promise<void>((resolve) => recorderServer.server.close(() => resolve()));
@@ -5166,3 +5165,590 @@ describe("fixture metadata recording", () => {
     expect(hash1).not.toBe(hash2);
   });
 });
+
+// ---------------------------------------------------------------------------
+// webSearches propagation into the persisted fixture
+//
+// Drives a raw upstream that emits exactly the OpenAI Responses-API SSE shape
+// `collapseOpenAISSE` recognizes (a completed web_search_call), then exercises
+// `proxyAndRecord` end-to-end and asserts that the collapsed `webSearches` land
+// in the persisted fixture.
+// ---------------------------------------------------------------------------
+
+describe("recorder webSearches propagation", () => {
+  let rawServer: http.Server | undefined;
+
+  afterEach(async () => {
+    if (rawServer) {
+      await new Promise<void>((resolve) => rawServer!.close(() => resolve()));
+      rawServer = undefined;
+    }
+  });
+
+  // Spin up a raw upstream that replies with a fixed SSE body and a real recorder
+  // server pointed at it, then POST a streaming chat request through the recorder.
+  async function recordSse(sseBody: string): Promise<{
+    fixturePath: string;
+    response: { status: number; body: string };
+  }> {
+    rawServer = http.createServer((_upReq, upRes) => {
+      upRes.writeHead(200, { "Content-Type": "text/event-stream" });
+      upRes.end(sseBody);
+    });
+    await new Promise<void>((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve()));
+    const upstreamPort = (rawServer!.address() as { port: number }).port;
+
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-collapse-prop-"));
+    recorder = await createServer([], {
+      port: 0,
+      record: {
+        providers: { openai: `http://127.0.0.1:${upstreamPort}` },
+        fixturePath: tmpDir,
+      },
+    });
+
+    const response = await post(`${recorder.url}/v1/chat/completions`, {
+      model: "gpt-4",
+      messages: [{ role: "user", content: "search the web" }],
+      stream: true,
+    });
+
+    return { fixturePath: tmpDir, response };
+  }
+
+  it("propagates webSearches from a collapsed Responses-API stream into the persisted fixture", async () => {
+    // OpenAI Responses-API SSE: a completed web_search_call followed by text output.
+    // collapseOpenAISSE returns { content, webSearches: ["..."] }.
+    const sse = [
+      'data: {"type":"response.output_item.done","item":{"type":"web_search_call","action":{"query":"weather in Paris"}}}',
+      'data: {"type":"response.output_text.delta","delta":"It is sunny in Paris."}',
+      "data: [DONE]",
+    ]
+      .map((l) => l + "\n\n")
+      .join("");
+
+    const { fixturePath } = await recordSse(sse);
+
+    const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json"));
+    expect(files).toHaveLength(1);
+    const fixtureContent = JSON.parse(
+      fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"),
+    ) as FixtureFile;
+    const saved = fixtureContent.fixtures[0].response as {
+      content: string;
+      webSearches?: string[];
+    };
+    expect(saved.content).toBe("It is sunny in Paris.");
+    // The bug: webSearches was collapsed but never written to the fixture.
+    expect(saved.webSearches).toEqual(["weather in Paris"]);
+  });
+
+  it("propagates webSearches alongside tool calls into the persisted fixture", async () => {
+    // web_search_call + a structured tool call → collapsed result carries both
+    // toolCalls and webSearches; the fixture must retain webSearches in the
+    // tool-call branch too.
+    const sse = [
+      'data: {"type":"response.output_item.done","item":{"type":"web_search_call","action":{"query":"latest news"}}}',
+      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","function":{"name":"get_news","arguments":"{}"}}]}}]}',
+      "data: [DONE]",
+    ]
+      .map((l) => l + "\n\n")
+      .join("");
+
+    const { fixturePath } = await recordSse(sse);
+
+    const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json"));
+    expect(files).toHaveLength(1);
+    const fixtureContent = JSON.parse(
+      fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"),
+    ) as FixtureFile;
+    const saved = fixtureContent.fixtures[0].response as {
+      toolCalls: unknown[];
+      webSearches?: string[];
+    };
+    expect(saved.toolCalls).toHaveLength(1);
+    expect(saved.webSearches).toEqual(["latest news"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Dropped-chunk diagnostic logging
+//
+// A malformed SSE frame is dropped during collapse; the collapser captures a
+// `firstDroppedSample` diagnostic. Assert that sample reaches the logged
+// dropped-chunk warning so the loss is actionable.
+// ---------------------------------------------------------------------------
+
+describe("recorder dropped-chunk diagnostic", () => {
+  let rawServer: http.Server | undefined;
+  let warnSpy: MockInstance | undefined;
+
+  afterEach(async () => {
+    warnSpy?.mockRestore();
+    warnSpy = undefined;
+    if (rawServer) {
+      await new Promise<void>((resolve) => rawServer!.close(() => resolve()));
+      rawServer = undefined;
+    }
+  });
+
+  it("logs firstDroppedSample alongside the dropped-chunk warning", async () => {
+    // A malformed data frame increments droppedChunks and sets firstDroppedSample.
+    const sse = [
+      'data: {"choices":[{"delta":{"content":"Hello"}}]}',
+      "data: {not valid json", // malformed → dropped, captured as the first sample
+      "data: [DONE]",
+    ]
+      .map((l) => l + "\n\n")
+      .join("");
+
+    // Capture warnings via a real logger instance (silent suppresses output, so
+    // spy on the instance method directly and run it through proxyAndRecord).
+    const logger = new Logger("warn");
+    warnSpy = vi.spyOn(logger, "warn").mockImplementation(() => {});
+
+    rawServer = http.createServer((_upReq, upRes) => {
+      upRes.writeHead(200, { "Content-Type": "text/event-stream" });
+      upRes.end(sse);
+    });
+    await new Promise<void>((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve()));
+    const upstreamPort = (rawServer!.address() as { port: number }).port;
+
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-collapse-dropped-"));
+    const record: RecordConfig = {
+      providers: { openai: `http://127.0.0.1:${upstreamPort}` },
+      fixturePath: tmpDir,
+    };
+
+    const { req, res } = createMockReqRes();
+    Object.assign(res, {
+      writeHead: () => res,
+      write: () => true,
+      end: () => res,
+      setHeader: () => res,
+      flushHeaders: () => undefined,
+    });
+
+    await proxyAndRecord(
+      req,
+      res,
+      { model: "gpt-4", messages: [{ role: "user", content: "drop a chunk" }] },
+      "openai",
+      "/v1/chat/completions",
+      [],
+      { record, logger },
+    );
+
+    const warnings = warnSpy.mock.calls.map((c) => String(c[0]));
+    const droppedWarning = warnings.find((w) => w.includes("dropped during stream collapse"));
+    expect(droppedWarning).toBeDefined();
+    // The bug: the sample diagnostic was computed but never surfaced.
+    expect(droppedWarning).toContain("not valid json");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Gemini audio-branch companion-modality propagation
+//
+// A single Gemini turn can interleave inlineData audio with a functionCall (and
+// text/thought parts). collapseGeminiSSE returns audioB64 ALONGSIDE
+// toolCalls/content/reasoning, but the recorder audio branch historically built
+// only `{ audio: { b64Json, contentType } }` — silently discarding the tool
+// call. These tests drive the real record path against a raw Gemini SSE upstream
+// and assert the persisted fixture retains the companion modalities.
+// ---------------------------------------------------------------------------
+
+describe("recorder Gemini audio-branch propagation", () => {
+  let rawServer: http.Server | undefined;
+
+  afterEach(async () => {
+    if (rawServer) {
+      await new Promise<void>((resolve) => rawServer!.close(() => resolve()));
+      rawServer = undefined;
+    }
+  });
+
+  // Raw Gemini upstream emitting a fixed SSE body, fronted by a real recorder
+  // configured with the `gemini` provider key so collapseGeminiSSE runs.
+  async function recordGeminiSse(sseBody: string): Promise<{
+    fixturePath: string;
+    response: { status: number; body: string };
+  }> {
+    rawServer = http.createServer((_upReq, upRes) => {
+      upRes.writeHead(200, { "Content-Type": "text/event-stream" });
+      upRes.end(sseBody);
+    });
+    await new Promise<void>((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve()));
+    const upstreamPort = (rawServer!.address() as { port: number }).port;
+
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-gemini-audio-"));
+    recorder = await createServer([], {
+      port: 0,
+      record: {
+        providers: { gemini: `http://127.0.0.1:${upstreamPort}` },
+        fixturePath: tmpDir,
+      },
+    });
+
+    // Gemini streaming is the :streamGenerateContent endpoint with a Gemini-shaped
+    // request body (contents/parts), which routes to the `gemini` provider so
+    // collapseGeminiSSE runs over the upstream SSE.
+    const response = await post(
+      `${recorder.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`,
+      {
+        contents: [{ role: "user", parts: [{ text: "speak and call a tool" }] }],
+      },
+    );
+
+    return { fixturePath: tmpDir, response };
+  }
+
+  it("retains a functionCall in the persisted fixture when audio is also present", async () => {
+    // Gemini SSE interleaving inlineData audio with a functionCall part.
+    // collapseGeminiSSE returns { audioB64, audioMimeType, toolCalls }.
+    const sse = [
+      JSON.stringify({
+        candidates: [
+          {
+            content: {
+              parts: [{ inlineData: { mimeType: "audio/pcm", data: "QUJD" } }],
+            },
+          },
+        ],
+      }),
+      JSON.stringify({
+        candidates: [
+          {
+            content: {
+              parts: [{ functionCall: { name: "get_weather", args: { city: "SF" } } }],
+            },
+          },
+        ],
+      }),
+    ]
+      .map((l) => `data: ${l}\n\n`)
+      .join("");
+
+    const { fixturePath } = await recordGeminiSse(sse);
+
+    const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json"));
+    expect(files).toHaveLength(1);
+    const fixtureContent = JSON.parse(
+      fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"),
+    ) as FixtureFile;
+    const saved = fixtureContent.fixtures[0].response as {
+      audio: { b64Json: string; contentType?: string };
+      toolCalls?: Array<{ name: string; arguments: string }>;
+    };
+    // Audio still persisted.
+    expect(saved.audio.b64Json).toBe("QUJD");
+    expect(saved.audio.contentType).toBe("audio/pcm");
+    // The bug: the tool call was collapsed but dropped from the fixture.
+    expect(saved.toolCalls).toHaveLength(1);
+    expect(saved.toolCalls![0].name).toBe("get_weather");
+    expect(JSON.parse(saved.toolCalls![0].arguments)).toEqual({ city: "SF" });
+  });
+
+  it("retains text content and reasoning alongside audio in the persisted fixture", async () => {
+    // Audio interleaved with a normal text part and a `thought` (reasoning) part.
+    const sse = [
+      JSON.stringify({
+        candidates: [
+          {
+            content: {
+              parts: [{ inlineData: { mimeType: "audio/pcm", data: "WFla" } }],
+            },
+          },
+        ],
+      }),
+      JSON.stringify({
+        candidates: [
+          {
+            content: {
+              parts: [
+                { text: "Here is the weather.", thought: false },
+                { text: "Thinking about it.", thought: true },
+              ],
+            },
+          },
+        ],
+      }),
+    ]
+      .map((l) => `data: ${l}\n\n`)
+      .join("");
+
+    const { fixturePath } = await recordGeminiSse(sse);
+
+    const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json"));
+    expect(files).toHaveLength(1);
+    const fixtureContent = JSON.parse(
+      fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"),
+    ) as FixtureFile;
+    const saved = fixtureContent.fixtures[0].response as {
+      audio: { b64Json: string };
+      content?: string;
+      reasoning?: string;
+    };
+    expect(saved.audio.b64Json).toBe("WFla");
+    // The bug: content/reasoning collapsed alongside audio were dropped.
+    expect(saved.content).toBe("Here is the weather.");
+    expect(saved.reasoning).toBe("Thinking about it.");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Harmony-unparsed recording (end-to-end)
+//
+// When a gpt-oss stream carries harmony channel tokens that cannot be parsed
+// into a valid harmony structure, the collapser preserves the bytes VERBATIM and
+// surfaces the distinct `harmonyUnparsed` signal — it is NOT transport loss.
+// The recorder must therefore persist a content-bearing fixture (verbatim, not
+// an error/truncated fixture) and emit a DISTINCT harmony warning, never the
+// dropped-chunk or truncation warnings.
+// ---------------------------------------------------------------------------
+
+describe("recorder harmony-unparsed recording", () => {
+  let rawServer: http.Server | undefined;
+  let warnSpy: MockInstance | undefined;
+
+  afterEach(async () => {
+    warnSpy?.mockRestore();
+    warnSpy = undefined;
+    if (rawServer) {
+      await new Promise<void>((resolve) => rawServer!.close(() => resolve()));
+      rawServer = undefined;
+    }
+  });
+
+  // A <|channel|> + <|message|> opener whose tool-call body never yields valid
+  // JSON — isHarmonyContent recognizes the tokens but parsing fails, so the
+  // collapser sets harmonyUnparsed and preserves content verbatim.
+  const BROKEN_HARMONY =
+    "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid json";
+
+  it("persists verbatim harmony content as a content fixture (not error/truncated)", async () => {
+    const sse = [
+      `data: ${JSON.stringify({ id: "chatcmpl-broken", choices: [{ delta: { content: BROKEN_HARMONY } }] })}`,
+      "data: [DONE]",
+    ]
+      .map((l) => l + "\n\n")
+      .join("");
+
+    rawServer = http.createServer((_upReq, upRes) => {
+      upRes.writeHead(200, { "Content-Type": "text/event-stream" });
+      upRes.end(sse);
+    });
+    await new Promise<void>((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve()));
+    const upstreamPort = (rawServer!.address() as { port: number }).port;
+
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-harmony-record-"));
+    recorder = await createServer([], {
+      port: 0,
+      record: {
+        providers: { openai: `http://127.0.0.1:${upstreamPort}` },
+        fixturePath: tmpDir,
+      },
+    });
+
+    await post(`${recorder.url}/v1/chat/completions`, {
+      model: "gpt-oss",
+      messages: [{ role: "user", content: "use harmony" }],
+      stream: true,
+    });
+
+    const files = fs.readdirSync(tmpDir).filter((f) => f.endsWith(".json"));
+    expect(files).toHaveLength(1);
+    const fixtureContent = JSON.parse(
+      fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"),
+    ) as FixtureFile;
+    const saved = fixtureContent.fixtures[0].response as {
+      content?: string;
+      error?: unknown;
+      toolCalls?: unknown[];
+    };
+    // Verbatim content, no fabricated tool call, no error fixture.
+    expect(saved.error).toBeUndefined();
+    expect(saved.toolCalls).toBeUndefined();
+    expect(saved.content).toBe(BROKEN_HARMONY);
+  });
+
+  it("emits the distinct harmony-unparsed warning, not dropped-chunk/truncation warnings", async () => {
+    const sse = [
+      `data: ${JSON.stringify({ id: "chatcmpl-broken", choices: [{ delta: { content: BROKEN_HARMONY } }] })}`,
+      "data: [DONE]",
+    ]
+      .map((l) => l + "\n\n")
+      .join("");
+
+    const logger = new Logger("warn");
+    warnSpy = vi.spyOn(logger, "warn").mockImplementation(() => {});
+
+    rawServer = http.createServer((_upReq, upRes) => {
+      upRes.writeHead(200, { "Content-Type": "text/event-stream" });
+      upRes.end(sse);
+    });
+    await new Promise<void>((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve()));
+    const upstreamPort = (rawServer!.address() as { port: number }).port;
+
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-harmony-warn-"));
+    const record: RecordConfig = {
+      providers: { openai: `http://127.0.0.1:${upstreamPort}` },
+      fixturePath: tmpDir,
+    };
+
+    const { req, res } = createMockReqRes();
+    Object.assign(res, {
+      writeHead: () => res,
+      write: () => true,
+      end: () => res,
+      setHeader: () => res,
+      flushHeaders: () => undefined,
+    });
+
+    await proxyAndRecord(
+      req,
+      res,
+      { model: "gpt-oss", messages: [{ role: "user", content: "use harmony" }] },
+      "openai",
+      "/v1/chat/completions",
+      [],
+      { record, logger },
+    );
+
+    const warnings = warnSpy.mock.calls.map((c) => String(c[0]));
+    const harmonyWarning = warnings.find((w) =>
+      w.includes("Harmony tokens present but unparseable"),
+    );
+    expect(harmonyWarning).toBeDefined();
+    // Distinct signal — NOT counted as dropped/truncated transport loss.
+    expect(warnings.some((w) => w.includes("dropped during stream collapse"))).toBe(false);
+    expect(warnings.some((w) => w.includes("may be truncated"))).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Frame-timing splitter — CRLF delimiter tolerance
+//
+// Some upstreams/proxies emit SSE/NDJSON frames with CRLF line endings
+// (\r\n\r\n for SSE, \r\n for NDJSON), which the SSE spec permits. The
+// frame-timing splitter must split on these boundaries so per-frame
+// timestamps are captured. An LF-only splitter sees the whole stream as a
+// single frame, producing no recordedTimings.
+// ---------------------------------------------------------------------------
+
+describe("recorder frame-timing: CRLF delimiters", () => {
+  it("captures per-frame timing for CRLF-delimited SSE streams", async () => {
+    // Anthropic-style SSE, but with CRLF (\r\n\r\n) frame boundaries.
+    const rawServer = http.createServer((_req, res) => {
+      res.writeHead(200, { "Content-Type": "text/event-stream" });
+      const frames = [
+        `event: message_start\r\ndata: ${JSON.stringify({ type: "message_start", message: { id: "msg_crlf", role: "assistant" } })}`,
+        `event: content_block_delta\r\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "CRLF " } })}`,
+        `event: content_block_delta\r\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "frames" } })}`,
+        `event: message_stop\r\ndata: ${JSON.stringify({ type: "message_stop" })}`,
+      ];
+      // Write each frame with a CRLF/CRLF terminator on its own tick so the
+      // per-frame timestamps are distinguishable.
+      let i = 0;
+      const writeNext = () => {
+        if (i >= frames.length) {
+          res.end();
+          return;
+        }
+        res.write(`${frames[i]}\r\n\r\n`);
+        i++;
+        setTimeout(writeNext, 2);
+      };
+      writeNext();
+    });
+    await new Promise<void>((resolve) => rawServer.listen(0, "127.0.0.1", resolve));
+    const rawAddr = rawServer.address() as { port: number };
+    const rawUrl = `http://127.0.0.1:${rawAddr.port}`;
+
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-record-"));
+    recorder = await createServer([], {
+      port: 0,
+      record: { providers: { anthropic: rawUrl }, fixturePath: tmpDir },
+    });
+
+    const resp = await post(`${recorder.url}/v1/messages`, {
+      model: "claude-3-sonnet",
+      max_tokens: 100,
+      messages: [{ role: "user", content: "crlf sse timing test" }],
+      stream: true,
+    });
+    expect(resp.status).toBe(200);
+
+    const files = fs.readdirSync(tmpDir).filter((f) => f.endsWith(".json"));
+    expect(files).toHaveLength(1);
+    const fixtureContent = JSON.parse(
+      fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"),
+    ) as FixtureFile;
+
+    // Content collapse must still work across CRLF frames.
+    const savedResponse = fixtureContent.fixtures[0].response as { content?: string };
+    expect(savedResponse.content).toBe("CRLF frames");
+
+    // The splitter must have seen each CRLF-terminated frame individually,
+    // so recordedTimings is present with one inter-chunk delay per frame gap.
+    const timings = fixtureContent.fixtures[0].recordedTimings;
+    expect(timings).toBeDefined();
+    // 4 frames → 3 inter-frame delays.
+    expect(timings!.interChunkDelaysMs.length).toBe(3);
+
+    await new Promise<void>((resolve) => rawServer.close(() => resolve()));
+  });
+
+  it("captures per-frame timing for CRLF-delimited NDJSON streams", async () => {
+    // Ollama-style NDJSON, but with CRLF (\r\n) line endings.
+    const rawServer = http.createServer((_req, res) => {
+      res.writeHead(200, { "Content-Type": "application/x-ndjson" });
+      const lines = [
+        JSON.stringify({ message: { role: "assistant", content: "NDJSON " }, done: false }),
+        JSON.stringify({ message: { role: "assistant", content: "over " }, done: false }),
+        JSON.stringify({ message: { role: "assistant", content: "CRLF" }, done: true }),
+      ];
+      let i = 0;
+      const writeNext = () => {
+        if (i >= lines.length) {
+          res.end();
+          return;
+        }
+        res.write(`${lines[i]}\r\n`);
+        i++;
+        setTimeout(writeNext, 2);
+      };
+      writeNext();
+    });
+    await new Promise<void>((resolve) => rawServer.listen(0, "127.0.0.1", resolve));
+    const rawAddr = rawServer.address() as { port: number };
+    const rawUrl = `http://127.0.0.1:${rawAddr.port}`;
+
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-record-"));
+    recorder = await createServer([], {
+      port: 0,
+      record: { providers: { ollama: rawUrl }, fixturePath: tmpDir },
+    });
+
+    const resp = await post(`${recorder.url}/api/chat`, {
+      model: "llama3",
+      messages: [{ role: "user", content: "crlf ndjson timing test" }],
+      stream: true,
+    });
+    expect(resp.status).toBe(200);
+
+    const files = fs.readdirSync(tmpDir).filter((f) => f.endsWith(".json"));
+    expect(files).toHaveLength(1);
+    const fixtureContent = JSON.parse(
+      fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"),
+    ) as FixtureFile;
+
+    // Each CRLF-terminated NDJSON line must be timestamped individually.
+    const timings = fixtureContent.fixtures[0].recordedTimings;
+    expect(timings).toBeDefined();
+    // 3 frames → 2 inter-frame delays.
+    expect(timings!.interChunkDelaysMs.length).toBe(2);
+
+    await new Promise<void>((resolve) => rawServer.close(() => resolve()));
+  });
+});
diff --git a/src/recorder.ts b/src/recorder.ts
index a892a9d..b503ea4 100644
--- a/src/recorder.ts
+++ b/src/recorder.ts
@@ -3,6 +3,7 @@ import * as https from "node:https";
 import * as fs from "node:fs";
 import * as path from "node:path";
 import * as crypto from "node:crypto";
+import { StringDecoder } from "node:string_decoder";
 import type {
   ChatCompletionRequest,
   Fixture,
@@ -366,15 +367,40 @@ export async function proxyAndRecord(
       defaults.logger.warn("Bedrock EventStream: CRC mismatch — response may be truncated");
     }
     if (collapsed.droppedChunks && collapsed.droppedChunks > 0) {
-      defaults.logger.warn(`${collapsed.droppedChunks} chunk(s) dropped during stream collapse`);
+      defaults.logger.warn(
+        `${collapsed.droppedChunks} chunk(s) dropped during stream collapse${collapsed.firstDroppedSample ? ` — first: ${collapsed.firstDroppedSample}` : ""}`,
+      );
+    }
+    if (collapsed.harmonyUnparsed) {
+      defaults.logger.warn(
+        `Harmony tokens present but unparseable — content preserved verbatim${collapsed.harmonyNote ? ` (${collapsed.harmonyNote})` : ""}`,
+      );
     }
-    // Audio from streamed inlineData (e.g. Gemini SSE with audio parts)
+    // Audio from streamed inlineData (e.g. Gemini SSE with audio parts).
+    // A single Gemini turn can interleave audio with a functionCall and/or
+    // text/thought parts; preserve those companion modalities so the tool call
+    // / content / reasoning are not silently dropped when audio is present.
     if (collapsed.audioB64) {
+      const audioToolCallsSpread =
+        collapsed.toolCalls && collapsed.toolCalls.length > 0
+          ? {
+              toolCalls: collapsed.toolCalls.map((tc) => ({
+                ...tc,
+                name: tc.name ?? "",
+                arguments: tc.arguments ?? "{}",
+              })),
+            }
+          : {};
+      const audioContentSpread = collapsed.content ? { content: collapsed.content } : {};
+      const audioReasoningSpread = collapsed.reasoning ? { reasoning: collapsed.reasoning } : {};
       fixtureResponse = {
         audio: {
           b64Json: collapsed.audioB64,
           contentType: collapsed.audioMimeType ?? "audio/mpeg",
         },
+        ...audioToolCallsSpread,
+        ...audioContentSpread,
+        ...audioReasoningSpread,
       };
     } else if (
       collapsed.content === "" &&
@@ -382,9 +408,19 @@ export async function proxyAndRecord(
     ) {
       defaults.logger.warn("Stream collapse produced empty content — fixture may be incomplete");
       const reasoningSpread = collapsed.reasoning ? { reasoning: collapsed.reasoning } : {};
-      fixtureResponse = { content: collapsed.content ?? "", ...reasoningSpread };
+      const webSearchesSpread = collapsed.webSearches?.length
+        ? { webSearches: collapsed.webSearches }
+        : {};
+      fixtureResponse = {
+        content: collapsed.content ?? "",
+        ...reasoningSpread,
+        ...webSearchesSpread,
+      };
     } else {
       const reasoningSpread = collapsed.reasoning ? { reasoning: collapsed.reasoning } : {};
+      const webSearchesSpread = collapsed.webSearches?.length
+        ? { webSearches: collapsed.webSearches }
+        : {};
       if (collapsed.toolCalls && collapsed.toolCalls.length > 0) {
         const sanitizedToolCalls = collapsed.toolCalls.map((tc) => ({
           ...tc,
@@ -397,12 +433,21 @@ export async function proxyAndRecord(
             content: collapsed.content,
             toolCalls: sanitizedToolCalls,
             ...reasoningSpread,
+            ...webSearchesSpread,
           };
         } else {
-          fixtureResponse = { toolCalls: sanitizedToolCalls, ...reasoningSpread };
+          fixtureResponse = {
+            toolCalls: sanitizedToolCalls,
+            ...reasoningSpread,
+            ...webSearchesSpread,
+          };
         }
       } else {
-        fixtureResponse = { content: collapsed.content ?? "", ...reasoningSpread };
+        fixtureResponse = {
+          content: collapsed.content ?? "",
+          ...reasoningSpread,
+          ...webSearchesSpread,
+        };
       }
     }
   } else {
@@ -572,6 +617,26 @@ export async function proxyAndRecord(
 // Internal helpers
 // ---------------------------------------------------------------------------
 
+/**
+ * Decodes a sequence of byte chunks to UTF-8 text for SSE/NDJSON frame
+ * splitting on the streamed-capture path. Wraps Node's StringDecoder so a
+ * multibyte UTF-8 character (CJK, emoji, ...) whose bytes are split across a
+ * TCP chunk boundary buffers across chunks instead of decoding to U+FFFD
+ * replacement characters — decoding each chunk independently with
+ * Buffer#toString() would corrupt the recorded frame text.
+ */
+export class StreamingFrameDecoder {
+  private decoder = new StringDecoder("utf8");
+  /** Decode a chunk; a trailing partial multibyte sequence is held for the next write. */
+  write(chunk: Buffer): string {
+    return this.decoder.write(chunk);
+  }
+  /** Flush once the stream has ended; leftover incomplete bytes decode to U+FFFD. */
+  end(): string {
+    return this.decoder.end();
+  }
+}
+
 function clampTimeout(value: number | undefined, fallback: number): number {
   if (value == null || !Number.isFinite(value) || value <= 0) return fallback;
   return value;
@@ -631,6 +696,10 @@ function makeUpstreamRequest(
         const frameTimestamps: number[] = [];
         const streamStartTime = Date.now();
         let frameBuffer = "";
+        // Decode chunks through a streaming-aware decoder so a multibyte UTF-8
+        // character split across a TCP chunk boundary buffers across chunks
+        // instead of decoding to U+FFFD replacement characters.
+        const frameDecoder = new StreamingFrameDecoder();
         let binaryFrameBuffer = Buffer.alloc(0);
 
         let streamedToClient = false;
@@ -670,8 +739,14 @@ function makeUpstreamRequest(
           // TCP data events don't align with SSE frames — buffer and
           // split on the protocol delimiter to timestamp each complete frame.
           if (isSSE || isNDJSON) {
-            frameBuffer += chunk.toString();
-            const delimiter = isNDJSON ? "\n" : "\n\n";
+            frameBuffer += frameDecoder.write(chunk);
+            // Split on the protocol delimiter, tolerating CRLF line endings.
+            // The SSE spec permits CRLF, and some upstreams/proxies emit
+            // \r\n\r\n (SSE) or \r\n (NDJSON) frame boundaries. A literal
+            // "\n\n" split never matches inside \r\n\r\n, so a CRLF SSE stream
+            // would collapse into one frame and lose per-frame timing. The
+            // last split element (a partial tail) stays buffered as before.
+            const delimiter = isNDJSON ? /\r?\n/ : /\r?\n\r?\n/;
             const parts = frameBuffer.split(delimiter);
             // All complete frames (everything except the last part which
             // may be incomplete).
@@ -721,8 +796,13 @@ function makeUpstreamRequest(
           // the stream ended without a trailing delimiter. Binary EventStream
           // frames are length-prefixed so partial frames at end-of-stream are
           // genuinely incomplete and should not be timestamped.
-          if ((isSSE || isNDJSON) && frameBuffer.trim().length > 0) {
-            frameTimestamps.push(Date.now());
+          if (isSSE || isNDJSON) {
+            // Drain any bytes the decoder buffered for an incomplete multibyte
+            // sequence so the final frame text is complete before we test it.
+            frameBuffer += frameDecoder.end();
+            if (frameBuffer.trim().length > 0) {
+              frameTimestamps.push(Date.now());
+            }
           }
           const rawBuffer = Buffer.concat(chunks);
           if (
diff --git a/src/types.ts b/src/types.ts
index 5124991..bb60d30 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -192,9 +192,23 @@ export interface ImageResponse {
   images?: ImageItem[];
 }
 
+// ORDERING CONTRACT: audio fixtures MUST be discriminated by `isAudioResponse`
+// BEFORE the `isContentWithToolCallsResponse` / `isToolCallResponse` / text
+// guards, because the optional companion fields below make these shapes
+// structurally overlap (an AudioResponse with `toolCalls`/`content` would also
+// satisfy those guards otherwise).
 export interface AudioResponse {
   audio: string | { b64Json: string; contentType?: string };
   format?: string;
+  /**
+   * Companion modalities (this field plus `content` and `reasoning` below) that
+   * can accompany streamed audio. A single Gemini turn may interleave inlineData
+   * audio with a functionCall and/or text/thought parts; the recorder preserves
+   * them here so they are not silently discarded when audio is also present.
+   */
+  toolCalls?: ToolCall[];
+  content?: string;
+  reasoning?: string;
 }
 
 export interface TranscriptionResponse {

From b612db1fb6575857a8bb725042d5941ebee8073c Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Tue, 2 Jun 2026 13:27:59 -0700
Subject: [PATCH 4/5] fix: replay Gemini audio companion tool calls, content,
 and reasoning

---
 src/__tests__/gemini-audio.test.ts | 102 +++++++++++++++++++++++++++++
 src/gemini.ts                      |  39 ++++++++---
 2 files changed, 132 insertions(+), 9 deletions(-)

diff --git a/src/__tests__/gemini-audio.test.ts b/src/__tests__/gemini-audio.test.ts
index 8b5c03b..fb14b4e 100644
--- a/src/__tests__/gemini-audio.test.ts
+++ b/src/__tests__/gemini-audio.test.ts
@@ -153,6 +153,108 @@ describe("Gemini audio responses", () => {
     });
   });
 
+  test("non-streaming audio turn replays companion tool call + content + reasoning", async () => {
+    mock = new LLMock({ port: 0 });
+    mock.addFixture({
+      match: { userMessage: "audio with tool call" },
+      response: {
+        audio: "SGVsbG8=",
+        format: "mp3",
+        content: "Here is the audio you asked for.",
+        reasoning: "User wants audio plus a lookup.",
+        toolCalls: [{ id: "call_1", name: "lookup", arguments: '{"query":"weather"}' }],
+      },
+    });
+    await mock.start();
+
+    const res = await fetch(`${mock.url}/v1beta/models/lyria-3:generateContent`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        contents: [{ role: "user", parts: [{ text: "audio with tool call" }] }],
+      }),
+    });
+    expect(res.status).toBe(200);
+    const data = await res.json();
+    const parts = data.candidates[0].content.parts;
+
+    // Audio (inlineData) must still be present and first.
+    expect(parts[0].inlineData).toEqual({ mimeType: "audio/mpeg", data: "SGVsbG8=" });
+
+    // Coverage pin: a tool-call-bearing turn must finish with FUNCTION_CALL,
+    // never STOP — guards against a regression that emits STOP with a tool call.
+    expect(data.candidates[0].finishReason).toBe("FUNCTION_CALL");
+
+    // Companion modalities must NOT be dropped on replay.
+    const functionCallPart = parts.find((p: { functionCall?: unknown }) => p.functionCall);
+    expect(functionCallPart).toBeDefined();
+    expect(functionCallPart.functionCall.name).toBe("lookup");
+    expect(functionCallPart.functionCall.args).toEqual({ query: "weather" });
+    expect(functionCallPart.functionCall.id).toBe("call_1");
+
+    const textPart = parts.find((p: { text?: string; thought?: boolean }) => p.text && !p.thought);
+    expect(textPart?.text).toBe("Here is the audio you asked for.");
+
+    const thoughtPart = parts.find((p: { text?: string; thought?: boolean }) => p.thought);
+    expect(thoughtPart?.text).toBe("User wants audio plus a lookup.");
+  });
+
+  test("streaming audio turn replays companion tool call + content + reasoning", async () => {
+    mock = new LLMock({ port: 0 });
+    mock.addFixture({
+      match: { userMessage: "stream audio with tool call" },
+      response: {
+        audio: "SGVsbG8=",
+        format: "mp3",
+        content: "Streamed text.",
+        reasoning: "Streamed thought.",
+        toolCalls: [{ id: "call_2", name: "fetch", arguments: '{"url":"x"}' }],
+      },
+    });
+    await mock.start();
+
+    const res = await fetch(`${mock.url}/v1beta/models/lyria-3:streamGenerateContent`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        contents: [{ role: "user", parts: [{ text: "stream audio with tool call" }] }],
+      }),
+    });
+    expect(res.status).toBe(200);
+
+    const text = await res.text();
+    const chunks = text
+      .split("\n\n")
+      .filter((line) => line.startsWith("data: "))
+      .map((line) => JSON.parse(line.replace("data: ", "")));
+
+    const allParts = chunks.flatMap((c) => c.candidates[0].content.parts);
+
+    expect(allParts.some((p: { inlineData?: unknown }) => p.inlineData)).toBe(true);
+
+    // Coverage pin: a tool-call-bearing turn must finish with FUNCTION_CALL,
+    // never STOP — guards against a regression that emits STOP with a tool call.
+    expect(
+      chunks.some(
+        (c: { candidates: Array<{ finishReason?: string }> }) =>
+          c.candidates[0].finishReason === "FUNCTION_CALL",
+      ),
+    ).toBe(true);
+
+    const functionCallPart = allParts.find((p: { functionCall?: unknown }) => p.functionCall);
+    expect(functionCallPart).toBeDefined();
+    expect(functionCallPart.functionCall.name).toBe("fetch");
+    expect(functionCallPart.functionCall.id).toBe("call_2");
+
+    const textPart = allParts.find(
+      (p: { text?: string; thought?: boolean }) => p.text && !p.thought,
+    );
+    expect(textPart?.text).toBe("Streamed text.");
+
+    const thoughtPart = allParts.find((p: { text?: string; thought?: boolean }) => p.thought);
+    expect(thoughtPart?.text).toBe("Streamed thought.");
+  });
+
   test("onAudio() convenience method works via Gemini", async () => {
     mock = new LLMock({ port: 0 });
     mock.onAudio("piano loop", { audio: "SGVsbG8=" });
diff --git a/src/gemini.ts b/src/gemini.ts
index a10d13e..f240f16 100644
--- a/src/gemini.ts
+++ b/src/gemini.ts
@@ -509,13 +509,35 @@ function resolveAudioInlineData(audio: AudioResponse): { mimeType: string; data:
   };
 }
 
-function buildGeminiAudioResponse(audio: AudioResponse): GeminiResponseChunk {
+// Build the ordered Gemini parts for an audio turn: inlineData audio first,
+// then the companion modalities the recorder preserved on the AudioResponse,
+// always in this order: reasoning (a `thought: true` text part), text content,
+// then one part per tool call (built by parseToolCallPart, given `logger`).
+// Absent or empty companions add no part. Without this, a recorded turn that
+// interleaves audio with a functionCall and/or text drops them on replay.
+// NOTE: only this Gemini replay path re-emits companions (`audioB64` collapse
+// is Gemini-only today); a cross-provider audio fixture would not replay them.
+function buildGeminiAudioParts(audio: AudioResponse, logger: Logger): GeminiPart[] {
+  const inlineData = resolveAudioInlineData(audio);
+  const parts: GeminiPart[] = [{ inlineData }];
+  if (audio.reasoning) {
+    parts.push({ text: audio.reasoning, thought: true });
+  }
+  if (audio.content) {
+    parts.push({ text: audio.content });
+  }
+  if (audio.toolCalls?.length) {
+    parts.push(...audio.toolCalls.map((tc) => parseToolCallPart(tc, logger)));
+  }
+  return parts;
+}
+
+function buildGeminiAudioResponse(audio: AudioResponse, logger: Logger): GeminiResponseChunk {
   return {
     candidates: [
       {
-        content: { role: "model", parts: [{ inlineData }] },
-        finishReason: "STOP",
+        content: { role: "model", parts: buildGeminiAudioParts(audio, logger) },
+        finishReason: audio.toolCalls?.length ? "FUNCTION_CALL" : "STOP",
         index: 0,
       },
     ],
@@ -523,14 +545,13 @@ function buildGeminiAudioResponse(audio: AudioResponse): GeminiResponseChunk {
   };
 }
 
-function buildGeminiAudioStreamChunks(audio: AudioResponse): GeminiResponseChunk[] {
-  const inlineData = resolveAudioInlineData(audio);
+function buildGeminiAudioStreamChunks(audio: AudioResponse, logger: Logger): GeminiResponseChunk[] {
   return [
     {
       candidates: [
         {
-          content: { role: "model", parts: [{ inlineData }] },
-          finishReason: "STOP",
+          content: { role: "model", parts: buildGeminiAudioParts(audio, logger) },
+          finishReason: audio.toolCalls?.length ? "FUNCTION_CALL" : "STOP",
           index: 0,
         },
       ],
@@ -790,11 +811,11 @@ export async function handleGemini(
       response: { status: 200, fixture },
     });
     if (!streaming) {
-      const body = buildGeminiAudioResponse(response);
+      const body = buildGeminiAudioResponse(response, logger);
       res.writeHead(200, { "Content-Type": "application/json" });
       res.end(JSON.stringify(body));
     } else {
-      const chunks = buildGeminiAudioStreamChunks(response);
+      const chunks = buildGeminiAudioStreamChunks(response, logger);
       const interruption = createInterruptionSignal(fixture);
       const completed = await writeGeminiSSEStream(res, chunks, {
         latency,

From 75c1066ad996cbe77d04ac1984a698cb2ad3b516 Mon Sep 17 00:00:00 2001
From: Jordan Ritter <jpr5@darkridge.com>
Date: Tue, 2 Jun 2026 14:38:03 -0700
Subject: [PATCH 5/5] chore: release v1.28.0

---
 .claude-plugin/marketplace.json |  2 +-
 .claude-plugin/plugin.json      |  2 +-
 CHANGELOG.md                    | 12 ++++++++++++
 charts/aimock/Chart.yaml        |  2 +-
 package.json                    |  2 +-
 5 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 8d51cfa..6f7bd7f 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -9,7 +9,7 @@
       "source": {
         "source": "npm",
         "package": "@copilotkit/aimock",
-        "version": "^1.27.3"
+        "version": "^1.28.0"
       },
       "description": "Fixture authoring skill for @copilotkit/aimock — LLM, multimedia (image/TTS/transcription/video), MCP, A2A, AG-UI, vector, embeddings, structured output, sequential responses, streaming physics, record/replay, agent loop patterns, and debugging"
     }
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index ef930da..a92dbcd 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "aimock",
-  "version": "1.27.3",
+  "version": "1.28.0",
   "description": "Fixture authoring guidance for @copilotkit/aimock — LLM, multimedia, MCP, A2A, AG-UI, vector, and service mocking",
   "author": {
     "name": "CopilotKit"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91d17e2..f17158e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,18 @@
 
 ## [Unreleased]
 
+## [1.28.0] - 2026-06-02
+
+### Added
+
+- **Harmony channel format** — parse OpenAI "harmony" channel tokens (`<|channel|>… <|message|>… <|call|>`) emitted by local gpt-oss models (Ollama / vLLM / OpenRouter) so their tool calls, reasoning, and content are captured when recording (hosted OpenAI pre-parses harmony, so only local runtimes pass it through raw). Implemented as a lexer + state-machine parser with a uniform all-or-nothing verbatim fail-safe, wired as fallback-only so it never produces phantom tool calls.
+
+### Fixed
+
+- **Recorder** — decode streamed response chunks incrementally to prevent multibyte UTF-8 corruption; split frames for timing on CRLF as well as LF delimiters; propagate `webSearches` and audio-companion fields (tool calls / content / reasoning) into recorded fixtures; log `firstDroppedSample` alongside dropped-chunk warnings.
+- **Stream collapsers** — multi-line and CRLF SSE handling; missing/uncorrelated tool-call index guards with symmetric dropped-chunk accounting across OpenAI / Anthropic / Bedrock / Cohere; bound Bedrock EventStream header parsing against malformed frames.
+- **Gemini** — replay audio-companion tool calls / content / reasoning on audio turns instead of dropping them.
+
 ## [1.27.3] - 2026-05-27
 
 ### Fixed
diff --git a/charts/aimock/Chart.yaml b/charts/aimock/Chart.yaml
index 79c178c..1eba0cd 100644
--- a/charts/aimock/Chart.yaml
+++ b/charts/aimock/Chart.yaml
@@ -3,4 +3,4 @@ name: aimock
 description: Mock infrastructure for AI application testing (OpenAI, Anthropic, Gemini, MCP, A2A, vector)
 type: application
 version: 0.1.0
-appVersion: "1.27.3"
+appVersion: "1.28.0"
diff --git a/package.json b/package.json
index dba0ec2..2f7263a 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@copilotkit/aimock",
-  "version": "1.27.3",
+  "version": "1.28.0",
   "description": "Mock infrastructure for AI application testing — LLM APIs, image generation, text-to-speech, transcription, audio generation, video generation, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies.",
   "license": "MIT",
   "keywords": [