From 1c54b709dfb21a1c8f6d0a39ffaaacf669f788da Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 2 Jun 2026 13:27:49 -0700 Subject: [PATCH 1/5] feat: parse OpenAI harmony channel tokens to record gpt-oss tool calls --- src/__tests__/harmony-boundary.test.ts | 568 +++++++++++++++++++ src/harmony.ts | 731 +++++++++++++++++++++++++ 2 files changed, 1299 insertions(+) create mode 100644 src/__tests__/harmony-boundary.test.ts create mode 100644 src/harmony.ts diff --git a/src/__tests__/harmony-boundary.test.ts b/src/__tests__/harmony-boundary.test.ts new file mode 100644 index 0000000..e2f0254 --- /dev/null +++ b/src/__tests__/harmony-boundary.test.ts @@ -0,0 +1,568 @@ +import { describe, it, expect } from "vitest"; +import { parseHarmonyContent } from "../harmony.js"; +import { collapseOpenAISSE } from "../stream-collapse.js"; + +// =========================================================================== +// Harmony body fail-safe — STRUCTURAL (not per-token/per-exit) regressions. +// +// A 7-agent review found the per-branch guard (`absorbedTerminatorLiteral`, +// checked only on the EOF exit) leaked control tokens on OTHER exit paths. The +// fix makes the non-tool-body fail-safe STRUCTURAL: a terminator-shaped literal +// (END/RETURN/CALL) or a START/CONSTRAIN may only be absorbed as embedded prose +// when its immediate follower is real prose text; a literal immediately +// followed by another control token or by EOF is NOT legitimately embedded and +// fails the WHOLE input safe (verbatim + harmonyUnparsed). This pins the +// verified-bad leak shapes the review found, and proves legitimate harmony +// (including bodies that quote tokens as prose, multi-message streams, and tool +// calls with whitespace-padded args) still parses. +// +// SSE-body idiom mirrors stream-collapse.test.ts: +// data: ${JSON.stringify({ choices: [{ delta: { content: "..." } }] })} +// joined by "\n". 
+// =========================================================================== + +/** Build an OpenAI SSE body whose content chunks carry harmony tokens. */ +function openAIHarmonyBody(chunks: string[], id = "chatcmpl-hb"): string { + return [ + ...chunks.flatMap((content) => [ + `data: ${JSON.stringify({ id, choices: [{ delta: { content } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); +} + +describe("harmony body fail-safe — structural (no control-token literal reaches routed output)", () => { + // RED 1: a final body terminated by <|return|> immediately followed by a + // SECOND <|return|> (then EOF). The first <|return|> is a terminator-shaped + // literal whose follower is a control token (not prose) — NOT legitimately + // embedded. The OLD code absorbed it and routed "A<|return|>" to content via + // the `terminated` exit. Correct: uniform fail-safe (verbatim + signal). + it("RED1: final<|message|>A<|return|><|return|> fails safe (no <|return|> leak via terminated exit)", () => { + const raw = "<|channel|>final<|message|>A<|return|><|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + // On failure the ORIGINAL bytes are preserved verbatim (the tokens are NOT + // stripped) — the no-leak guarantee is that a token never reaches a + // SUCCESSFUL routed body. Here the OLD code routed "A<|return|>"; now it + // fails safe, so nothing is routed at all. + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + // RED 2: an analysis body terminated by <|end|> immediately followed by a + // SECOND <|end|> (then EOF). 
The OLD code absorbed the first <|end|> and + // routed "A<|end|>" to reasoning via the `terminated` exit (the EOF-only + // guard never fired because the loop exited via `terminated`). Correct: + // uniform fail-safe — no <|end|> may reach reasoning. + it("RED2: analysis<|message|>A<|end|><|end|> fails safe (no <|end|> leak into reasoning)", () => { + const raw = "<|channel|>analysis<|message|>A<|end|><|end|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.reasoning).not.toContain("<|end|>"); + expect(direct.toolCalls).toEqual([]); + }); + + // RED 3: a final body whose trailing text absorbs a <|start|> that runs + // straight to EOF (no following message, no terminator). The OLD code did not + // track START absorption, so "answer <|start|>" leaked into content via the + // EOF exit. Correct: a START absorbed with no real boundary after it fails. + it("RED3: final<|message|>answer <|start|> fails safe (absorbed <|start|> at EOF does not leak)", () => { + const raw = "<|channel|>final<|message|>answer <|start|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + // Verbatim on failure (nothing routed); the OLD code leaked "answer <|start|>". + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + }); + + // A terminator-shaped literal whose immediate follower is a DIFFERENT control + // token (<|call|>, then EOF) is likewise not legitimately embedded. + it("RED1b: final<|message|>A<|return|> followed only by a control token fails safe", () => { + // <|return|> then <|call|> then EOF — first terminator's follower is a + // control token, so it cannot be absorbed as prose. 
+ const raw = "<|channel|>final<|message|>A<|return|><|call|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + // Verbatim on failure; the embedded <|return|> is never routed to content. + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + }); + + // RED 4: a final body that QUOTES a complete well-formed message which is then + // followed by trailing prose + a final terminator + // (`...hello<|return|> and then stop<|return|>`). The OLD code split it into + // two final messages and routed the quoted body "hello<|return|> and then + // stop" — MANGLED, leaking <|return|>. The quoted-split message's body must + // not absorb an embedded control literal, so the WHOLE input fails safe + // verbatim (never mangled). This is the harmony.ts "verbatim-or-clean, never + // mangled" contract at the quoted-message edge. + it("RED4: quoted message + trailing junk + final terminator fails safe (no <|return|> mangle)", () => { + const raw = + "<|channel|>final<|message|>To emit write " + + "<|start|>assistant<|channel|>final<|message|>hello<|return|> and then stop<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.content).not.toBe("To emit write hello<|return|> and then stop"); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + // GENERIC SAFETY INVARIANT across all four verified-bad inputs: whatever the + // outcome, the parse must NEVER leak a raw control-token literal — a success + // carries zero literals in routed output; a failure preserves bytes verbatim. 
+ it("never leaks a raw control token: clean-or-verbatim across all four RED inputs", () => { + const inputs = [ + "<|channel|>final<|message|>A<|return|><|return|>", + "<|channel|>analysis<|message|>A<|end|><|end|>", + "<|channel|>final<|message|>answer <|start|>", + "<|channel|>final<|message|>To emit write <|start|>assistant<|channel|>final<|message|>hello<|return|> and then stop<|return|>", + ]; + for (const raw of inputs) { + const r = parseHarmonyContent(raw); + if (!r.failed) { + // A clean success must carry zero control-token literals in output. + expect(r.content).not.toMatch(/<\|(start|end|return|call|channel|message|constrain)\|>/); + expect(r.reasoning).not.toMatch(/<\|(start|end|return|call|channel|message|constrain)\|>/); + } else { + // A failure preserves the original bytes verbatim. + expect(r.content).toBe(raw); + } + } + }); +}); + +describe("harmony bare-<|message|>-at-message-position fail-safe (silent-corruption gap)", () => { + // A bare <|message|> token at MESSAGE POSITION — with no preceding <|start|> + // or <|channel|> introducing it — is a grammar deviation, not a channel-less + // message. The OLD code accepted it, silently stripping control tokens and + // gluing bodies together. Correct: uniform fail-safe (verbatim + + // harmonyUnparsed), matching the parser's all-or-nothing contract and the + // isHarmonyContent gate. A legitimate message ALWAYS has START or CHANNEL + // before MESSAGE, so these failures cannot touch any valid harmony. + + // BARE-RED 1: a valid final message followed by a SECOND message that begins + // with a bare <|message|> (no <|start|>/<|channel|>). The OLD code consumed + // the bare <|message|> as a channel-less message and glued the two bodies + // -> content "realinjected". Correct: fail safe verbatim. 
+ it("BARE-RED1: final<|message|>real<|return|><|message|>injected<|return|> fails safe (no body glue)", () => { + const raw = "<|channel|>final<|message|>real<|return|><|message|>injected<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.content).not.toBe("realinjected"); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + // BARE-RED 2: the FIRST message itself begins with a bare <|message|> (no + // <|start|>/<|channel|>), followed by another bare <|message|>. The OLD code + // glued both bare bodies -> content "onetwo". Correct: parseHarmonyContent + // fails safe verbatim. At the collapse layer the content has NO real header + // (no <|channel|>/<|start|> before <|message|>), so it does not even trip the + // cheap `isHarmonyContent` gate (which requires channel-then-message or + // start-then-message ordering): the collapser leaves it VERBATIM with no glue + // and does not flag harmonyUnparsed. The corruption (body glue) is fixed + // either way; the bytes are preserved untouched. + it("BARE-RED2: <|message|>one<|end|><|message|>two<|return|> fails safe (no body glue)", () => { + const raw = "<|message|>one<|end|><|message|>two<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.content).not.toBe("onetwo"); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + + // Verbatim no-op at the collapse layer (no channel/start header -> not + // recognized as harmony structure, so content is preserved untouched). 
+ const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.content).not.toBe("onetwo"); + expect(result.harmonyUnparsed).toBeUndefined(); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + // BARE-RED 3: leading prose text followed immediately by a bare <|message|> + // (no <|start|>/<|channel|>). The OLD code treated the leading text as a + // channel-less preamble and consumed the bare <|message|>, gluing them -> + // content "preamblebody". Correct: parseHarmonyContent fails safe verbatim. At + // the collapse layer there is again no real header before <|message|>, so it + // does not trip the `isHarmonyContent` gate and is left VERBATIM with no glue. + it("BARE-RED3: preamble<|message|>body<|return|> fails safe (no leading-text glue)", () => { + const raw = "preamble<|message|>body<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.content).not.toBe("preamblebody"); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + + // Verbatim no-op at the collapse layer (no channel/start header). + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.content).not.toBe("preamblebody"); + expect(result.harmonyUnparsed).toBeUndefined(); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + // POSITIVE pin: leading channel-less TEXT that IS followed by a real + // <|channel|>-introduced message stays a valid preamble (the leading-text + // branch must still fire for START/CHANNEL, only MESSAGE is removed). 
+ it("leading text followed by a real <|channel|> message still parses (preamble preserved)", () => { + const raw = "preamble <|channel|>final<|message|>body<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.content).toBe("preamble body"); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + }); + + // POSITIVE pin: leading channel-less TEXT followed by a real <|start|> message + // stays a valid preamble likewise. + it("leading text followed by a real <|start|> message still parses (preamble preserved)", () => { + const raw = "preamble <|start|>assistant<|channel|>final<|message|>body<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.content).toBe("preamble body"); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + }); +}); + +describe("harmony body fail-safe — legitimate prose-quoted tokens still parse (guard against over-failing)", () => { + // A final body that QUOTES <|end|>/<|return|> as inline-code prose, each + // followed by real text and closed by a REAL <|return|> at EOF, is the + // documented "embedded literal" case. It must still parse cleanly (the literal + // is bracketed by prose on both sides — its follower is real text). + it("final body quoting <|end|>/<|return|> as prose keeps the full sentence", () => { + const body = "See `<|end|>` for the end token and `<|return|>` too."; + const raw = `<|channel|>final<|message|>${body}<|return|>`; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.content).toBe(body); + expect(direct.toolCalls).toEqual([]); + }); + + // An analysis body quoting <|call|>/<|start|> as prose, closed by a real + // <|end|> before a real next message, must keep the full reasoning body. 
+ it("analysis body quoting <|call|>/<|start|> as prose keeps the full body", () => { + const body = "Consider the `<|call|>` and `<|start|>` markers carefully."; + const raw = `<|channel|>analysis<|message|>${body}<|end|><|start|>assistant<|channel|>final<|message|>Done.<|return|>`; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.reasoning).toBe(body); + expect(direct.content).toBe("Done."); + }); +}); + +describe("harmony boundary — KNOWN_CHANNELS tightening of looksLikeMessageStart", () => { + // A lookahead <|start|>...<|channel|>X<|message|> whose channel X is NOT a + // known harmony channel (analysis/commentary/final) is NOT a real message + // boundary. Inside a final body, such a <|start|> is therefore embedded prose + // — but here it is immediately followed by control tokens / runs without real + // prose bracketing, so the structural body rule fails it safe rather than + // splitting on a bogus channel. What is actually pinned below is the clean-or- + // verbatim invariant (no token leaks); the no-split outcome is NOT asserted. + it("a <|start|>...<|channel|>UNKNOWN<|message|> lookahead is not treated as a real boundary", () => { + const raw = + "<|channel|>final<|message|>body <|start|>assistant<|channel|>bogus<|message|>x<|return|>"; + const direct = parseHarmonyContent(raw); + // Not split on the bogus channel; whatever the outcome, no leak on success. + if (!direct.failed) { + expect(direct.content).not.toMatch(/<\|(start|channel|message|return)\|>/); + } else { + expect(direct.content).toBe(raw); + } + }); + + // Positive: a KNOWN-channel lookahead still terminates the body and starts the + // next message (no regression to legitimate multi-message splitting). 
+ it("a KNOWN-channel <|start|> lookahead still terminates the current body", () => { + const raw = + "<|channel|>final<|message|>first answer<|start|>assistant<|channel|>final<|message|>second answer<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + // Two final messages concatenate into content; zero leaked tokens. + expect(direct.content).toBe("first answersecond answer"); + expect(direct.content).not.toMatch(/<\|/); + }); + + // A directly-channel-less trailing message (<|start|>assistant<|message|>..., + // NO <|channel|>) is still a valid boundary — KNOWN_CHANNELS only gates a + // lookahead that actually carries a <|channel|> header. + it("a channel-less <|start|>...<|message|> trailing message is still a valid boundary", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"x"}<|call|>' + + "<|start|>assistant<|message|>The answer.<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + expect(direct.toolCalls[0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' }); + expect(direct.content).toBe("The answer."); + }); + + // MULTI-CALL: the commentary tool-body scan picks the FIRST <|call|> whose + // accumulated preceding text is a COMPLETE JSON OBJECT and terminates the + // args there — it must NOT over-consume into a second trailing <|call|>. The + // body `{"a":1}` is already a complete object at the first CALL, so the args + // are exactly `{"a":1}` and the parser does not greedily swallow the second + // CALL. A clean trailing final message proves the first CALL was selected as + // the boundary (over-consumption would have mangled this into a single body). 
+ it("MULTI-CALL: first valid-object <|call|> terminates the args (no over-consume)", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1}<|call|>' + + "<|start|>assistant<|message|>done<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + // Args terminated at the FIRST CALL — exactly the first complete object. + expect(direct.toolCalls[0]).toEqual({ name: "t", arguments: '{"a":1}' }); + expect(direct.content).toBe("done"); + }); + + // MULTI-CALL fail-safe: the same first-CALL selection holds when a bare second + // <|call|> immediately follows. The first CALL closes `{"a":1}` and is chosen + // as the terminator (not over-consumed across the second CALL); the stray + // trailing CALL then has no owning message, so the WHOLE parse fails safe + // verbatim rather than fabricating an over-consumed tool call. + it("MULTI-CALL: a stray trailing <|call|> after the first object fails safe verbatim", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1}<|call|><|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + }); +}); + +describe("harmony tool-arg whitespace canonicalization", () => { + // Leading whitespace in the captured JSON args (e.g. "<|message|> {\"a\":1}") + // must be trimmed so the recorded arguments are the canonical JSON value. 
+ it("trims leading whitespace from tool-call JSON args", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|> {"a":1}<|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + expect(direct.toolCalls[0]).toEqual({ name: "t", arguments: '{"a":1}' }); + expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow(); + }); + + // Trailing whitespace is likewise trimmed. + it("trims trailing whitespace from tool-call JSON args", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1} <|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + expect(direct.toolCalls[0].arguments).toBe('{"a":1}'); + }); + + // Both-sides whitespace (including newlines) is trimmed to the canonical JSON. + it("trims surrounding whitespace/newlines from tool-call JSON args", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>\n {"a":1}\n <|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls[0].arguments).toBe('{"a":1}'); + }); + + // Interior whitespace inside the JSON value is preserved (only leading/ + // trailing is trimmed). + it("preserves interior whitespace inside the JSON args", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|> {"a": 1, "b": 2} <|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls[0].arguments).toBe('{"a": 1, "b": 2}'); + }); +}); + +describe("harmony tool-arg must be a JSON OBJECT (scalar/array/null are malformed)", () => { + // Harmony tool-call arguments are JSON OBJECTS. 
A commentary tool body that is + // a bare JSON SCALAR (number / boolean / string) parses as valid JSON but is + // NOT a valid tool-call argument object. The OLD code accepted it as a tool + // call with the scalar text as `arguments` (e.g. arguments "123"). Correct: + // the body does NOT terminate a valid tool call -> uniform fail-safe (verbatim + // + harmonyUnparsed), no fabricated tool call. + it("a bare numeric scalar body (123) is NOT a tool call (fail-safe verbatim)", () => { + const raw = "<|channel|>commentary to=functions.f<|message|>123<|call|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + it("a bare boolean scalar body (true) is NOT a tool call (fail-safe verbatim)", () => { + const raw = "<|channel|>commentary to=functions.f<|message|>true<|call|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + }); + + it('a bare string scalar body ("str") is NOT a tool call (fail-safe verbatim)', () => { + const raw = '<|channel|>commentary to=functions.f<|message|>"str"<|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + }); + + it("a JSON array body ([1,2]) is NOT a tool call (arguments must be an object)", () => { + const raw = "<|channel|>commentary to=functions.f<|message|>[1,2]<|call|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + }); + + 
it("a JSON null body is NOT a tool call (arguments must be a non-null object)", () => { + const raw = "<|channel|>commentary to=functions.f<|message|>null<|call|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + }); + + // POSITIVE pin: a genuine JSON OBJECT body still parses as a tool call — the + // object requirement must not regress the happy path, including an empty {}. + it("an empty object body ({}) IS a valid tool call", () => { + const raw = "<|channel|>commentary to=functions.f<|message|>{}<|call|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toEqual([{ name: "f", arguments: "{}" }]); + }); + + // POSITIVE pin: matrix 13/14 — embedded control-token literals INSIDE a JSON + // OBJECT arg remain valid (the object requirement only rejects scalars/arrays, + // not objects whose string values happen to contain token-shaped substrings). + it("an object arg containing embedded <|call|> substrings stays a valid tool call (matrix 13)", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.say<|constrain|>json<|message|>{"text":"say <|call|> now"}<|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toEqual([{ name: "say", arguments: '{"text":"say <|call|> now"}' }]); + }); +}); + +describe("harmony — recipient does NOT carry over across messages", () => { + // A prior analysis message carries `to=functions.x` (matrix 23: analysis + + // recipient is NOT a tool call). The NEXT message is a plain commentary + // message with NO recipient of its own. The recipient must NOT carry over — + // commentary-without-recipient is a preamble that routes to CONTENT, and no + // tool call named `x` may be fabricated. 
+ it("analysis to=functions.x then plain commentary does not fabricate tool x", () => { + const raw = + "<|channel|>analysis to=functions.x<|message|>thinking<|end|>" + + "<|start|>assistant<|channel|>commentary<|message|>plain<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + // analysis body -> reasoning; commentary-without-recipient -> content. + expect(direct.reasoning).toBe("thinking"); + expect(direct.content).toBe("plain"); + // No tool call fabricated from the carried-over recipient. + expect(direct.toolCalls).toEqual([]); + }); +}); + +describe("harmony — cross-channel quoted-split routing (KNOWN LIMITATION)", () => { + // The quoted-whole-message ambiguity is channel-agnostic: a body that QUOTES a + // complete well-formed message of a DIFFERENT channel is structurally + // indistinguishable from two real messages, so it splits and routes each half + // by its (quoted) channel. We PIN the documented imperfect behavior so a future + // change to the split logic is a conscious decision, not an accident. + + // (a) An analysis body that quotes a complete FINAL message: splits into an + // analysis half (-> reasoning) and a final half (-> content). The quoted + // control tokens are stripped (the known limitation), never leaked. + it("analysis body quoting a complete final message splits reasoning|content", () => { + const raw = + "<|channel|>analysis<|message|>note " + + "<|start|>assistant<|channel|>final<|message|>answer<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.reasoning).toBe("note "); + expect(direct.content).toBe("answer"); + // Whatever the split, no raw control token leaks into routed output. 
+ expect(direct.reasoning).not.toMatch(/<\|/); + expect(direct.content).not.toMatch(/<\|/); + }); + + // (b) A final body that quotes a complete commentary TOOL message: the quoted + // <|start|>...commentary to=functions.X...<|call|> is a well-formed message + // boundary, so it splits — the final half routes to content and the quoted + // commentary-tool half materializes as a real tool call (the known limitation: + // a quoted tool message is indistinguishable from a real one). No leak. + it("final body quoting a complete commentary-tool message splits content|toolCall", () => { + const raw = + "<|channel|>final<|message|>see " + + '<|start|>assistant<|channel|>commentary to=functions.t<|constrain|>json<|message|>{"a":1}<|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.content).toBe("see "); + expect(direct.toolCalls).toEqual([{ name: "t", arguments: '{"a":1}' }]); + expect(direct.content).not.toMatch(/<\|/); + }); +}); + +describe("harmony — legitimate multi-message + tool calls (positive end-to-end)", () => { + // analysis -> reasoning, commentary tool call (whitespace-padded args), final + // -> content, with inter-message whitespace. Proves the structural fail-safe + // does not regress the realistic happy path. 
+ it("parses analysis + commentary tool (padded args) + final with separators", () => { + const chunks = [ + "<|channel|>analysis<|message|>Plan the call.<|end|>", + "\n", + '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|> {"q":"x"} <|call|>', + "\n", + "<|start|>assistant<|channel|>final<|message|>Here is the result.<|return|>", + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.reasoning).toBe("Plan the call."); + expect(result.content).toBe("Here is the result."); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' }); + expect(result.content).not.toMatch(/<\|/); + expect(result.content).not.toContain("\n"); + }); +}); diff --git a/src/harmony.ts b/src/harmony.ts new file mode 100644 index 0000000..8e0416c --- /dev/null +++ b/src/harmony.ts @@ -0,0 +1,731 @@ +/** + * OpenAI harmony channel parsing for open-weight gpt-oss models. + * + * Hosted api.openai.com pre-parses harmony output into structured + * `tool_calls` / `message.content`, but open-weight gpt-oss models served via + * Ollama / vLLM / OpenRouter (i.e. whenever OPENAI_BASE_URL points at a + * local/open-weights backend) stream tool calls as RAW harmony channel tokens + * INSIDE `delta.content`. Without parsing, the recorded fixture leaks the + * tool-call routing marker (`to=functions.NAME`) and its args JSON as plain + * text content instead of capturing a structured tool call. 
+ * + * Harmony grammar (authoritative, from OpenAI's harmony spec): + * Special tokens: <|start|> <|end|> <|message|> <|channel|> <|constrain|> + * <|return|> <|call|> + * A message is laid out as: + * <|start|>{role/recipient header}<|channel|>{channel header}<|message|>{body}{terminator} + * where the leading <|start|> and/or <|channel|> may be absent on the very + * first message of a stream, and the channel header carries the channel name + * plus optional `to=functions.NAME` routing and `<|constrain|>json`. + * Channels: + * - analysis chain-of-thought -> reasoning + * - commentary function/tool calls + preambles + * - final user-facing answer -> content + * A tool call is a `commentary`-channel message whose header (role segment OR + * channel header) carries recipient routing `to=functions.NAME`; its args are + * the JSON body after `<|message|>`, terminated by `<|call|>`. Example: + * <|channel|>analysis<|message|>Need to call the tool.<|end|> + * <|start|>assistant<|channel|>commentary to=functions.generate_a2ui + * <|constrain|>json<|message|>{"component":"card","props":{}}<|call|> + * <|start|>assistant<|channel|>final<|message|>Here you go.<|return|> + * + * Implementation: a TWO-PHASE parser, NOT an indexOf scanner. + * + * Phase 1 — LEXER ({@link lex}). One left-to-right pass over the accumulated + * content producing an ordered {@link Token}[]: each element is either a + * CONTROL token (matched by exact prefix at the cursor) or a TEXT span (the + * literal run between control tokens). Once bytes are consumed into a TEXT + * span they are NEVER re-scanned for control tokens — so a literal + * "<|call|>"/"<|channel|>" substring inside a JSON string or prose can never + * be mistaken for structure. The lexer NEVER throws; it always returns a + * complete token stream. + * + * Phase 2 — STATE MACHINE ({@link parseTokens}). Walks the token stream + * against the harmony grammar: + * Stream := TEXT? Message+ TEXT? + * Message := START? Header? 
MESSAGE Body Terminator + * Header := role-TEXT? CHANNEL header-TEXT? + * Body := the token span following MESSAGE up to its real Terminator + * Terminator := END | RETURN | CALL | (lookahead START) | EOF (final only) + * The Terminator is located over the TOKEN STREAM, never via indexOf: a body + * may re-materialize embedded control-token literals as prose (e.g. a final + * answer that quotes "`<|end|>`"), so the real terminator is the first + * END/RETURN/CALL (or a START that begins a well-formed next message) whose + * follower is a real message boundary — EOF or a parseable next header. A + * commentary tool body additionally requires Terminator==CALL AND a body that + * parses as JSON; the "first CALL whose preceding TEXT parses as valid JSON, + * else fail-safe" rule is preserved but operates over the token stream. A + * CHANNEL header must name a known channel (analysis/commentary/final) and a + * dangling CHANNEL/MESSAGE inside a body is a grammar deviation (fail-safe). + * + * Fail-safe contract: parsing is UNIFORM all-or-nothing. {@link + * parseHarmonyContent} returns `failed:true` with `content` set to the ORIGINAL + * raw input VERBATIM on ANY grammar deviation (TEXT-only / prose mention with + * no Message, CHANNEL with no following MESSAGE, a tool body that is not valid + * JSON or not CALL-terminated, an unterminated non-final body, a body + * terminator followed by trailing non-message junk, or any leftover unexpected + * token). There is EXACTLY ONE success path that strips tokens; it never + * partial-strips and never leaks a control token into content/reasoning. + * Harmony-present-but-unparseable is NOT transport loss — the caller preserves + * the bytes verbatim and surfaces a distinct `harmonyUnparsed` signal rather + * than `droppedChunks`/`truncated`. + * + * KNOWN LIMITATION — quoted whole-message ambiguity. 
Harmony tokens arrive as + * detokenized TEXT, so a body that QUOTES a COMPLETE, well-formed harmony + * message is structurally indistinguishable from two real messages. Example: + * <|channel|>final<|message|>To emit write <|start|>assistant<|channel|> + * final<|message|>hello<|return|> + * The lexer cannot know the inner `<|start|>...<|message|>hello<|return|>` is a + * quotation rather than a real second message, so this parses as TWO final + * messages and the quoted control tokens are stripped (content "To emit write + * hello"). This is the irreducible quoted-vs-real ambiguity; the parser does + * NOT over-engineer a guess. The fail-safe contract still holds at its edges: + * the split is only accepted when it yields cleanly well-formed messages — if + * any resulting message is malformed (e.g. the quoted message is followed by + * trailing junk, "...hello<|return|> and then stop"), the body terminator / + * trailing-junk rule fails the WHOLE input safe (verbatim) rather than emit a + * mangled middle. So the behavior is always verbatim-or-clean, never mangled. + */ + +import type { ToolCall } from "./types.js"; + +// Harmony special tokens. +const START_TOKEN = "<|start|>"; +const END_TOKEN = "<|end|>"; +const RETURN_TOKEN = "<|return|>"; +const CALL_TOKEN = "<|call|>"; +const CHANNEL_TOKEN = "<|channel|>"; +const MESSAGE_TOKEN = "<|message|>"; +const CONSTRAIN_TOKEN = "<|constrain|>"; + +/** The seven harmony control-token kinds. */ +type ControlType = "START" | "END" | "RETURN" | "CALL" | "CHANNEL" | "MESSAGE" | "CONSTRAIN"; + +// Control tokens ordered for prefix matching at the cursor. All seven literals +// are distinct prefixes, so match order is irrelevant for correctness; the +// array simply drives the single cursor scan in the lexer. 
// Lookup table scanned by the lexer: one { type, literal } entry per harmony
// control token.
const CONTROL_TOKENS: ReadonlyArray<{ type: ControlType; literal: string }> = [
  { type: "START", literal: START_TOKEN },
  { type: "END", literal: END_TOKEN },
  { type: "RETURN", literal: RETURN_TOKEN },
  { type: "CALL", literal: CALL_TOKEN },
  { type: "CHANNEL", literal: CHANNEL_TOKEN },
  { type: "MESSAGE", literal: MESSAGE_TOKEN },
  { type: "CONSTRAIN", literal: CONSTRAIN_TOKEN },
];

// Reverse map: control-token kind -> its literal. Used by the state machine to
// re-materialize a control token's literal text when rebuilding a body that
// contains token-shaped substrings. The explicit `Record<ControlType, string>`
// type arguments are required (a bare `Record` is not a valid type) and make
// the compiler verify that every ControlType has an entry.
const CONTROL_LITERAL: Record<ControlType, string> = {
  START: START_TOKEN,
  END: END_TOKEN,
  RETURN: RETURN_TOKEN,
  CALL: CALL_TOKEN,
  CHANNEL: CHANNEL_TOKEN,
  MESSAGE: MESSAGE_TOKEN,
  CONSTRAIN: CONSTRAIN_TOKEN,
};

// Recipient routing marker carried by the role segment or the channel header.
// Requires a valid identifier after `to=functions.`: it must START with a
// letter or underscore (so `to=functions.-` / `to=functions.` are NOT
// recipients), then allows word chars, dots, and dashes. Capture group 1 is the
// function name.
const RECIPIENT_RE = /to=functions\.([A-Za-z_][\w.-]*)/;
/**
 * Cheap pre-check that decides whether a harmony parse should be attempted.
 *
 * Returns true when a `<|message|>` token occurs somewhere after the first
 * `<|channel|>` token, or somewhere after the first `<|start|>` token. This is
 * only a fast-path gate; it does not validate the structure.
 *
 * @param content Accumulated assistant content to inspect.
 * @returns Whether one of the opener/`<|message|>` pairings is present.
 */
export function isHarmonyContent(content: string): boolean {
  // True when `opener` occurs and a <|message|> follows its first occurrence.
  const messageFollows = (opener: string): boolean => {
    const at = content.indexOf(opener);
    return at !== -1 && content.indexOf(MESSAGE_TOKEN, at + opener.length) !== -1;
  };
  return messageFollows(CHANNEL_TOKEN) || messageFollows(START_TOKEN);
}

/** Result of {@link parseHarmonyContent}. */
export interface HarmonyParseResult {
  /** User-facing content; on failure, the original input verbatim. */
  content: string;
  /** Text routed from `analysis` bodies; empty on failure. */
  reasoning: string;
  /** Structured tool calls; empty on failure. */
  toolCalls: ToolCall[];
  /**
   * True when the input could NOT be parsed as a complete, valid harmony
   * structure and the ORIGINAL content was returned VERBATIM (fail-safe). The
   * bytes are preserved, so this is NOT transport loss — the caller surfaces it
   * via a distinct `harmonyUnparsed` signal, not `droppedChunks`/`truncated`.
   */
  failed: boolean;
}

// ---------------------------------------------------------------------------
// Phase 1: Lexer
// ---------------------------------------------------------------------------

/** A control token (one of the seven harmony special tokens). */
interface ControlToken {
  kind: "control";
  type: ControlType;
}

/** A literal text span between control tokens. Never empty. */
interface TextToken {
  kind: "text";
  value: string;
}

/** One element of the lexed stream. */
type Token = ControlToken | TextToken;
/**
 * Tokenize `raw` in one left-to-right pass.
 *
 * At each position that starts with "<|", the control-token table is checked
 * for an exact prefix match. A match emits any pending text run followed by the
 * control token and jumps past the literal; otherwise the cursor advances one
 * character and that character stays in the pending text run. Text is only
 * emitted when non-empty. The function contains no `throw`.
 *
 * @param raw Accumulated content to tokenize.
 * @returns Ordered control/text tokens covering the whole input.
 */
function lex(raw: string): Token[] {
  const out: Token[] = [];
  let pendingFrom = 0; // start of the text run that has not been emitted yet
  let pos = 0;

  while (pos < raw.length) {
    // Cheap reject: only consult the table where a token could begin.
    const hit = raw.startsWith("<|", pos)
      ? CONTROL_TOKENS.find((entry) => raw.startsWith(entry.literal, pos))
      : undefined;

    if (hit === undefined) {
      pos += 1;
      continue;
    }

    if (pos > pendingFrom) {
      out.push({ kind: "text", value: raw.slice(pendingFrom, pos) });
    }
    out.push({ kind: "control", type: hit.type });
    pos += hit.literal.length;
    pendingFrom = pos;
  }

  // Trailing text after the last control token (or the whole input).
  if (raw.length > pendingFrom) {
    out.push({ kind: "text", value: raw.slice(pendingFrom, raw.length) });
  }

  return out;
}

// ---------------------------------------------------------------------------
// Phase 2: State machine
// ---------------------------------------------------------------------------

/** Reports whether `s` has no characters left after trimming whitespace. */
function isBlank(s: string): boolean {
  return s.trim() === "";
}
/**
 * Reports whether `s` is JSON text whose top-level value is a plain object.
 *
 * Text that fails `JSON.parse`, and valid JSON whose value is `null`, an array,
 * or a scalar (number / boolean / string), all yield false.
 *
 * @param s Candidate tool-call argument text.
 * @returns True only for a non-null, non-array JSON object.
 */
function isToolArgsObject(s: string): boolean {
  try {
    const decoded: unknown = JSON.parse(s);
    return decoded !== null && typeof decoded === "object" && !Array.isArray(decoded);
  } catch {
    return false;
  }
}

/**
 * Returns the first whitespace-delimited word of a channel header, i.e. the
 * channel name that precedes any routing text. An empty or whitespace-only
 * header yields "".
 *
 * @param headerText Text that followed a `<|channel|>` token.
 */
function headerChannel(headerText: string): string {
  const [name = ""] = headerText.trim().split(/\s+/);
  return name;
}

/** The harmony channels a real `<|channel|>` header may name. */
const KNOWN_CHANNELS = new Set(["analysis", "commentary", "final"]);
/**
 * Lookahead: does a message header begin at token index `idx`?
 *
 * Walks the optional prefix `START role-TEXT? CHANNEL header-TEXT? CONSTRAIN
 * constraint-TEXT?` and reports whether the token it lands on is `MESSAGE`.
 * When a `CHANNEL` token is followed by header text, the first word of that
 * text must be one of {@link KNOWN_CHANNELS}; otherwise the answer is false.
 * Nothing is consumed — the caller's position is untouched.
 *
 * @param tokens Lexed token stream.
 * @param idx Index of the candidate header's first token.
 */
function looksLikeMessageStart(tokens: Token[], idx: number): boolean {
  const controlAt = (at: number, type: ControlType): boolean => {
    const t = tokens[at];
    return t !== undefined && t.kind === "control" && t.type === type;
  };
  const textAt = (at: number): TextToken | undefined => {
    const t = tokens[at];
    return t !== undefined && t.kind === "text" ? t : undefined;
  };

  let pos = idx;

  if (controlAt(pos, "START")) {
    pos += 1;
    if (textAt(pos) !== undefined) pos += 1; // optional role text
  }

  if (controlAt(pos, "CHANNEL")) {
    pos += 1;
    const header = textAt(pos);
    if (header !== undefined) {
      // A header naming an unknown channel is not treated as a boundary.
      if (!KNOWN_CHANNELS.has(headerChannel(header.value))) return false;
      pos += 1;
    }
    if (controlAt(pos, "CONSTRAIN")) {
      pos += 1;
      if (textAt(pos) !== undefined) pos += 1; // optional constraint name
    }
  }

  return controlAt(pos, "MESSAGE");
}

/**
 * Decides whether the position `idx` (the token right after a terminator
 * candidate) is a real message boundary: after skipping whitespace-only text
 * tokens, either the stream has ended or a message header begins there
 * ({@link looksLikeMessageStart}).
 *
 * @param tokens Lexed token stream.
 * @param idx Index of the token following the terminator candidate.
 */
function isRealBoundaryAfter(tokens: Token[], idx: number): boolean {
  let pos = idx;
  for (;;) {
    const t = tokens[pos];
    if (t === undefined || t.kind !== "text" || t.value.trim().length !== 0) break;
    pos += 1;
  }
  return pos >= tokens.length || looksLikeMessageStart(tokens, pos);
}
/**
 * True when the token at index `idx` exists and is a TEXT span containing at
 * least one non-whitespace character.
 *
 * The body scanner in {@link parseTokens} uses this to decide whether a control
 * token met inside a non-tool body may be absorbed as quoted prose: absorption
 * is allowed only when real text immediately follows. A follower that is
 * another control token, whitespace-only text, or EOF yields false, and the
 * caller then fails safe.
 */
function hasProseFollower(tokens: Token[], idx: number): boolean {
  const next = tokens[idx];
  return next !== undefined && next.kind === "text" && next.value.trim().length > 0;
}

/** Sentinel thrown internally to unwind to the uniform fail-safe path. */
const FAIL = Symbol("harmony-fail");

/**
 * Walk the token stream as a sequence of harmony messages and route each body
 * by channel.
 *
 * Throws the {@link FAIL} sentinel on any grammar deviation detected below; the
 * caller is expected to catch it and return the original input verbatim. On
 * success returns the accumulated content, reasoning, and tool calls.
 *
 * @param tokens Output of {@link lex}.
 * @returns Routed `content` (final / commentary-without-recipient /
 *   channel-less bodies and any leading preamble), `reasoning` (analysis
 *   bodies), and `toolCalls` (commentary bodies with a recipient).
 */
function parseTokens(tokens: Token[]): {
  content: string;
  reasoning: string;
  toolCalls: ToolCall[];
} {
  let content = "";
  let reasoning = "";
  const toolCalls: ToolCall[] = [];

  // `i` is the shared cursor into `tokens`; `peek` reads it, `fail` unwinds.
  let i = 0;
  const peek = (): Token | undefined => tokens[i];
  const fail = (): never => {
    throw FAIL;
  };

  // ----- Leading channel-less TEXT (before the first Message) -----
  // Leading text is consumed here only when the very next token is START or
  // CHANNEL. Non-blank leading text becomes content; blank text is dropped.
  // In every other case the text is left in place for the main loop, which
  // skips it when blank and fails safe when not.
  if (peek()?.kind === "text") {
    const t = tokens[i] as TextToken;
    const next = tokens[i + 1];
    const nextStartsMessage =
      next !== undefined &&
      next.kind === "control" &&
      (next.type === "START" || next.type === "CHANNEL");
    if (nextStartsMessage) {
      if (!isBlank(t.value)) content += t.value;
      i += 1;
    }
  }

  // Becomes true once any message has been routed; checked after the loop.
  let sawMessage = false;

  // Set when the PREVIOUS body ended on a START lookahead rather than on an
  // END/RETURN/CALL token. The message that follows such a split is not
  // allowed to absorb any embedded control literal (see `fromQuotedSplit`
  // below): if it would have to, the whole input fails safe.
  let nextIsQuotedSplit = false;

  while (i < tokens.length) {
    const tok = peek();
    if (tok === undefined) break;

    // At message position: blank text is skipped, non-blank text is a
    // grammar deviation.
    if (tok.kind === "text") {
      if (isBlank(tok.value)) {
        i += 1;
        continue;
      }
      fail();
    }

    // Capture-and-reset the quoted-split marker for THIS message. (The blank
    // text `continue` above runs before this, so the marker survives
    // inter-message whitespace.)
    const fromQuotedSplit = nextIsQuotedSplit;
    nextIsQuotedSplit = false;

    // tok is a control token: the start of a Message.
    let recipient: string | undefined;
    let channel = "";
    // Tracks whether START and/or CHANNEL was consumed for this message; a
    // bare <|message|> with neither fails safe below.
    let sawHeader = false;

    // ----- optional START + role-TEXT -----
    if (tok.kind === "control" && tok.type === "START") {
      sawHeader = true;
      i += 1;
      // Optional role text; may carry `to=functions.NAME`.
      if (peek()?.kind === "text") {
        const roleText = (tokens[i] as TextToken).value;
        recipient = roleText.match(RECIPIENT_RE)?.[1];
        i += 1;
      }
    }

    // ----- optional CHANNEL + header-TEXT (+ optional CONSTRAIN) -----
    if (peek()?.kind === "control" && (peek() as ControlToken).type === "CHANNEL") {
      sawHeader = true;
      i += 1;
      // Optional header text: channel name plus optional routing. A recipient
      // found here overrides one found in the role text.
      if (peek()?.kind === "text") {
        const headerText = (tokens[i] as TextToken).value;
        channel = headerChannel(headerText);
        const headerRecipient = headerText.match(RECIPIENT_RE)?.[1];
        if (headerRecipient !== undefined) recipient = headerRecipient;
        i += 1;
      }
      // Optional <|constrain|> plus its name text (e.g. "json"): both are
      // consumed and discarded.
      if (peek()?.kind === "control" && (peek() as ControlToken).type === "CONSTRAIN") {
        i += 1;
        if (peek()?.kind === "text") {
          i += 1;
        }
      }
      // A present <|channel|> must name a known channel (this also rejects a
      // CHANNEL with no header text, whose name is ""). Otherwise fail safe.
      if (!KNOWN_CHANNELS.has(channel)) fail();
    }

    // ----- mandatory MESSAGE -----
    // Both a missing header and a header not followed by <|message|> are
    // grammar deviations.
    if (!sawHeader) fail();
    if (!(peek()?.kind === "control" && (peek() as ControlToken).type === "MESSAGE")) {
      fail();
    }
    i += 1; // consume MESSAGE
    const bodyStart = i; // token index of the first body token

    const isCommentaryToolCall = recipient !== undefined && channel === "commentary";

    if (isCommentaryToolCall) {
      // Tool-call body: rebuild the text between <|message|> and each CALL
      // token (re-materializing any control tokens as their literals) and
      // accept the FIRST CALL at which the trimmed accumulated text is a JSON
      // object. A CALL reached earlier is treated as part of the body text.
      // If no CALL satisfies this, fail safe.
      let acc = "";
      let j = bodyStart;
      let parsed: string | undefined;
      for (; j < tokens.length; j++) {
        const t = tokens[j];
        if (t.kind === "control" && t.type === "CALL") {
          // The recorded arguments are the trimmed text; interior whitespace
          // is preserved.
          const candidate = acc.trim();
          if (isToolArgsObject(candidate)) {
            parsed = candidate;
            break;
          }
          // Not (yet) a JSON object — keep the literal and keep accumulating.
          acc += CONTROL_LITERAL.CALL;
          continue;
        }
        if (t.kind === "control") {
          acc += CONTROL_LITERAL[t.type];
        } else {
          acc += t.value;
        }
      }
      if (parsed === undefined) fail();
      i = j + 1; // consume body tokens + the terminating CALL
      toolCalls.push({ name: recipient!, arguments: parsed! });
      sawMessage = true;
      continue;
    }

    // ----- Non-tool Body + Terminator -----
    // Scan forward from MESSAGE. The body ends at the first END/RETURN/CALL
    // whose follower is a real boundary ({@link isRealBoundaryAfter}), or at a
    // START that begins a message header ({@link looksLikeMessageStart}).
    //
    // Any other END/RETURN/CALL/START/CONSTRAIN met on the way is absorbed
    // into the body as its literal text, but ONLY when non-blank text
    // immediately follows it ({@link hasProseFollower}) and this message is
    // not the product of a quoted split. Otherwise the scan fails safe right
    // there, so the check applies no matter how the loop would have ended.
    // A CHANNEL or MESSAGE token inside the body always fails safe.
    // `absorbedControlLiteral` remembers that at least one literal was
    // absorbed so the EOF branch can reject such a body.
    let body = "";
    let terminated = false;
    let reachedEof = false;
    let absorbedControlLiteral = false;
    let j = i;
    for (; j < tokens.length; j++) {
      const t = tokens[j];
      if (t.kind === "control" && (t.type === "END" || t.type === "RETURN" || t.type === "CALL")) {
        if (isRealBoundaryAfter(tokens, j + 1)) {
          terminated = true;
          break;
        }
        // Embedded terminator-shaped literal: absorb only under the rule above.
        if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();
        absorbedControlLiteral = true;
        body += CONTROL_LITERAL[t.type];
        continue;
      }
      if (t.kind === "control" && t.type === "START") {
        if (looksLikeMessageStart(tokens, j)) {
          // Lookahead terminator: the NEXT message begins here. Do NOT consume.
          terminated = true;
          break;
        }
        // Embedded <|start|>: absorb only under the rule above.
        if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();
        absorbedControlLiteral = true;
        body += CONTROL_LITERAL.START;
        continue;
      }
      if (t.kind === "control" && (t.type === "CHANNEL" || t.type === "MESSAGE")) {
        // Dangling CHANNEL/MESSAGE inside a non-tool body: malformed structure.
        fail();
      }
      if (t.kind === "control") {
        // Only CONSTRAIN reaches here: absorb only under the rule above.
        if (fromQuotedSplit || !hasProseFollower(tokens, j + 1)) fail();
        absorbedControlLiteral = true;
        body += CONTROL_LITERAL[t.type];
        continue;
      }
      // TEXT span — part of the body.
      body += t.value;
    }
    if (j >= tokens.length) reachedEof = true;

    if (terminated) {
      const term = tokens[j] as ControlToken;
      if (term.type === "START") {
        // Leave START in place for the next iteration and flag the next
        // message as a quoted split.
        i = j;
        nextIsQuotedSplit = true;
      } else {
        i = j + 1; // consume END/RETURN/CALL
      }
      routeBody(channel, body);
      sawMessage = true;
      continue;
    }
    if (reachedEof) {
      // A body that runs to EOF without a terminator is accepted, except:
      //   - an `analysis` body (unterminated reasoning fails safe), and
      //   - a body that absorbed a control literal on the way (fails safe).
      if (channel === "analysis" || absorbedControlLiteral) fail();
      i = j;
      routeBody(channel, body);
      sawMessage = true;
      break;
    }
    // The scan loop exits only via a terminator or EOF; this is a defensive
    // fail-safe for any unexpected fallthrough.
    fail();
  }

  // A stream with no routed message at all is a grammar deviation.
  if (!sawMessage) fail();

  return { content, reasoning, toolCalls };

  // Route a non-tool body by channel: `analysis` appends to reasoning; every
  // other channel value that reaches here ("final", "commentary" without a
  // recipient, or "" for a channel-less message) appends to content. Unknown
  // channel names never get this far — header parsing fails on them.
  function routeBody(ch: string, body: string): void {
    if (ch === "analysis") {
      reasoning += body;
    } else {
      content += body;
    }
  }
}

// ---------------------------------------------------------------------------
// Public entry point
// ---------------------------------------------------------------------------
/**
 * Split an accumulated assistant `content` string carrying harmony channel
 * tokens into content, reasoning, and tool calls.
 *
 * The input is lexed and then walked by {@link parseTokens}. Anything thrown
 * during that walk — the internal {@link FAIL} sentinel or any other error —
 * is caught here, so this function itself never throws to the caller.
 *
 * @param raw Accumulated assistant content.
 * @returns On success, the routed channels with `failed: false`. On any
 *   failure, `{ content: raw, reasoning: "", toolCalls: [], failed: true }`,
 *   i.e. the original input unchanged.
 */
export function parseHarmonyContent(raw: string): HarmonyParseResult {
  const tokens = lex(raw);
  try {
    const routed = parseTokens(tokens);
    return {
      content: routed.content,
      reasoning: routed.reasoning,
      toolCalls: routed.toolCalls,
      failed: false,
    };
  } catch {
    // Grammar deviation (FAIL) or an unexpected error: both fail safe with
    // the original bytes preserved.
    return { content: raw, reasoning: "", toolCalls: [], failed: true };
  }
}
+// ===========================================================================
+
+// ---------------------------------------------------------------------------
+// 1. Bedrock EventStream header bounds (decodeEventStreamFrames)
+//
+// `headersLength` / per-header `nameLen` / `valueLen` are read without
+// bounds-checking. A frame with a VALID prelude CRC but a `headersLength`
+// that overruns the payload throws an uncaught RangeError instead of the
+// intended graceful `{ frames, truncated: true }`.
+// ---------------------------------------------------------------------------
+
+/**
+ * Build a single AWS EventStream frame whose prelude CRC is VALID but whose
+ * declared `headersLength` is `headersLength`, independent of the actual
+ * payload. `totalLength` is sized so the whole frame fits inside the buffer
+ * (so the existing total-length bounds check passes) and the message CRC is
+ * computed correctly — the ONLY corruption is the oversized headers length,
+ * which must be caught by header bounds validation.
+ */
+function buildFrameWithHeadersLength(headersLength: number, payload: Buffer): Buffer {
+  // prelude (8) + prelude_crc (4) + payload + message_crc (4).
+  // We intentionally allocate NO real header bytes — headersLength lies.
+  const totalLength = 4 + 4 + 4 + payload.length + 4;
+  const frame = Buffer.alloc(totalLength);
+  let offset = 0;
+
+  frame.writeUInt32BE(totalLength, offset);
+  offset += 4;
+  frame.writeUInt32BE(headersLength, offset); // bogus, oversized
+  offset += 4;
+
+  // Valid prelude CRC over the first 8 bytes (passes the prelude check).
+  const preludeCrc = crc32(frame.subarray(0, 8));
+  frame.writeUInt32BE(preludeCrc >>> 0, offset);
+  offset += 4;
+
+  payload.copy(frame, offset);
+  offset += payload.length;
+
+  // Valid message CRC over everything but the last 4 bytes.
+  const messageCrc = crc32(frame.subarray(0, totalLength - 4));
+  frame.writeUInt32BE(messageCrc >>> 0, offset);
+
+  return frame;
+}
+
+describe("collapseBedrockEventStream malformed header bounds", () => {
+  it("returns truncated (not a RangeError) when headersLength overruns the frame", () => {
+    const payload = Buffer.from(JSON.stringify({ contentBlockDelta: { delta: { text: "Hi" } } }));
+    // headersLength=1000 far exceeds the tiny payload region.
+    const frame = buildFrameWithHeadersLength(1000, payload);
+
+    let result: ReturnType<typeof collapseBedrockEventStream>;
+    expect(() => {
+      result = collapseBedrockEventStream(frame);
+    }).not.toThrow();
+
+    expect(result!.truncated).toBe(true);
+  });
+
+  it("processes valid earlier frames, then truncates on a malformed-header frame", () => {
+    const goodFrame = encodeEventStreamMessage("contentBlockDelta", {
+      contentBlockDelta: { delta: { text: "Good" } },
+    });
+    const payload = Buffer.from(JSON.stringify({ contentBlockDelta: { delta: { text: "Bad" } } }));
+    const badFrame = buildFrameWithHeadersLength(5000, payload);
+    const buf = Buffer.concat([goodFrame, badFrame]);
+
+    let result: ReturnType<typeof collapseBedrockEventStream>;
+    expect(() => {
+      result = collapseBedrockEventStream(buf);
+    }).not.toThrow();
+
+    expect(result!.content).toBe("Good");
+    expect(result!.truncated).toBe(true);
+  });
+
+  it("returns truncated when a per-header value length overruns the headers region", () => {
+    // headersLength=4 leaves room for a 1-byte nameLen + 1-byte name + type
+    // byte... but then the 2-byte valueLen read pushes past headersEnd, and the
+    // declared value length itself overruns. Build the header bytes by hand.
+    //
+    // Layout inside the headers region (4 bytes): nameLen=1, name="x", type=7,
+    // and then there is no room for the 2-byte valueLen → must be caught.
+    const headerBytes = Buffer.from([
+      0x01, // nameLen = 1
+      0x78, // "x"
+      0x07, // type = STRING
+      0x00, // first byte of a valueLen that runs off the end of the region
+    ]);
+    const headersLength = headerBytes.length;
+    const payload = Buffer.from(JSON.stringify({ contentBlockDelta: { delta: { text: "Z" } } }));
+    const totalLength = 4 + 4 + 4 + headersLength + payload.length + 4;
+    const frame = Buffer.alloc(totalLength);
+    let offset = 0;
+    frame.writeUInt32BE(totalLength, offset);
+    offset += 4;
+    frame.writeUInt32BE(headersLength, offset);
+    offset += 4;
+    const preludeCrc = crc32(frame.subarray(0, 8));
+    frame.writeUInt32BE(preludeCrc >>> 0, offset);
+    offset += 4;
+    headerBytes.copy(frame, offset);
+    offset += headersLength;
+    payload.copy(frame, offset);
+    offset += payload.length;
+    const messageCrc = crc32(frame.subarray(0, totalLength - 4));
+    frame.writeUInt32BE(messageCrc >>> 0, offset);
+
+    let result: ReturnType<typeof collapseBedrockEventStream>;
+    expect(() => {
+      result = collapseBedrockEventStream(frame);
+    }).not.toThrow();
+    expect(result!.truncated).toBe(true);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 2. Missing tool_call index — OpenAI SSE and Cohere SSE
+//
+// `const index = tc.index as number` assumes index present. If a delta omits
+// `index`, every index-less delta collapses under a single map key, merging
+// distinct tool calls and corrupting arguments. Distinct calls must stay
+// distinct.
+// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE missing tool_call index", () => { + it("keeps two index-less tool_call deltas as two distinct tool calls", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-noidx", + choices: [ + { + delta: { + tool_calls: [ + { + id: "call_a", + type: "function", + function: { name: "func_a", arguments: '{"x":1}' }, + }, + ], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-noidx", + choices: [ + { + delta: { + tool_calls: [ + { + id: "call_b", + type: "function", + function: { name: "func_b", arguments: '{"y":2}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + const names = result.toolCalls!.map((tc) => tc.name).sort(); + expect(names).toEqual(["func_a", "func_b"]); + // Arguments must not be cross-contaminated into one entry. 
+ const byName = Object.fromEntries(result.toolCalls!.map((tc) => [tc.name, tc.arguments])); + expect(byName.func_a).toBe('{"x":1}'); + expect(byName.func_b).toBe('{"y":2}'); + }); + + it("still merges streamed argument fragments that DO carry an index", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-idx", + choices: [ + { + delta: { + tool_calls: [{ index: 0, id: "call_x", function: { name: "fn", arguments: '{"a' } }], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-idx", + choices: [{ delta: { tool_calls: [{ index: 0, function: { arguments: '":1}' } }] } }], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0].arguments).toBe('{"a":1}'); + }); +}); + +describe("collapseCohereSSE missing tool_call index", () => { + it("keeps two index-less tool-call-start events as two distinct tool calls", () => { + const body = [ + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + delta: { + message: { + tool_calls: { + id: "call_a", + type: "function", + function: { name: "func_a", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } }, + })}`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + delta: { + message: { + tool_calls: { + id: "call_b", + type: "function", + function: { name: "func_b", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + delta: { message: { tool_calls: { function: { arguments: '{"y":2}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSSE(body); + expect(result.toolCalls).toBeDefined(); + 
expect(result.toolCalls).toHaveLength(2); + const names = result.toolCalls!.map((tc) => tc.name).sort(); + expect(names).toEqual(["func_a", "func_b"]); + }); +}); + +// --------------------------------------------------------------------------- +// 3. Gemini SSE tool args default — JSON.stringify(undefined) === undefined +// +// `JSON.stringify(fc.args)` returns the VALUE undefined when args is omitted, +// violating the ToolCall.arguments:string contract. Should be "{}". +// --------------------------------------------------------------------------- + +describe("collapseGeminiSSE functionCall with no args", () => { + it("defaults missing args to the JSON object string '{}'", () => { + const body = [ + `data: ${JSON.stringify({ + candidates: [{ content: { role: "model", parts: [{ functionCall: { name: "ping" } }] } }], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].arguments).toBe("{}"); + expect(typeof result.toolCalls![0].arguments).toBe("string"); + }); +}); + +// --------------------------------------------------------------------------- +// 4. Gemini SSE audio branch drops accumulated tool calls / content / reasoning +// +// When audioB64 is present the early return silently discards any tool calls, +// content, and reasoning accumulated earlier in the same stream. 
+// --------------------------------------------------------------------------- + +describe("collapseGeminiSSE audio branch preserves accumulated data", () => { + it("returns BOTH audio and a tool call when the stream has both", () => { + const body = [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [{ functionCall: { name: "get_weather", args: { city: "Paris" } } }], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [{ inlineData: { mimeType: "audio/pcm", data: "QUJD" } }], + }, + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.audioB64).toBe("QUJD"); + expect(result.audioMimeType).toBe("audio/pcm"); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + }); + + it("returns BOTH audio and accumulated content + reasoning", () => { + const body = [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [{ text: "thinking", thought: true }, { text: "visible answer" }], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [{ inlineData: { mimeType: "audio/pcm", data: "WFla" } }], + }, + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.audioB64).toBe("WFla"); + expect(result.content).toBe("visible answer"); + expect(result.reasoning).toBe("thinking"); + }); +}); + +// --------------------------------------------------------------------------- +// 5. SSE multi-line `data:` fields — only the first data: line per event read +// +// Per the SSE spec a single event may carry multiple `data:` lines that are +// joined with "\n" to form one payload. The collapsers `.find` only the first. 
+// --------------------------------------------------------------------------- + +/** + * Emit a single SSE event whose JSON payload is spread across MULTIPLE + * `data:` lines, the way a server splits a value at structural boundaries. + * + * Pretty-printing the object embeds newlines only between JSON tokens (where + * whitespace is legal), so prefixing each resulting line with `data:` and + * letting the collapser rejoin them with "\n" reconstructs valid JSON. This + * is the realistic multi-`data:` case; a mid-token split would be malformed + * SSE, not something a collapser should silently accept. + */ +function multiLineDataEvent(obj: unknown, eventLine?: string): string { + const dataLines = JSON.stringify(obj, null, 2) + .split("\n") + .map((l) => `data: ${l}`); + const parts = eventLine ? [eventLine, ...dataLines] : dataLines; + return parts.join("\n"); +} + +describe("multi-line SSE data fields", () => { + it("collapseOpenAISSE joins multiple data: lines into one JSON payload", () => { + const event = multiLineDataEvent({ choices: [{ delta: { content: "Hello multiline" } }] }); + const body = [event, "", "data: [DONE]", ""].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Hello multiline"); + expect(result.droppedChunks).toBeUndefined(); + }); + + it("collapseAnthropicSSE joins multiple data: lines into one JSON payload", () => { + const event = multiLineDataEvent( + { index: 0, delta: { type: "text_delta", text: "Split text" } }, + "event: content_block_delta", + ); + const body = [event, ""].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Split text"); + expect(result.droppedChunks).toBeUndefined(); + }); + + it("collapseGeminiSSE joins multiple data: lines into one JSON payload", () => { + const event = multiLineDataEvent({ + candidates: [{ content: { parts: [{ text: "Gemini split" }] } }], + }); + const body = [event, ""].join("\n"); + + const result = 
collapseGeminiSSE(body); + expect(result.content).toBe("Gemini split"); + expect(result.droppedChunks).toBeUndefined(); + }); + + it("collapseGeminiInteractionsSSE joins multiple data: lines into one JSON payload", () => { + const event = multiLineDataEvent({ + event_type: "content.delta", + index: 0, + delta: { type: "text", text: "Interactions split" }, + }); + const body = [event, ""].join("\n"); + + const result = collapseGeminiInteractionsSSE(body); + expect(result.content).toBe("Interactions split"); + expect(result.droppedChunks).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// 6. webSearches dropped from collapseOpenAISSE tool-call return branch +// +// The text-only return includes `webSearches`, but the tool-call return branch +// omits it. A Responses-API stream with both a web_search_call AND a tool_call +// loses the web searches. +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE webSearches with tool calls", () => { + it("returns BOTH toolCalls and webSearches when the stream has both", () => { + const body = [ + `data: ${JSON.stringify({ + type: "response.output_item.done", + item: { type: "web_search_call", status: "completed", action: { query: "weather paris" } }, + })}`, + "", + `data: ${JSON.stringify({ + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_ws", + type: "function", + function: { name: "get_weather", arguments: '{"city":"Paris"}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.webSearches).toEqual(["weather paris"]); + }); +}); + +// --------------------------------------------------------------------------- +// 7. 
Anthropic SSE missing content_block index +// +// `const index = parsed.index as number` is unguarded. When two tool_use +// content_block_start events both OMIT `index`, they collapse under the single +// `undefined` key and merge into one tool call. The OpenAI/Cohere/Bedrock +// collapsers already guard this; Anthropic must too. +// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE missing content_block index", () => { + it("keeps two index-less tool_use blocks as two distinct tool calls", () => { + const body = [ + `event: content_block_start`, + `data: ${JSON.stringify({ + type: "content_block_start", + content_block: { type: "tool_use", id: "toolu_a", name: "func_a" }, + })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + delta: { type: "input_json_delta", partial_json: '{"x":1}' }, + })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ + type: "content_block_start", + content_block: { type: "tool_use", id: "toolu_b", name: "func_b" }, + })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + delta: { type: "input_json_delta", partial_json: '{"y":2}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + const names = result.toolCalls!.map((tc) => tc.name).sort(); + expect(names).toEqual(["func_a", "func_b"]); + // Arguments must land on the block they followed, not cross-contaminate. + const byName = Object.fromEntries(result.toolCalls!.map((tc) => [tc.name, tc.arguments])); + expect(byName.func_a).toBe('{"x":1}'); + expect(byName.func_b).toBe('{"y":2}'); + }); +}); + +// --------------------------------------------------------------------------- +// 8. 
Cohere SSE mixed-key delta correlation + uncorrelated-delta accounting +// +// `lastSyntheticIndex` was only set for index-LESS starts, so a real-indexed +// start followed by an index-less delta fell back to a stale/undefined key and +// silently dropped the args. The most-recent start key must be tracked +// regardless of how it was keyed. And a delta that cannot correlate to any +// known start must increment droppedChunks rather than vanish. +// --------------------------------------------------------------------------- + +describe("collapseCohereSSE mixed-key delta correlation", () => { + it("lands an index-less delta on the most recent REAL-indexed start", () => { + const body = [ + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + id: "call_a", + type: "function", + function: { name: "func_a", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("func_a"); + expect(result.toolCalls![0].arguments).toBe('{"x":1}'); + }); + + it("counts an index-less delta with no prior start as a dropped chunk", () => { + const body = [ + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + delta: { message: { tool_calls: { function: { arguments: '{"orphan":true}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSSE(body); + expect(result.toolCalls).toBeUndefined(); + expect(result.droppedChunks).toBe(1); + expect(result.firstDroppedSample).toBeDefined(); + }); +}); + +// --------------------------------------------------------------------------- +// 9. 
CRLF-delimited SSE streams +// +// Real HTTP/SSE streams use CRLF (`\r\n`) line endings and `\r\n\r\n` between +// events. Splitting events on `\n\n` and data lines on `\n` leaves a trailing +// `\r` on each data line, so the final `data: [DONE]\r` mis-parses and earlier +// payloads carry a stray `\r`, corrupting JSON.parse. +// --------------------------------------------------------------------------- + +describe("CRLF-delimited SSE streams", () => { + it("collapseOpenAISSE parses a CRLF stream (content + [DONE])", () => { + const body = [ + `data: ${JSON.stringify({ choices: [{ delta: { content: "Hello CRLF" } }] })}`, + "", + "data: [DONE]", + "", + ].join("\r\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Hello CRLF"); + expect(result.droppedChunks).toBeUndefined(); + }); + + it("collapseAnthropicSSE parses a multi-event CRLF stream", () => { + const body = [ + "event: content_block_delta", + `data: ${JSON.stringify({ index: 0, delta: { type: "text_delta", text: "CRLF " } })}`, + "", + "event: content_block_delta", + `data: ${JSON.stringify({ index: 0, delta: { type: "text_delta", text: "text" } })}`, + "", + ].join("\r\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("CRLF text"); + expect(result.droppedChunks).toBeUndefined(); + }); + + it("collapseGeminiSSE parses a multi-event CRLF stream", () => { + const body = [ + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "Gemini " }] } }] })}`, + "", + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "CRLF" }] } }] })}`, + "", + ].join("\r\n"); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe("Gemini CRLF"); + expect(result.droppedChunks).toBeUndefined(); + }); + + it("collapseGeminiInteractionsSSE parses a multi-event CRLF stream", () => { + const body = [ + `data: ${JSON.stringify({ + event_type: "content.delta", + index: 0, + delta: { type: "text", text: "Interactions " }, + })}`, 
+ "", + `data: ${JSON.stringify({ + event_type: "content.delta", + index: 0, + delta: { type: "text", text: "CRLF" }, + })}`, + "", + ].join("\r\n"); + + const result = collapseGeminiInteractionsSSE(body); + expect(result.content).toBe("Interactions CRLF"); + expect(result.droppedChunks).toBeUndefined(); + }); + + it("collapseCohereSSE parses a multi-event CRLF stream", () => { + const body = [ + "event: content-delta", + `data: ${JSON.stringify({ + type: "content-delta", + delta: { message: { content: { text: "Cohere " } } }, + })}`, + "", + "event: content-delta", + `data: ${JSON.stringify({ + type: "content-delta", + delta: { message: { content: { text: "CRLF" } } }, + })}`, + "", + ].join("\r\n"); + + const result = collapseCohereSSE(body); + expect(result.content).toBe("Cohere CRLF"); + expect(result.droppedChunks).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// 10. Uncorrelated tool-arg deltas — Anthropic SSE + Bedrock EventStream +// +// The Cohere collapser already accounts for a tool-call-delta that has no +// correlating start as a dropped chunk (droppedChunks++ / firstDroppedSample). +// The Anthropic `input_json_delta` path and both Bedrock arg-delta paths +// (Messages `input_json_delta` and Converse `toolUse.input`) silently dropped +// the analogous uncorrelated delta. They must mirror Cohere. AND the Anthropic +// `lastSyntheticIndex` fallback must still let a real-indexed start correlate +// to a following index-less delta (positive coverage). 
+// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE uncorrelated input_json_delta", () => { + it("counts an input_json_delta with no correlating tool_use start as a dropped chunk", () => { + const body = [ + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + delta: { type: "input_json_delta", partial_json: '{"orphan":true}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeUndefined(); + expect(result.droppedChunks).toBe(1); + // The sample carries the raw SSE payload (the orphan partial_json is + // JSON-escaped inside it), so assert the orphan token survives — stronger + // than the bare `.toBeDefined()` it would otherwise be. + expect(result.firstDroppedSample).toContain("no correlating tool_use start"); + expect(result.firstDroppedSample).toContain('orphan\\":true'); + }); + + it("lands an index-less delta on the most recent REAL-indexed tool_use start", () => { + const body = [ + `event: content_block_start`, + `data: ${JSON.stringify({ + type: "content_block_start", + index: 3, + content_block: { type: "tool_use", id: "toolu_real", name: "func_real" }, + })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + delta: { type: "input_json_delta", partial_json: '{"k":9}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("func_real"); + expect(result.toolCalls![0].arguments).toBe('{"k":9}'); + expect(result.droppedChunks).toBeUndefined(); + }); +}); + +describe("collapseBedrockEventStream uncorrelated tool-arg deltas", () => { + it("counts a Messages input_json_delta with no correlating start as a dropped chunk", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { + type: 
"content_block_delta", + index: 0, + delta: { type: "input_json_delta", partial_json: '{"orphanMsg":1}' }, + }); + + const result = collapseBedrockEventStream(frame); + expect(result.toolCalls).toBeUndefined(); + expect(result.droppedChunks).toBe(1); + expect(result.firstDroppedSample).toContain("no correlating tool_use start"); + expect(result.firstDroppedSample).toContain('orphanMsg\\":1'); + }); + + it("counts a Converse toolUse.input with no correlating start as a dropped chunk", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + delta: { toolUse: { input: '{"orphanConverse":2}' } }, + }, + }); + + const result = collapseBedrockEventStream(frame); + expect(result.toolCalls).toBeUndefined(); + expect(result.droppedChunks).toBe(1); + expect(result.firstDroppedSample).toContain("no correlating tool_use start"); + expect(result.firstDroppedSample).toContain('orphanConverse\\":2'); + }); +}); diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts index d9f27fb..8bd88e0 100644 --- a/src/__tests__/stream-collapse.test.ts +++ b/src/__tests__/stream-collapse.test.ts @@ -9,6 +9,7 @@ import { collapseStreamingResponse, } from "../stream-collapse.js"; import { encodeEventStreamMessage, encodeEventStreamFrame } from "../aws-event-stream.js"; +import { parseHarmonyContent } from "../harmony.js"; // --------------------------------------------------------------------------- // 1. OpenAI SSE @@ -1834,3 +1835,1293 @@ describe("collapseOpenAISSE with chat completions reasoning_content", () => { expect(result.content).toBe(""); }); }); + +// --------------------------------------------------------------------------- +// collapseOpenAISSE — OpenAI harmony channel tokens (open-weight gpt-oss) +// +// Open-weight gpt-oss models served via Ollama / vLLM / OpenRouter stream +// tool calls as RAW harmony channel tokens inside `delta.content`, not in +// `delta.tool_calls`. 
aimock must parse those channels so the recorded +// fixture captures a structured tool call instead of leaking the raw +// `to=functions...` marker as plain text content. +// +// Harmony grammar (authoritative, from OpenAI's harmony spec): +// <|channel|>analysis<|message|>...<|end|> -> reasoning +// <|start|>assistant<|channel|>commentary to=functions.NAME +// <|constrain|>json<|message|>{...args...}<|call|> -> tool call +// <|channel|>final<|message|>...<|return|> -> content +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE harmony channel tokens", () => { + it("parses a harmony tool call streamed as raw tokens inside delta.content", () => { + // The canonical harmony sequence, chunked across SSE deltas like a real + // open-weight stream would emit it (token boundaries fall mid-marker). + const harmonyChunks = [ + "<|channel|>analysis<|message|>Need to call the ", + "tool to render the card.<|end|>", + "<|start|>assistant<|channel|>commentary to=functions.generate_a2ui ", + '<|constrain|>json<|message|>{"component":"card",', + '"props":{"title":"Hi"}}<|call|>', + ]; + + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-harm", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + // Desired end state: a structured tool call is emitted... + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("generate_a2ui"); + expect(result.toolCalls![0].arguments).toBe('{"component":"card","props":{"title":"Hi"}}'); + + // ...the analysis channel becomes reasoning... + expect(result.reasoning).toBe("Need to call the tool to render the card."); + + // ...and NO harmony control tokens or routing markers leak into content. + const leak = result.content ?? 
""; + expect(leak).not.toContain("<|channel|>"); + expect(leak).not.toContain("<|message|>"); + expect(leak).not.toContain("<|constrain|>"); + expect(leak).not.toContain("<|call|>"); + expect(leak).not.toContain("to=generate_a2ui"); + expect(leak).not.toContain("to=functions.generate_a2ui"); + expect(leak).not.toContain("functions.generate_a2ui"); + }); + + it("captures the final channel as content and analysis as reasoning", () => { + const harmonyChunks = [ + "<|channel|>analysis<|message|>The user said hello.<|end|>", + "<|start|>assistant<|channel|>final<|message|>Hello! How can ", + "I help you today?<|return|>", + ]; + + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-harm2", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.content).toBe("Hello! How can I help you today?"); + expect(result.reasoning).toBe("The user said hello."); + expect(result.toolCalls).toBeUndefined(); + expect(result.content).not.toContain("<|channel|>"); + expect(result.content).not.toContain("<|return|>"); + }); + + it("parses multiple interleaved harmony tool calls", () => { + const harmonyChunks = [ + "<|channel|>analysis<|message|>Call two tools.<|end|>", + "<|start|>assistant<|channel|>commentary to=functions.first ", + '<|constrain|>json<|message|>{"a":1}<|call|>', + "<|start|>assistant<|channel|>commentary to=functions.second ", + '<|constrain|>json<|message|>{"b":2}<|call|>', + ]; + + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-harm3", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("first"); + expect(result.toolCalls![0].arguments).toBe('{"a":1}'); + 
expect(result.toolCalls![1].name).toBe("second"); + expect(result.toolCalls![1].arguments).toBe('{"b":2}'); + expect(result.content ?? "").not.toContain("functions.first"); + }); + + it("is a no-op for normal (non-harmony) structured streams", () => { + // A plain text stream with no harmony control tokens must be untouched. + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-plain", choices: [{ delta: { content: "Just " } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-plain", choices: [{ delta: { content: "text." } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Just text."); + expect(result.toolCalls).toBeUndefined(); + expect(result.reasoning).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// collapseOpenAISSE harmony channel — fail-safe, token-aware, observable +// +// Regression coverage for the CR findings on the harmony parser. The shared +// root cause was a naive indexOf-scan over detokenized text that could +// DESTROY valid hosted-OpenAI answers (mere prose mention of a token), +// TRUNCATE tool-call JSON containing a literal token substring, DROP +// pre-channel / trailing-message text, and MISROUTE analysis-channel +// recipients into tool calls — all silently. The fix makes parsing fail-safe +// (return original content on any incomplete/invalid structure), terminate +// json bodies at their spec terminator with JSON validation, anchor +// pre/trailing text, gate recipient routing to the commentary channel, and +// surface drops/truncations via droppedChunks/truncated. +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE harmony fail-safe + token-aware", () => { + // A1 — content destruction: a hosted/structured answer that merely MENTIONS + // the token must NOT be mangled into empty content. 
+ it("preserves a final answer that merely mentions <|channel|> as prose (no destruction)", () => { + const prose = "The special token is <|channel|> and it routes model output to channels."; + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-prose", choices: [{ delta: { content: prose } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + // The whole answer must survive verbatim — never collapsed to "". + expect(result.content).toBe(prose); + expect(result.toolCalls).toBeUndefined(); + expect(result.reasoning).toBeUndefined(); + }); + + // A1 — explicit no-op guard: a genuinely structured tool_calls stream whose + // text content happens to mention the token must keep its content verbatim + // and fabricate NO harmony tool calls / reasoning. + it("is a no-op for a structured tool_calls stream whose content mentions <|channel|> as prose", () => { + const prose = "I will call a tool. Note: <|channel|> is a harmony marker."; + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-struct", + choices: [ + { + delta: { + content: prose, + tool_calls: [ + { + index: 0, + id: "call_1", + function: { name: "get_weather", arguments: '{"city":"SF"}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + // Content preserved verbatim; only the genuine structured tool call present. + expect(result.content).toBe(prose); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}'); + }); + + // A2 — body termination: tool-call args JSON containing a literal token + // substring must NOT be truncated to invalid JSON. 
+ it("does not truncate tool-call args JSON containing a literal token substring", () => { + // The args contain the text "<|call|>" inside a JSON string value — the + // body must run to the REAL <|call|> terminator, not the embedded one. + const harmonyChunks = [ + "<|start|>assistant<|channel|>commentary to=functions.say ", + '<|constrain|>json<|message|>{"text":"say <|call|> now"}<|call|>', + ]; + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-emb", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("say"); + // Args must be the COMPLETE, valid JSON — not cut at the embedded token. + expect(result.toolCalls![0].arguments).toBe('{"text":"say <|call|> now"}'); + expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow(); + }); + + // A2 — channel/start must not truncate an open json body. + it("does not let <|channel|> inside tool-call args truncate the JSON body", () => { + const harmonyChunks = [ + "<|start|>assistant<|channel|>commentary to=functions.render ", + '<|constrain|>json<|message|>{"markup":"<|channel|> tag in a2ui"}<|call|>', + ]; + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-emb2", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].arguments).toBe('{"markup":"<|channel|> tag in a2ui"}'); + expect(() => JSON.parse(result.toolCalls![0].arguments)).not.toThrow(); + }); + + // A3 — anchoring: text BEFORE the first <|channel|> must be captured. + it("captures pre-channel text as content", () => { + const harmonyChunks = [ + "Here is a preamble. 
", + "<|channel|>final<|message|>The answer.<|return|>", + ]; + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-pre", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.content).toBe("Here is a preamble. The answer."); + expect(result.content).not.toContain("<|channel|>"); + }); + + // A3 — anchoring: a trailing <|start|>assistant<|message|> message that has + // NO <|channel|> (final-answer-after-tool-call) must be captured. + it("captures a trailing <|start|>...<|message|> final message that lacks <|channel|>", () => { + const harmonyChunks = [ + "<|start|>assistant<|channel|>commentary to=functions.lookup ", + '<|constrain|>json<|message|>{"q":"weather"}<|call|>', + "<|start|>assistant<|message|>The weather is sunny.<|return|>", + ]; + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-trail", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("lookup"); + expect(result.content).toBe("The weather is sunny."); + expect(result.content).not.toContain("<|start|>"); + expect(result.content).not.toContain("<|message|>"); + }); + + // A5 — recipient routing: recipient on the ROLE line (before <|channel|>) + // must be recognized as a commentary tool call. 
+ it("recognizes a recipient placed on the role segment before <|channel|>", () => { + const harmonyChunks = [ + "<|start|>assistant to=functions.role_placed<|channel|>commentary ", + '<|constrain|>json<|message|>{"x":1}<|call|>', + ]; + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-role", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("role_placed"); + expect(result.toolCalls![0].arguments).toBe('{"x":1}'); + }); + + // A5 — recipient routing: an analysis-channel header carrying a recipient + // must NOT be misrouted into a tool call (only commentary routes). + it("does not route an analysis-channel recipient into a tool call", () => { + const harmonyChunks = [ + "<|channel|>analysis to=functions.should_not_fire<|message|>Thinking about it.<|end|>", + "<|start|>assistant<|channel|>final<|message|>Done.<|return|>", + ]; + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-an", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + // No tool call fabricated from the analysis channel. + expect(result.toolCalls).toBeUndefined(); + expect(result.reasoning).toBe("Thinking about it."); + expect(result.content).toBe("Done."); + }); + + // A2/A5 edge: a <|constrain|>json marker on a NON-tool channel (no + // commentary recipient) must NOT trigger <|call|>-terminated parsing and + // swallow the following final message — it ends at its own <|end|>. 
+ it("does not let a non-tool-call <|constrain|>json body swallow the next message", () => { + const harmonyChunks = [ + "<|channel|>analysis<|constrain|>json<|message|>Thinking.<|end|>", + "<|start|>assistant<|channel|>final<|message|>The final answer.<|return|>", + ]; + const body = [ + ...harmonyChunks.flatMap((chunk) => [ + `data: ${JSON.stringify({ id: "chatcmpl-cj", choices: [{ delta: { content: chunk } }] })}`, + "", + ]), + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.reasoning).toBe("Thinking."); + expect(result.content).toBe("The final answer."); + expect(result.toolCalls).toBeUndefined(); + }); + + // A6 — observability: a malformed harmony structure must fail SAFE (content + // preserved VERBATIM). Because the bytes are NOT lost, this is NOT transport + // loss: it surfaces via the distinct `harmonyUnparsed` signal, NOT + // droppedChunks/truncated (those are reserved for genuine transport loss). + it("surfaces a malformed harmony structure via harmonyUnparsed (not droppedChunks/truncated)", () => { + // A <|channel|> + <|message|> opener whose tool-call body never yields + // valid JSON (no terminator, no closing brace) — unparseable. + const broken = + "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid json"; + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-broken", choices: [{ delta: { content: broken } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + // Fail-safe: original content preserved verbatim, no fabricated/empty loss. + expect(result.content).toBe(broken); + expect(result.toolCalls).toBeUndefined(); + // Distinct signal — NOT a dropped/truncated chunk. + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 
0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// collapseOpenAISSE — A4: reasoning + webSearches parity on the tool-call +// return branch (non-harmony structured streams). +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE reasoning/webSearches parity with tool calls", () => { + // A4 — a NON-harmony structured stream with delta.reasoning_content + + // tool_calls must preserve reasoning (DeepSeek / OpenRouter shape). + it("preserves reasoning_content alongside structured tool_calls (no harmony)", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-r", choices: [{ delta: { reasoning_content: "Let me think. " } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-r", choices: [{ delta: { reasoning_content: "I will call a tool." } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-r", choices: [{ delta: { tool_calls: [{ index: 0, id: "call_a", function: { name: "get_weather", arguments: '{"city":"SF"}' } }] } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + // reasoning must survive even though this is NOT a harmony stream. + expect(result.reasoning).toBe("Let me think. I will call a tool."); + }); + + // A4 — webSearches parity: a Responses-API web_search_call followed by a + // tool call must surface webSearches on the tool-call return branch too. 
+ it("preserves webSearches alongside tool calls", () => { + const body = [ + `data: ${JSON.stringify({ type: "response.output_item.done", item: { type: "web_search_call", action: { query: "weather SF" } } })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-ws", choices: [{ delta: { tool_calls: [{ index: 0, id: "call_w", function: { name: "get_weather", arguments: "{}" } }] } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + + expect(result.toolCalls).toHaveLength(1); + expect(result.webSearches).toEqual(["weather SF"]); + }); +}); + +// --------------------------------------------------------------------------- +// collapseOllamaNDJSON — A7: harmony parsing parity for gpt-oss over Ollama +// (NDJSON). gpt-oss served via Ollama streams harmony tokens inside +// message.content; without parsing they leak as raw text. +// --------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON harmony channel tokens", () => { + it("parses a harmony tool call streamed as raw tokens inside message.content", () => { + const harmonyChunks = [ + "<|channel|>analysis<|message|>Need to render a card.<|end|>", + "<|start|>assistant<|channel|>commentary to=functions.generate_a2ui ", + '<|constrain|>json<|message|>{"component":"card","props":{"title":"Hi"}}<|call|>', + ]; + const body = [ + ...harmonyChunks.map((chunk) => + JSON.stringify({ + model: "gpt-oss", + message: { role: "assistant", content: chunk }, + done: false, + }), + ), + JSON.stringify({ model: "gpt-oss", message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("generate_a2ui"); + expect(result.toolCalls![0].arguments).toBe('{"component":"card","props":{"title":"Hi"}}'); + expect(result.reasoning).toBe("Need to render a card."); + const leak = result.content ?? 
""; + expect(leak).not.toContain("<|channel|>"); + expect(leak).not.toContain("to=functions.generate_a2ui"); + }); + + it("is a no-op for normal (non-harmony) Ollama content", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "Just " }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "text." }, + done: true, + }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("Just text."); + expect(result.toolCalls).toBeUndefined(); + }); + + // A7 — pre-existing bug: JSON.stringify(undefined arguments) yields the + // literal string "undefined". Must default to "{}". + it("defaults arguments to {} when a structured tool_call omits arguments", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [{ function: { name: "no_args" } }], + }, + done: false, + }), + JSON.stringify({ model: "llama3", message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("no_args"); + expect(result.toolCalls![0].arguments).toBe("{}"); + }); +}); + +// --------------------------------------------------------------------------- +// collapseOpenAISSE — multibyte content across SSE event boundaries +// +// collapseOpenAISSE receives the FULL concatenated response body (decoded +// once from the complete buffer at the recorder layer), so multibyte UTF-8 +// in delta.content is already safe regardless of how deltas were chunked. +// This test pins that guarantee: CJK + emoji split across deltas must +// round-trip with no U+FFFD replacement characters. 
+// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE multibyte content", () => { + it("preserves CJK and emoji content chunked across SSE events", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content: "访问 " } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content: "官网群" } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content: " 🎉" } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("访问 官网群 🎉"); + expect(result.content).not.toContain("�"); + }); +}); + +// =========================================================================== +// Harmony lexer + state-machine rewrite — STRUCTURAL acceptance matrix +// +// The harmony parser is a two-phase lexer + state machine (src/harmony.ts): +// Phase 1 lexes the accumulated content into an ordered CONTROL/TEXT token +// stream (bytes consumed into a TEXT span are never re-scanned for control +// tokens); Phase 2 walks the stream against the harmony grammar with UNIFORM +// all-or-nothing fail-safe semantics. +// +// Contract under test: +// - GATE / FAIL-SAFE: any grammar deviation returns the ORIGINAL raw input +// verbatim with no toolCalls/reasoning. +// - OBSERVABILITY: a harmony failure preserves bytes verbatim, so it is NOT +// transport loss — it sets `harmonyUnparsed`, never droppedChunks/truncated. +// - WHITESPACE: inter-message / trailing whitespace-only TEXT is absorbed. +// - NON-TOOL EMBEDDED TOKENS: literal token substrings inside a body do not +// truncate it; the body runs to its real terminator. +// - FALLBACK-ONLY wiring: harmony is attempted ONLY when there are no +// structured delta.tool_calls; structured calls always win and harmony +// content is then treated as prose (no phantom, no truncated stamp). 
+// - ROUTING: analysis->reasoning, final->content, commentary+recipient->tool, +// commentary-without-recipient->content. Recipient identifiers only. +// =========================================================================== + +/** Build an OpenAI SSE body: one `data:` event per delta object (each followed by a blank line), then the `data: [DONE]` sentinel. */ +function openAISSEBody(deltas: Array<Record<string, unknown>>, id = "chatcmpl-mtx"): string { + return [ + ...deltas.flatMap((delta) => [`data: ${JSON.stringify({ id, choices: [{ delta }] })}`, ""]), + "data: [DONE]", + "", + ].join("\n"); +} + +/** Build an OpenAI SSE body whose content chunks carry harmony tokens. */ +function openAIHarmonyBody(chunks: string[], id = "chatcmpl-mtx"): string { + return openAISSEBody( + chunks.map((content) => ({ content })), + id, + ); +} + +/** Build an Ollama /api/chat NDJSON body whose message.content carries chunks. */ +function ollamaHarmonyBody(chunks: string[], model = "gpt-oss"): string { + return [ + ...chunks.map((content) => + JSON.stringify({ model, message: { role: "assistant", content }, done: false }), + ), + JSON.stringify({ model, message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); +} + +describe("harmony rewrite — GATE / FAIL-SAFE (verbatim no-op on non-structure)", () => { + // (1)* prose mentioning <|channel|>/<|message|> as inline code -> content + // VERBATIM, no toolCalls/reasoning, no truncated/droppedChunks. + it("(1) prose mentioning the tokens is content verbatim (no destruction)", () => { + const prose = + "Harmony uses `<|channel|>` to pick a channel and `<|message|>` to start the body"; + const result = collapseOpenAISSE(openAIHarmonyBody([prose])); + expect(result.content).toBe(prose); + expect(result.toolCalls).toBeUndefined(); + expect(result.reasoning).toBeUndefined(); + expect(result.truncated).toBeUndefined(); + expect(result.droppedChunks ?? 0).toBe(0); + + // Direct parser unit check: a prose-only mention has no Message -> failed. 
+ const direct = parseHarmonyContent(prose); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(prose); + expect(direct.toolCalls).toEqual([]); + expect(direct.reasoning).toBe(""); + }); + + // (2) tokens in reverse order -> verbatim no-op. The cheap `isHarmonyContent` + // gate requires channel-then-message (or start-then-message) ordering, so a + // reversed stream does not even trip the gate: the collapse path leaves the + // content verbatim and never sets harmonyUnparsed. The parser itself, when + // called directly, still fails-safe on the reversed structure. + it("(2) reversed token order is a verbatim no-op", () => { + const reversed = "<|message|>body<|channel|>analysis"; + const result = collapseOpenAISSE(openAIHarmonyBody([reversed])); + expect(result.content).toBe(reversed); + expect(result.toolCalls).toBeUndefined(); + expect(result.harmonyUnparsed).toBeUndefined(); + expect(result.truncated).toBeUndefined(); + expect(result.droppedChunks ?? 0).toBe(0); + + const direct = parseHarmonyContent(reversed); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(reversed); + }); + + // (3) prose containing every literal token but no valid Message -> verbatim, + // accurate harmonyUnparsed signal. + it("(3) every literal token but no valid message -> verbatim + harmonyUnparsed", () => { + const prose = + "tokens: <|start|> <|end|> <|return|> <|call|> <|channel|> <|message|> <|constrain|> (all as prose)"; + const result = collapseOpenAISSE(openAIHarmonyBody([prose])); + expect(result.content).toBe(prose); + expect(result.toolCalls).toBeUndefined(); + expect(result.reasoning).toBeUndefined(); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + // (4) empty / whitespace-only -> unchanged no-op. 
+ it("(4) empty and whitespace-only inputs are unchanged no-ops", () => { + const empty = collapseOpenAISSE(openAIHarmonyBody([""])); + expect(empty.content).toBe(""); + expect(empty.toolCalls).toBeUndefined(); + expect(empty.harmonyUnparsed).toBeUndefined(); + + const ws = collapseOpenAISSE(openAIHarmonyBody([" \n "])); + expect(ws.content).toBe(" \n "); + expect(ws.toolCalls).toBeUndefined(); + expect(ws.harmonyUnparsed).toBeUndefined(); + + // Direct: empty/whitespace are not harmony at all; parser returns failed + // (no message) with content preserved. + expect(parseHarmonyContent("").content).toBe(""); + expect(parseHarmonyContent(" ").content).toBe(" "); + }); +}); + +describe("harmony rewrite — WHITESPACE (the masked class)", () => { + // (5)* analysis<|end|> + "\n" + <|start|>...final<|return|> -> reasoning + + // content parsed, no leak of the inter-message newline. + it("(5) newline between analysis<|end|> and the next <|start|> is absorbed", () => { + const chunks = [ + "<|channel|>analysis<|message|>Thinking it through.<|end|>", + "\n", + "<|start|>assistant<|channel|>final<|message|>The answer.<|return|>", + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.reasoning).toBe("Thinking it through."); + expect(result.content).toBe("The answer."); + expect(result.toolCalls).toBeUndefined(); + expect(result.content).not.toContain("\n"); + expect(result.content).not.toContain("<|"); + }); + + // (6) single space between two commentary tool calls -> 2 toolCalls. 
+ it("(6) a single space between two commentary tool calls yields 2 tool calls", () => { + const chunks = [ + '<|start|>assistant<|channel|>commentary to=functions.first <|constrain|>json<|message|>{"a":1}<|call|>', + " ", + '<|start|>assistant<|channel|>commentary to=functions.second <|constrain|>json<|message|>{"b":2}<|call|>', + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0]).toEqual({ name: "first", arguments: '{"a":1}' }); + expect(result.toolCalls![1]).toEqual({ name: "second", arguments: '{"b":2}' }); + }); + + // (7)* trailing "\n" after the final <|return|> -> parsed, newline absorbed. + it("(7) trailing newline after the final <|return|> is absorbed, not leaked", () => { + const chunks = ["<|channel|>final<|message|>All done.<|return|>", "\n"]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.content).toBe("All done."); + expect(result.toolCalls).toBeUndefined(); + expect(result.content).not.toContain("\n"); + + const direct = parseHarmonyContent("<|channel|>final<|message|>All done.<|return|>\n"); + expect(direct.failed).toBe(false); + expect(direct.content).toBe("All done."); + }); + + // (8) leading whitespace before the first <|channel|> -> absorbed (blank + // leading text is not content). + it("(8) leading whitespace before the first <|channel|> is absorbed", () => { + const direct = parseHarmonyContent(" \n <|channel|>final<|message|>Hi.<|return|>"); + expect(direct.failed).toBe(false); + expect(direct.content).toBe("Hi."); + expect(direct.reasoning).toBe(""); + }); + + // (9) mixed " \n " between three messages -> all parsed. 
+ it("(9) mixed whitespace between three messages is absorbed; all parse", () => { + const chunks = [ + "<|channel|>analysis<|message|>Reason.<|end|>", + " \n ", + '<|start|>assistant<|channel|>commentary to=functions.tool <|constrain|>json<|message|>{"k":1}<|call|>', + " ", + "<|start|>assistant<|channel|>final<|message|>Answer.<|return|>", + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.reasoning).toBe("Reason."); + expect(result.content).toBe("Answer."); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0]).toEqual({ name: "tool", arguments: '{"k":1}' }); + }); +}); + +describe("harmony rewrite — NON-TOOL EMBEDDED TOKENS", () => { + // (10)* final body quoting <|end|>/<|return|> as prose, terminated by a real + // <|return|> -> content = full sentence (NOT truncated to "See "). + it("(10) final body quoting <|end|>/<|return|> keeps the full sentence", () => { + const body = "See `<|end|>` for the end token and `<|return|>` too."; + const raw = `<|channel|>final<|message|>${body}<|return|>`; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.content).toBe(body); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(body); + expect(result.toolCalls).toBeUndefined(); + }); + + // (11) analysis body quoting <|call|>/<|start|> as prose, terminated by + // <|end|> -> reasoning = full body. 
+ it("(11) analysis body quoting <|call|>/<|start|> keeps the full body", () => { + const body = "Consider the `<|call|>` and `<|start|>` markers carefully."; + const raw = `<|channel|>analysis<|message|>${body}<|end|><|start|>assistant<|channel|>final<|message|>Done.<|return|>`; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.reasoning).toBe(body); + expect(direct.content).toBe("Done."); + }); + + // (12) commentary-preamble (no recipient) body quoting <|end|> -> content, + // full body. + it("(12) commentary preamble (no recipient) quoting <|end|> keeps full content", () => { + const body = "Let me explain `<|end|>` before answering."; + const raw = `<|channel|>commentary<|message|>${body}<|end|><|start|>assistant<|channel|>final<|message|>Answer.<|return|>`; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.content).toBe(`${body}Answer.`); + expect(direct.toolCalls).toEqual([]); + }); +}); + +describe("harmony rewrite — TOOL BODY (keep green)", () => { + // (13) args {"text":"say <|call|> now"}<|call|> -> 1 toolCall, exact args. + it("(13) embedded <|call|> inside tool args does not truncate the JSON", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.say<|constrain|>json<|message|>{"text":"say <|call|> now"}<|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + expect(direct.toolCalls[0]).toEqual({ + name: "say", + arguments: '{"text":"say <|call|> now"}', + }); + expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow(); + }); + + // (14) args containing <|channel|>/<|message|> substrings -> exact args. 
+ it("(14) embedded <|channel|>/<|message|> inside tool args are preserved exactly", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.render<|constrain|>json<|message|>{"markup":"<|channel|> and <|message|> tags"}<|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + expect(direct.toolCalls[0].arguments).toBe('{"markup":"<|channel|> and <|message|> tags"}'); + expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow(); + }); + + // (15) args invalid JSON, no terminator -> fail-safe verbatim + signal. + it("(15) invalid-JSON tool body with no terminator fails safe", () => { + const raw = + "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid json"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + // (16) args valid JSON but terminated by <|end|> not <|call|> -> fail-safe. + it("(16) valid-JSON tool body terminated by <|end|> (not <|call|>) fails safe", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.x<|constrain|>json<|message|>{"a":1}<|end|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + }); +}); + +describe("harmony rewrite — DUAL-SOURCE (fallback-only wiring)", () => { + // (17)* structured delta.tool_calls + content prose mentioning tokens -> + // content verbatim, exactly the structured toolCall(s), NO phantom, NO + // truncated/droppedChunks. 
+ it("(17) structured tool_calls + prose mentioning tokens: only structured call, content verbatim", () => { + const prose = "I will call a tool. Note `<|channel|>` and `<|message|>` are harmony markers."; + const body = openAISSEBody([ + { + content: prose, + tool_calls: [ + { index: 0, id: "call_1", function: { name: "get_weather", arguments: '{"city":"SF"}' } }, + ], + }, + ]); + const result = collapseOpenAISSE(body); + expect(result.content).toBe(prose); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}'); + expect(result.truncated).toBeUndefined(); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.harmonyUnparsed).toBeUndefined(); + }); + + // (18) structured tool_calls + content that IS well-formed harmony tool + // tokens -> only structured calls win (fallback-only), count == structured. + it("(18) structured tool_calls win over well-formed harmony content (fallback-only)", () => { + const harmony = + '<|start|>assistant<|channel|>commentary to=functions.harmony_tool<|constrain|>json<|message|>{"z":9}<|call|>'; + const body = openAISSEBody([ + { + content: harmony, + tool_calls: [ + { index: 0, id: "call_s", function: { name: "structured_tool", arguments: '{"s":1}' } }, + ], + }, + ]); + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("structured_tool"); + // The harmony content is treated as prose (never parsed into a 2nd call). + expect(result.toolCalls!.some((tc) => tc.name === "harmony_tool")).toBe(false); + }); + + // (19) structured tool_calls + harmony-looking content that would FAIL to + // parse -> no truncated stamp (content is prose, not a harmony failure). 
+ it("(19) structured tool_calls + unparseable harmony content: no truncated stamp", () => { + const broken = + "<|start|>assistant<|channel|>commentary to=functions.broken<|message|>{not valid"; + const body = openAISSEBody([ + { + content: broken, + tool_calls: [ + { index: 0, id: "call_s", function: { name: "structured_tool", arguments: "{}" } }, + ], + }, + ]); + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("structured_tool"); + expect(result.truncated).toBeUndefined(); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.harmonyUnparsed).toBeUndefined(); + // Content is preserved as-is (prose), never collapsed. + expect(result.content).toBe(broken); + }); +}); + +describe("harmony rewrite — MULTI-MESSAGE REALISTIC", () => { + // (20) analysis->reasoning, "\n", commentary toolCall, "\n", final->content. + it("(20) analysis + commentary tool + final with separators: all correct, zero leak", () => { + const chunks = [ + "<|channel|>analysis<|message|>Plan the call.<|end|>", + "\n", + '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"x"}<|call|>', + "\n", + "<|start|>assistant<|channel|>final<|message|>Here is the result.<|return|>", + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.reasoning).toBe("Plan the call."); + expect(result.content).toBe("Here is the result."); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' }); + expect(result.content).not.toContain("<|"); + expect(result.content).not.toContain("\n"); + }); + + // (21) analysis + final only with separators (no tool) -> reasoning+content. 
+ it("(21) analysis + final only (no tool) -> reasoning + content, no toolCalls", () => { + const chunks = [ + "<|channel|>analysis<|message|>Just reasoning.<|end|>", + " \n ", + "<|start|>assistant<|channel|>final<|message|>Just the answer.<|return|>", + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.reasoning).toBe("Just reasoning."); + expect(result.content).toBe("Just the answer."); + expect(result.toolCalls).toBeUndefined(); + }); + + // (22) commentary preamble + commentary toolCall -> preamble->content, 1 call. + it("(22) commentary preamble + commentary tool call: preamble is content, 1 tool call", () => { + const chunks = [ + "<|channel|>commentary<|message|>Let me look that up for you.<|end|>", + '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"y"}<|call|>', + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.content).toBe("Let me look that up for you."); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"y"}' }); + }); +}); + +describe("harmony rewrite — ROUTING EDGES (keep green)", () => { + // (23) analysis header carrying to=functions.x -> NOT a tool call. + it("(23) analysis-channel recipient is NOT a tool call (reasoning only)", () => { + const chunks = [ + "<|channel|>analysis to=functions.should_not_fire<|message|>Thinking.<|end|>", + "<|start|>assistant<|channel|>final<|message|>Done.<|return|>", + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.toolCalls).toBeUndefined(); + expect(result.reasoning).toBe("Thinking."); + expect(result.content).toBe("Done."); + }); + + // (24) recipient on the role segment before <|channel|>commentary -> toolCall. 
+ it("(24) recipient on the role segment before <|channel|>commentary -> tool call", () => { + const raw = + '<|start|>assistant to=functions.role_placed<|channel|>commentary<|constrain|>json<|message|>{"x":1}<|call|>'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + expect(direct.toolCalls[0]).toEqual({ name: "role_placed", arguments: '{"x":1}' }); + }); + + // (25) <|constrain|>json on analysis does NOT trigger <|call|>-termination; + // body ends at <|end|>. + it("(25) <|constrain|>json on analysis does not trigger call-termination", () => { + const chunks = [ + "<|channel|>analysis<|constrain|>json<|message|>Thinking.<|end|>", + "<|start|>assistant<|channel|>final<|message|>The final answer.<|return|>", + ]; + const result = collapseOpenAISSE(openAIHarmonyBody(chunks)); + expect(result.reasoning).toBe("Thinking."); + expect(result.content).toBe("The final answer."); + expect(result.toolCalls).toBeUndefined(); + }); + + // (26) RECIPIENT_RE: to=functions.- or to=functions. -> NOT a recipient -> + // non-tool body (no {name:"-"} call). A commentary message without a valid + // recipient is a preamble -> content. + it("(26) to=functions.- / to=functions. 
are not recipients (no bogus tool call)", () => { + const dash = + "<|start|>assistant<|channel|>commentary to=functions.-<|message|>preamble dash<|end|>"; + const directDash = parseHarmonyContent(dash); + expect(directDash.failed).toBe(false); + expect(directDash.toolCalls).toEqual([]); + expect(directDash.content).toBe("preamble dash"); + + const empty = + "<|start|>assistant<|channel|>commentary to=functions.<|message|>preamble empty<|end|>"; + const directEmpty = parseHarmonyContent(empty); + expect(directEmpty.failed).toBe(false); + expect(directEmpty.toolCalls).toEqual([]); + expect(directEmpty.content).toBe("preamble empty"); + }); +}); + +describe("harmony rewrite — UNTERMINATED / MALFORMED", () => { + // (27) commentary to=functions.x message {"a":1} with NO <|call|> -> fail-safe + // verbatim + signal. + it("(27) commentary tool body with valid JSON but no <|call|> fails safe", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.x<|constrain|>json<|message|>{"a":1}'; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + }); + + // (28) <|channel|> with no following <|message|> -> fail-safe verbatim. + it("(28) <|channel|> with no following <|message|> fails safe", () => { + const raw = "<|channel|>analysis no message here"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + }); + + // (29) <|start|>assistant with no channel/message -> fail-safe verbatim. 
+ it("(29) <|start|>assistant with no channel/message fails safe", () => { + const raw = "<|start|>assistant"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + }); +}); + +describe("harmony rewrite — OBSERVABILITY / ENCODING", () => { + // (30) a firstDroppedSample-style 200-unit sample whose boundary splits a + // surrogate pair must be valid UTF-16 (no lone surrogate). This pins the + // surrogate-safe slicing of diagnostic samples. + it("(30) a 200-unit diagnostic sample never ends on a lone surrogate", () => { + // The diagnostic note slices the FULL content (which begins with the + // harmony prefix), not just the filler — so the emoji must be positioned + // relative to the prefix length to land its HIGH surrogate exactly at the + // 200-unit slice boundary (UTF-16 index 199). Otherwise the slice boundary + // never splits the pair and the surrogate-trim branch is never exercised. + const prefix = "<|start|>assistant<|channel|>commentary to=functions.s<|message|>{bad "; + // filler length = 199 - prefix.length puts the emoji's high surrogate at + // UTF-16 index 199 and its low surrogate at index 200. + const filler = "x".repeat(199 - prefix.length); + const content = `${prefix}${filler}😀 trailing`; + + // Sanity: the raw (un-trimmed) 200-unit slice MUST end on a lone high + // surrogate, proving this test actually exercises the trim branch and is + // not trivially green. + const rawSlice = content.slice(0, 200); + const rawLast = rawSlice.charCodeAt(rawSlice.length - 1); + expect(rawLast >= 0xd800 && rawLast <= 0xdbff).toBe(true); + + const result = collapseOpenAISSE(openAIHarmonyBody([content])); + // Harmony failed -> content preserved verbatim, signal set. 
+ expect(result.content).toBe(content); + expect(result.harmonyUnparsed).toBe(true); + + // The diagnostic note is always present alongside harmonyUnparsed, and its + // surrogate-safe slice must contain NO lone surrogate code unit anywhere. + expect(result.harmonyNote).toBeDefined(); + const note = result.harmonyNote!; + for (let k = 0; k < note.length; k++) { + const unit = note.charCodeAt(k); + if (unit >= 0xd800 && unit <= 0xdbff) { + // High surrogate: the next unit MUST be a low surrogate. + const next = note.charCodeAt(k + 1); + expect(next >= 0xdc00 && next <= 0xdfff).toBe(true); + k++; // skip the paired low surrogate + } else if (unit >= 0xdc00 && unit <= 0xdfff) { + // A low surrogate not preceded by a high surrogate is unpaired. + throw new Error(`lone low surrogate at index ${k} in harmonyNote`); + } + } + }); +}); + +describe("harmony rewrite — Ollama NDJSON parity", () => { + // Fallback-only + fail-safe + whitespace parity for the Ollama path. + it("parses analysis + commentary tool + final over Ollama with separators", () => { + const chunks = [ + "<|channel|>analysis<|message|>Plan.<|end|>", + "\n", + '<|start|>assistant<|channel|>commentary to=functions.lookup<|constrain|>json<|message|>{"q":"x"}<|call|>', + "\n", + "<|start|>assistant<|channel|>final<|message|>Result.<|return|>", + ]; + const result = collapseOllamaNDJSON(ollamaHarmonyBody(chunks)); + expect(result.reasoning).toBe("Plan."); + expect(result.content).toBe("Result."); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0]).toEqual({ name: "lookup", arguments: '{"q":"x"}' }); + }); + + it("prose mention over Ollama is content verbatim (no destruction)", () => { + const prose = "Harmony uses `<|channel|>` then `<|message|>` for the body"; + const result = collapseOllamaNDJSON(ollamaHarmonyBody([prose])); + expect(result.content).toBe(prose); + expect(result.toolCalls).toBeUndefined(); + expect(result.droppedChunks ?? 
0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + it("structured Ollama tool_calls win over harmony content (fallback-only)", () => { + const harmony = + '<|start|>assistant<|channel|>commentary to=functions.harmony_tool<|constrain|>json<|message|>{"z":9}<|call|>'; + const body = [ + JSON.stringify({ + model: "gpt-oss", + message: { + role: "assistant", + content: harmony, + tool_calls: [{ function: { name: "structured_tool", arguments: '{"s":1}' } }], + }, + done: false, + }), + JSON.stringify({ model: "gpt-oss", message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); + const result = collapseOllamaNDJSON(body); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("structured_tool"); + expect(result.toolCalls!.some((tc) => tc.name === "harmony_tool")).toBe(false); + expect(result.truncated).toBeUndefined(); + }); + + it("unparseable harmony over Ollama fails safe via harmonyUnparsed", () => { + const raw = + "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid"; + const result = collapseOllamaNDJSON(ollamaHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); +}); + +// =========================================================================== +// Harmony fail-safe UNIFORMITY at body boundaries — regressions for the three +// holes a 7-agent review found in the documented all-or-nothing contract: +// "any grammar deviation -> return ORIGINAL content verbatim, failed:true, +// never silently strip/mangle; never leak a control token into content/ +// reasoning." Each test below pins a boundary case where the old code accepted +// (failed:false) while leaking a control token or dropping data. 
+// =========================================================================== + +describe("harmony fail-safe — body terminator followed by trailing junk (B-A1)", () => { + // A real terminator followed by NON-whitespace text that is NOT a real + // message start is a grammar deviation: the stream neither cleanly ends nor + // continues with another message. The OLD code absorbed the terminator + // literal into the body and kept scanning to EOF, leaking "<|return|> junk" + // into content. Correct behavior: uniform fail-safe (verbatim + failed:true). + it("final<|return|> followed by trailing junk fails safe (no token leak)", () => { + const raw = "<|channel|>final<|message|>Answer.<|return|> junk"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + expect(direct.reasoning).toBe(""); + + const result = collapseOpenAISSE(openAIHarmonyBody([raw])); + expect(result.content).toBe(raw); + expect(result.harmonyUnparsed).toBe(true); + expect(result.droppedChunks ?? 0).toBe(0); + expect(result.truncated).toBeUndefined(); + }); + + it("analysis<|end|> followed by trailing junk fails safe (no token leak)", () => { + const raw = "<|channel|>analysis<|message|>thinking<|end|>junk"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + }); +}); + +describe("harmony fail-safe — unterminated NON-final body at EOF (B-A3)", () => { + // The grammar says "EOF terminates the FINAL message only." An unterminated + // analysis (reasoning) body at EOF is a grammar deviation — analysis bodies + // are terminator-expecting (<|end|>). The OLD code accepted it (failed:false) + // and surfaced dangling reasoning. Correct behavior: fail-safe verbatim. 
+ it("unterminated analysis body at EOF fails safe", () => { + const raw = "<|channel|>analysis<|message|>dangling reasoning"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.reasoning).toBe(""); + expect(direct.toolCalls).toEqual([]); + }); + + // A final body at EOF (no terminator) is still legitimately accepted — EOF + // terminates the final message. This guards against over-failing B-A3. + it("unterminated FINAL body at EOF is still accepted (EOF terminates final)", () => { + const raw = "<|channel|>final<|message|>the answer with no terminator"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.content).toBe("the answer with no terminator"); + expect(direct.reasoning).toBe(""); + }); + + // A legitimate analysis-followed-by-final stream where the analysis body IS + // terminated (by <|end|>) and only the final trails to EOF must still parse — + // the analysis terminator is present, so B-A3 must not fire on it. + it("analysis<|end|> + final-to-EOF still parses (analysis is terminated)", () => { + const raw = + "<|channel|>analysis<|message|>reasoning here<|end|><|start|>assistant<|channel|>final<|message|>final answer"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.reasoning).toBe("reasoning here"); + expect(direct.content).toBe("final answer"); + }); +}); + +describe("harmony fail-safe — commentary tool body vs message boundary (B-A2)", () => { + // (a) A tool arg that is a CLOSED JSON string which legitimately CONTAINS + // literal harmony tokens (<|start|>...<|message|>...) is valid data: the + // correct parse is ONE tool call whose argument is that string. This is the + // SAME mechanism that preserves embedded <|call|>/<|channel|> substrings in + // JSON args (matrix 13/14). PIN this as correct — guards against a wrong fix. 
+ it("(a) closed JSON arg containing literal harmony tokens -> one tool call", () => { + const arg = JSON.stringify({ + instruction: + "emit <|start|>assistant<|channel|>commentary to=functions.x<|message|>nested<|call|>", + }); + const raw = `<|start|>assistant<|channel|>commentary to=functions.outer<|constrain|>json<|message|>${arg}<|call|>`; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(false); + expect(direct.toolCalls).toHaveLength(1); + expect(direct.toolCalls[0].name).toBe("outer"); + expect(direct.toolCalls[0].arguments).toBe(arg); + expect(() => JSON.parse(direct.toolCalls[0].arguments)).not.toThrow(); + }); + + // (b) An UNTERMINATED tool call: a valid-JSON tool body followed by a real + // next message but with NO closing <|call|> for the first tool call. This + // must NOT silently merge/drop — it must fail safe verbatim. + it("(b) tool body with no <|call|> before a real next message fails safe", () => { + const raw = + '<|start|>assistant<|channel|>commentary to=functions.first<|constrain|>json<|message|>{"a":1}' + + "<|start|>assistant<|channel|>final<|message|>answer<|return|>"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.toolCalls).toEqual([]); + }); +}); + +describe("harmony fail-safe — quoted whole-message ambiguity (known limitation)", () => { + // A body that QUOTES a complete well-formed harmony message is structurally + // indistinguishable from two real messages in detokenized TEXT. When the + // resulting split yields cleanly well-formed messages, the parser accepts it + // (the quoted tokens are stripped) — this is the irreducible, documented + // KNOWN LIMITATION. We PIN the acknowledged-imperfect behavior here so it is + // a conscious choice, not a silent regression target. 
+ it("quoting a clean complete message splits into well-formed messages (documented)", () => { + const raw = + "<|channel|>final<|message|>To emit write " + + "<|start|>assistant<|channel|>final<|message|>hello<|return|>"; + const direct = parseHarmonyContent(raw); + // Acknowledged-imperfect: parsed as two final messages; quoted tokens gone. + expect(direct.failed).toBe(false); + expect(direct.content).toBe("To emit write hello"); + // Whatever the outcome, no control token ever leaks into the output. + expect(direct.content).not.toContain("<|"); + expect(direct.reasoning).not.toContain("<|"); + }); + + // The fail-safe edge of the same ambiguity: when the quoted message is + // followed by trailing junk, the split would yield a MALFORMED message, so + // the WHOLE input fails safe verbatim rather than emitting a mangled middle. + // This guarantees the behavior is always verbatim-or-clean, never mangled. + it("quoting a message followed by trailing junk fails safe verbatim (no mangle)", () => { + const raw = + "<|channel|>final<|message|>To emit write " + + "<|start|>assistant<|channel|>final<|message|>hello<|return|> and then stop"; + const direct = parseHarmonyContent(raw); + expect(direct.failed).toBe(true); + expect(direct.content).toBe(raw); + expect(direct.content).not.toBe("To emit write hello<|return|> and then stop"); + }); +}); diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts index 0cd1dcb..5ed64af 100644 --- a/src/stream-collapse.ts +++ b/src/stream-collapse.ts @@ -10,6 +10,7 @@ import { crc32 } from "node:zlib"; import type { RecordProviderKey, ToolCall } from "./types.js"; import type { Logger } from "./logger.js"; +import { isHarmonyContent, parseHarmonyContent } from "./harmony.js"; // --------------------------------------------------------------------------- // Result type shared by all collapse functions @@ -25,6 +26,79 @@ export interface CollapseResult { truncated?: boolean; audioB64?: string; audioMimeType?: string; + /** + * Set 
when harmony channel tokens were present in the accumulated content but + * could NOT be parsed into a complete, valid harmony structure. The content + * is preserved VERBATIM, so this is NOT transport loss — it is distinct from + * `droppedChunks` / `truncated`, which are reserved for genuine transport loss + * (malformed SSE/NDJSON frames, CRC mismatch). The caller surfaces this as a + * dedicated warning rather than a dropped/truncated-chunk warning. + */ + harmonyUnparsed?: true; + /** Short human-readable note accompanying {@link harmonyUnparsed}. */ + harmonyNote?: string; +} + +/** + * Slice the first `max` UTF-16 code units of `s` for a diagnostic sample, + * trimming a trailing lone high-surrogate so the resulting sample never ends on + * a lone high surrogate (i.e. never mid-surrogate-pair). + */ +function surrogateSafeSlice(s: string, max: number): string { + let out = s.slice(0, max); + if (out.length > 0) { + const last = out.charCodeAt(out.length - 1); + // A high surrogate (U+D800..U+DBFF) at the end is the lead of a split pair. + if (last >= 0xd800 && last <= 0xdbff) { + out = out.slice(0, -1); + } + } + return out; +} + +/** + * Split a raw SSE body into per-event blocks. + * + * Events are delimited by a blank line. Real HTTP/SSE transports use CRLF + * (`\r\n`) line endings, so the inter-event delimiter is `\r\n\r\n` (which + * contains no `\n\n` substring) and each line ends with a trailing `\r`. + * Splitting on `/\r?\n\r?\n/` handles LF, CRLF, and mixed streams; per-line + * `\r` trimming happens in {@link splitSSELines}. Blank blocks are dropped. + */ +function splitSSEEvents(body: string): string[] { + return body.split(/\r?\n\r?\n/).filter((block) => block.trim().length > 0); +} + +/** + * Split a single SSE event block into its lines, trimming a trailing `\r` so + * CRLF streams parse identically to LF streams. + */ +function splitSSELines(block: string): string[] { + return block.split("\n").map((line) => (line.endsWith("\r") ? 
line.slice(0, -1) : line)); +} + +/** + * Extract the SSE `data` field from a single event block's lines. + * + * Per the SSE spec a single event may carry MULTIPLE `data:` lines; the field + * value is every data line's content joined with "\n". Collecting only the + * first `data:` line (e.g. via `.find`) corrupts payloads that a server split + * across lines. Callers MUST pass lines produced by {@link splitSSELines} so + * any trailing `\r` is already stripped. Returns the joined payload (with the + * leading "data:" prefix and one optional leading space stripped per line), or + * `undefined` when the block contains no `data:` line. + */ +function extractSSEData(lines: string[]): string | undefined { + const dataParts: string[] = []; + for (const line of lines) { + if (!line.startsWith("data:")) continue; + // Strip "data:" then a single optional leading space, per the SSE spec. + let part = line.slice(5); + if (part.startsWith(" ")) part = part.slice(1); + dataParts.push(part); + } + if (dataParts.length === 0) return undefined; + return dataParts.join("\n"); } // --------------------------------------------------------------------------- @@ -39,19 +113,30 @@ export interface CollapseResult { * data: [DONE]\n\n */ export function collapseOpenAISSE(body: string): CollapseResult { - const lines = body.split("\n\n").filter((l) => l.trim().length > 0); + const lines = splitSSEEvents(body); let content = ""; let reasoning = ""; const webSearchQueries: string[] = []; let droppedChunks = 0; let firstDroppedSample: string | undefined; + let harmonyUnparsed = false; + let harmonyNote: string | undefined; const toolCallMap = new Map(); + // Fallback keying for deltas that OMIT `index`. Without this, every + // index-less delta collapses under one `undefined`/NaN key, merging distinct + // tool calls and corrupting arguments. 
Index-less fragments that share an + // `id` correlate via `idKeyMap`; otherwise each gets a fresh synthetic key + // assigned from a counter kept above any real index so sort order is stable. + // The 1_000_000 sentinel assumes real provider tool-call indices stay below + // it (they are small per-stream counters), so synthetic keys never collide. + let nextSyntheticIndex = 1_000_000; + const idKeyMap = new Map(); for (const line of lines) { - const dataLine = line.split("\n").find((l) => l.startsWith("data:")); - if (!dataLine) continue; + const data = extractSSEData(splitSSELines(line)); + if (data === undefined) continue; - const payload = dataLine.slice(5).trim(); + const payload = data.trim(); if (payload === "[DONE]") continue; let parsed: Record; @@ -61,7 +146,7 @@ export function collapseOpenAISSE(body: string): CollapseResult { droppedChunks++; if (droppedChunks === 1) { const msg = err instanceof Error ? err.message : "unknown"; - firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`; + firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`; } continue; } @@ -118,12 +203,30 @@ export function collapseOpenAISSE(body: string): CollapseResult { const toolCalls = delta.tool_calls as Array> | undefined; if (toolCalls) { for (const tc of toolCalls) { - const index = tc.index as number; const fn = tc.function as Record | undefined; + const rawId = typeof tc.id === "string" ? tc.id : undefined; + + // Resolve a stable map key. Prefer the streamed `index`; when it is + // absent, correlate by `id` if present, else mint a fresh synthetic + // key so distinct index-less calls never merge. 
+ let index: number; + if (typeof tc.index === "number") { + index = tc.index; + } else if (rawId !== undefined) { + const existing = idKeyMap.get(rawId); + if (existing !== undefined) { + index = existing; + } else { + index = nextSyntheticIndex++; + idKeyMap.set(rawId, index); + } + } else { + index = nextSyntheticIndex++; + } if (!toolCallMap.has(index)) { toolCallMap.set(index, { - id: (tc.id as string) ?? "", + id: rawId ?? "", name: (fn?.name as string) ?? "", arguments: "", }); @@ -143,17 +246,54 @@ export function collapseOpenAISSE(body: string): CollapseResult { } } - if (toolCallMap.size > 0) { + // Open-weight gpt-oss models (Ollama / vLLM / OpenRouter) stream tool calls + // as raw harmony channel tokens inside delta.content rather than structured + // delta.tool_calls. Harmony parsing is FALLBACK-ONLY: attempt it ONLY when + // there are NO structured delta.tool_calls. If structured tool calls exist, + // any harmony-looking content is prose — never merged (no phantom tool call), + // never stamped as truncated/dropped. When harmony IS the only source, a + // successful parse routes channels (content/reasoning/toolCalls); a failure + // preserves content VERBATIM and surfaces the distinct `harmonyUnparsed` + // signal (NOT droppedChunks/truncated — the bytes are not lost). + const harmonyToolCalls: ToolCall[] = []; + if (toolCallMap.size === 0 && isHarmonyContent(content)) { + const parsed = parseHarmonyContent(content); + if (parsed.failed) { + harmonyUnparsed = true; + harmonyNote = `harmony tokens present but unparseable; content preserved verbatim: ${surrogateSafeSlice(content, 200)}`; + } else { + content = parsed.content; + if (parsed.reasoning) { + reasoning += parsed.reasoning; + } + harmonyToolCalls.push(...parsed.toolCalls); + } + } + + if (toolCallMap.size > 0 || harmonyToolCalls.length > 0) { const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); return { ...(content ? 
{ content } : {}), - toolCalls: sorted.map(([, tc]) => ({ - name: tc.name, - arguments: tc.arguments, - ...(tc.id ? { id: tc.id } : {}), - })), + // Fallback-only: harmonyToolCalls are populated ONLY in the + // no-structured-calls branch, so this is never a merge of both sources. + toolCalls: [ + ...sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...harmonyToolCalls, + ], + // Reasoning is preserved alongside tool calls for ALL structured streams + // (DeepSeek/OpenRouter reasoning_content, harmony analysis channel), at + // parity with every other collapser and the non-streaming path. + ...(reasoning ? { reasoning } : {}), + // webSearches parity with the text-only return branch. + ...(webSearchQueries.length > 0 ? { webSearches: webSearchQueries } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), + ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}), + ...(harmonyNote ? { harmonyNote } : {}), }; } @@ -163,6 +303,8 @@ export function collapseOpenAISSE(body: string): CollapseResult { ...(webSearchQueries.length > 0 ? { webSearches: webSearchQueries } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), + ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}), + ...(harmonyNote ? 
{ harmonyNote } : {}), }; } @@ -178,21 +320,32 @@ export function collapseOpenAISSE(body: string): CollapseResult { * event: content_block_delta\ndata: {"delta":{"type":"text_delta","text":"Hello"}}\n\n */ export function collapseAnthropicSSE(body: string): CollapseResult { - const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); + const blocks = splitSSEEvents(body); let content = ""; let reasoning = ""; let droppedChunks = 0; let firstDroppedSample: string | undefined; const toolCallMap = new Map(); + // Fallback keying for content blocks that OMIT `index` (mirrors the OpenAI / + // Cohere / Bedrock guards). Without it, every index-less block collapses + // under one `undefined` key, merging distinct tool_use blocks. Index-less + // starts mint a fresh synthetic key (kept above any real index so sort order + // is stable). Despite its name, `lastSyntheticIndex` tracks whichever + // tool_use start most recently opened REGARDLESS of whether its index was + // real or synthetic (it is set on EVERY start), so an index-less delta + // correlates to the most-recent start — not just to the last synthetic one. + // The 1_000_000 sentinel assumes real provider indices stay below it. + let nextSyntheticIndex = 1_000_000; + let lastSyntheticIndex: number | undefined; for (const block of blocks) { - const lines = block.split("\n"); + const lines = splitSSELines(block); const eventLine = lines.find((l) => l.startsWith("event:")); - const dataLine = lines.find((l) => l.startsWith("data:")); - if (!dataLine) continue; + const data = extractSSEData(lines); + if (data === undefined) continue; const eventType = eventLine ? eventLine.slice(6).trim() : ""; - const payload = dataLine.slice(5).trim(); + const payload = data.trim(); let parsed: Record; try { @@ -201,15 +354,23 @@ export function collapseAnthropicSSE(body: string): CollapseResult { droppedChunks++; if (droppedChunks === 1) { const msg = err instanceof Error ? 
err.message : "unknown"; - firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`; + firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`; } continue; } if (eventType === "content_block_start") { - const index = parsed.index as number; const contentBlock = parsed.content_block as Record | undefined; if (contentBlock?.type === "tool_use") { + // Prefer the streamed `index`; when absent, mint a fresh synthetic key + // so distinct index-less tool_use blocks never merge. + let index: number; + if (typeof parsed.index === "number") { + index = parsed.index; + } else { + index = nextSyntheticIndex++; + } + lastSyntheticIndex = index; toolCallMap.set(index, { id: (contentBlock.id as string) ?? "", name: (contentBlock.name as string) ?? "", @@ -219,7 +380,6 @@ export function collapseAnthropicSSE(body: string): CollapseResult { } if (eventType === "content_block_delta") { - const index = parsed.index as number; const delta = parsed.delta as Record | undefined; if (!delta) continue; @@ -232,9 +392,24 @@ export function collapseAnthropicSSE(body: string): CollapseResult { } if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") { - const entry = toolCallMap.get(index); + // Use the streamed `index` when present; otherwise correlate to the + // most recent tool_use start (mirrors the start-side fallback). + const index = typeof parsed.index === "number" ? parsed.index : lastSyntheticIndex; + // A delta that cannot correlate to any known start (no streamed index + // AND no prior start, or a stale index with no entry) would otherwise + // silently lose its args. Account for it as a dropped chunk instead of + // vanishing (mirrors the Cohere uncorrelated-delta path). + const entry = index !== undefined ? 
toolCallMap.get(index) : undefined; if (entry) { entry.arguments += delta.partial_json; + } else { + droppedChunks++; + if (droppedChunks === 1) { + firstDroppedSample = `input_json_delta with no correlating tool_use start: ${surrogateSafeSlice( + payload, + 200, + )}`; + } } } } @@ -274,7 +449,7 @@ export function collapseAnthropicSSE(body: string): CollapseResult { * data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}]}\n\n */ export function collapseGeminiSSE(body: string): CollapseResult { - const lines = body.split("\n\n").filter((l) => l.trim().length > 0); + const lines = splitSSEEvents(body); let content = ""; let reasoning = ""; let droppedChunks = 0; @@ -284,10 +459,10 @@ export function collapseGeminiSSE(body: string): CollapseResult { const toolCalls: ToolCall[] = []; for (const line of lines) { - const dataLine = line.split("\n").find((l) => l.startsWith("data:")); - if (!dataLine) continue; + const data = extractSSEData(splitSSELines(line)); + if (data === undefined) continue; - const payload = dataLine.slice(5).trim(); + const payload = data.trim(); let parsed: Record; try { @@ -296,7 +471,7 @@ export function collapseGeminiSSE(body: string): CollapseResult { droppedChunks++; if (droppedChunks === 1) { const msg = err instanceof Error ? err.message : "unknown"; - firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`; + firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`; } continue; } @@ -315,7 +490,12 @@ export function collapseGeminiSSE(body: string): CollapseResult { const fc = part.functionCall as Record; toolCalls.push({ name: String(fc.name ?? ""), - arguments: typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args), + // Default undefined/object args to a JSON object string (matches + // collapseGeminiInteractionsSSE / Ollama). JSON.stringify(undefined) + // would otherwise yield the VALUE undefined, violating the + // ToolCall.arguments:string contract. 
+ arguments: + typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args ?? {}), }); } else if ( part.inlineData && @@ -340,9 +520,15 @@ export function collapseGeminiSSE(body: string): CollapseResult { } if (audioB64) { + // Preserve any content / reasoning / tool calls accumulated in the same + // stream — a Gemini turn can interleave audio with text and functionCall + // parts, and the early return must not silently drop them. return { audioB64, audioMimeType, + ...(content ? { content } : {}), + ...(reasoning ? { reasoning } : {}), + ...(toolCalls.length > 0 ? { toolCalls } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), }; @@ -378,12 +564,20 @@ export function collapseGeminiSSE(body: string): CollapseResult { * * /api/generate format: * {"model":"llama3","response":"Hello","done":false}\n + * + * Open-weight gpt-oss served via Ollama streams harmony channel tokens inside + * `message.content` (just like the OpenAI SSE path), so after accumulation the + * content is run through the same fail-safe {@link parseHarmonyContent} gate to + * capture structured tool calls / reasoning instead of leaking raw tokens. */ export function collapseOllamaNDJSON(body: string): CollapseResult { const lines = body.split("\n").filter((l) => l.trim().length > 0); let content = ""; + let reasoning = ""; let droppedChunks = 0; let firstDroppedSample: string | undefined; + let harmonyUnparsed = false; + let harmonyNote: string | undefined; const toolCalls: ToolCall[] = []; for (const line of lines) { @@ -394,7 +588,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { droppedChunks++; if (droppedChunks === 1) { const msg = err instanceof Error ? 
err.message : "unknown"; - firstDroppedSample = `parse failed (${msg}): ${line.trim().slice(0, 200)}`; + firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(line.trim(), 200)}`; } continue; } @@ -413,8 +607,13 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { if (fn) { toolCalls.push({ name: String(fn.name ?? ""), + // Default undefined/object args to a JSON object (matching + // collapseGeminiInteractionsSSE) — JSON.stringify(undefined) + // would otherwise yield the VALUE undefined (not a string). arguments: - typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments), + typeof fn.arguments === "string" + ? fn.arguments + : JSON.stringify(fn.arguments ?? {}), }); } } @@ -427,19 +626,46 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { } } + // Open-weight gpt-oss served via Ollama streams harmony channel tokens inside + // message.content (same as the OpenAI SSE path). Harmony parsing is + // FALLBACK-ONLY: attempt it ONLY when there are NO structured message + // tool_calls. If structured tool calls exist, harmony-looking content is + // prose — never merged (no phantom), never stamped truncated/dropped. On a + // harmony failure the content is preserved VERBATIM and surfaced via the + // distinct `harmonyUnparsed` signal (NOT droppedChunks/truncated). + if (toolCalls.length === 0 && isHarmonyContent(content)) { + const parsedHarmony = parseHarmonyContent(content); + if (parsedHarmony.failed) { + harmonyUnparsed = true; + harmonyNote = `harmony tokens present but unparseable; content preserved verbatim: ${surrogateSafeSlice(content, 200)}`; + } else { + content = parsedHarmony.content; + if (parsedHarmony.reasoning) { + reasoning += parsedHarmony.reasoning; + } + toolCalls.push(...parsedHarmony.toolCalls); + } + } + if (toolCalls.length > 0) { return { ...(content ? { content } : {}), toolCalls, + ...(reasoning ? { reasoning } : {}), ...(droppedChunks > 0 ?
{ droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), + ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}), + ...(harmonyNote ? { harmonyNote } : {}), }; } return { content, + ...(reasoning ? { reasoning } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), + ...(harmonyUnparsed ? { harmonyUnparsed: true } : {}), + ...(harmonyNote ? { harmonyNote } : {}), }; } @@ -454,20 +680,30 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { * event: content-delta\ndata: {"type":"content-delta","delta":{"message":{"content":{"text":"Hello"}}}}\n\n */ export function collapseCohereSSE(body: string): CollapseResult { - const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); + const blocks = splitSSEEvents(body); let content = ""; let droppedChunks = 0; let firstDroppedSample: string | undefined; const toolCallMap = new Map(); + // Fallback keying for tool-call events that OMIT `index` (mirrors the + // OpenAI guard). Without it, every index-less tool-call-start collapses + // under one `undefined`/NaN key, merging distinct calls. Index-less starts + // mint a fresh synthetic key. `lastStartKey` tracks the most-recent + // tool-call-start key REGARDLESS of whether it was real or synthetic, so an + // index-less tool-call-delta correlates to whichever start most recently + // opened — not just to the last synthetic one. The 1_000_000 sentinel + // assumes real provider indices stay below it. + let nextSyntheticIndex = 1_000_000; + let lastStartKey: number | undefined; for (const block of blocks) { - const lines = block.split("\n"); + const lines = splitSSELines(block); const eventLine = lines.find((l) => l.startsWith("event:")); - const dataLine = lines.find((l) => l.startsWith("data:")); - if (!dataLine) continue; + const data = extractSSEData(lines); + if (data === undefined) continue; const eventType = eventLine ? 
eventLine.slice(6).trim() : ""; - const payload = dataLine.slice(5).trim(); + const payload = data.trim(); let parsed: Record; try { @@ -476,7 +712,7 @@ export function collapseCohereSSE(body: string): CollapseResult { droppedChunks++; if (droppedChunks === 1) { const msg = err instanceof Error ? err.message : "unknown"; - firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`; + firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`; } continue; } @@ -491,7 +727,15 @@ export function collapseCohereSSE(body: string): CollapseResult { } if (eventType === "tool-call-start") { - const index = parsed.index as number; + let index: number; + if (typeof parsed.index === "number") { + index = parsed.index; + } else { + index = nextSyntheticIndex++; + } + // Track the most-recent start key (real OR synthetic) so a following + // index-less delta correlates to whichever call just opened. + lastStartKey = index; const delta = parsed.delta as Record | undefined; const message = delta?.message as Record | undefined; const toolCalls = message?.tool_calls as Record | undefined; @@ -506,16 +750,29 @@ export function collapseCohereSSE(body: string): CollapseResult { } if (eventType === "tool-call-delta") { - const index = parsed.index as number; + // Use the streamed `index` when present; otherwise correlate to the most + // recent tool-call-start (real or synthetic key). + const index = typeof parsed.index === "number" ? parsed.index : lastStartKey; const delta = parsed.delta as Record | undefined; const message = delta?.message as Record | undefined; const toolCalls = message?.tool_calls as Record | undefined; if (toolCalls) { const fn = toolCalls.function as Record | undefined; if (fn && typeof fn.arguments === "string") { - const entry = toolCallMap.get(index); + // A delta that cannot correlate to any known start (no streamed + // index AND no prior start) would otherwise silently lose its args. 
+ // Account for it as a dropped chunk instead of vanishing. + const entry = index !== undefined ? toolCallMap.get(index) : undefined; if (entry) { entry.arguments += fn.arguments; + } else { + droppedChunks++; + if (droppedChunks === 1) { + firstDroppedSample = `tool-call-delta with no correlating start: ${surrogateSafeSlice( + payload, + 200, + )}`; + } } } } @@ -586,26 +843,55 @@ function decodeEventStreamFrames(buf: Buffer): { // Parse headers const headersStart = offset + 12; const headersEnd = headersStart + headersLength; + const payloadEnd = offset + totalLength - 4; // minus message CRC + + // Validate the headers region fits inside the frame. A frame can carry a + // valid prelude CRC yet declare a `headersLength` that overruns the payload + // region (the prelude CRC only covers total/headers length, not the body). + // Without this guard a per-header read walks off the buffer and throws an + // uncaught RangeError; treat it as truncation instead. + if (headersEnd > payloadEnd || headersEnd > buf.length) { + return { frames, truncated: true }; + } + const headers: Record = {}; let hOffset = headersStart; + let headerOverrun = false; while (hOffset < headersEnd) { + // Each read must stay within the declared headers region. Bail out + // (truncated) on any overrun rather than reading past the boundary. 
+ if (hOffset + 1 > headersEnd) { + headerOverrun = true; + break; + } const nameLen = buf.readUInt8(hOffset); hOffset += 1; + if (hOffset + nameLen + 1 + 2 > headersEnd) { + headerOverrun = true; + break; + } const name = buf.subarray(hOffset, hOffset + nameLen).toString("utf8"); hOffset += nameLen; // Skip header type byte (type 7 = STRING) hOffset += 1; const valueLen = buf.readUInt16BE(hOffset); hOffset += 2; + if (hOffset + valueLen > headersEnd) { + headerOverrun = true; + break; + } const value = buf.subarray(hOffset, hOffset + valueLen).toString("utf8"); hOffset += valueLen; headers[name] = value; } + if (headerOverrun) { + return { frames, truncated: true }; + } + // Extract payload const payloadStart = headersEnd; - const payloadEnd = offset + totalLength - 4; // minus message CRC const payload = buf.subarray(payloadStart, payloadEnd); // Validate message CRC (covers entire frame minus last 4 bytes) @@ -644,7 +930,7 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult { droppedChunks++; if (droppedChunks === 1) { const msg = err instanceof Error ? err.message : "unknown"; - firstDroppedSample = `parse failed (${msg}): ${frameStr.slice(0, 200)}`; + firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(frameStr, 200)}`; } continue; } @@ -657,9 +943,20 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult { } if (delta?.type === "input_json_delta" && typeof delta.partial_json === "string") { const index = parsed.index as number | undefined; - if (index !== undefined) { - const entry = toolCallMap.get(index); - if (entry) entry.arguments += delta.partial_json; + // An arg delta that cannot correlate to a known tool_use start would + // otherwise silently lose its args. Account for it as a dropped chunk + // instead of vanishing (mirrors the Cohere uncorrelated-delta path). + const entry = index !== undefined ? 
toolCallMap.get(index) : undefined; + if (entry) { + entry.arguments += delta.partial_json; + } else { + droppedChunks++; + if (droppedChunks === 1) { + firstDroppedSample = `input_json_delta with no correlating tool_use start: ${surrogateSafeSlice( + frameStr, + 200, + )}`; + } } } continue; @@ -712,10 +1009,21 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult { // Tool use input JSON delta if (typeof delta.toolUse === "object" && delta.toolUse !== null) { const toolUseDelta = delta.toolUse as Record; - if (typeof toolUseDelta.input === "string" && index !== undefined) { - const entry = toolCallMap.get(index); + if (typeof toolUseDelta.input === "string") { + // An arg delta that cannot correlate to a known tool_use start would + // otherwise silently lose its args. Account for it as a dropped chunk + // instead of vanishing (mirrors the Cohere uncorrelated-delta path). + const entry = index !== undefined ? toolCallMap.get(index) : undefined; if (entry) { entry.arguments += toolUseDelta.input; + } else { + droppedChunks++; + if (droppedChunks === 1) { + firstDroppedSample = `toolUse.input delta with no correlating tool_use start: ${surrogateSafeSlice( + frameStr, + 200, + )}`; + } } } } @@ -756,7 +1064,7 @@ export function collapseBedrockEventStream(body: Buffer): CollapseResult { * data: {"event_type":"interaction.complete","interaction":{"id":"...","usage":{...}}}\n\n */ export function collapseGeminiInteractionsSSE(body: string): CollapseResult { - const lines = body.split("\n\n").filter((l) => l.trim().length > 0); + const lines = splitSSEEvents(body); let content = ""; let reasoning = ""; let droppedChunks = 0; @@ -764,10 +1072,10 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult { const toolCalls: ToolCall[] = []; for (const line of lines) { - const dataLine = line.split("\n").find((l) => l.startsWith("data:")); - if (!dataLine) continue; + const data = extractSSEData(splitSSELines(line)); + if (data === 
undefined) continue; - const payload = dataLine.slice(5).trim(); + const payload = data.trim(); let parsed: Record; try { @@ -776,7 +1084,7 @@ export function collapseGeminiInteractionsSSE(body: string): CollapseResult { droppedChunks++; if (droppedChunks === 1) { const msg = err instanceof Error ? err.message : "unknown"; - firstDroppedSample = `parse failed (${msg}): ${payload.slice(0, 200)}`; + firstDroppedSample = `parse failed (${msg}): ${surrogateSafeSlice(payload, 200)}`; } continue; } From b3a1d803ce3657415c2b81fb865de0cd032297dc Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 2 Jun 2026 13:27:56 -0700 Subject: [PATCH 3/5] fix: decode multibyte UTF-8 incrementally, harden recorder frame timing, and record audio companions --- .../recorder-multibyte-stream.test.ts | 175 ++++++ src/__tests__/recorder.test.ts | 592 +++++++++++++++++- src/recorder.ts | 98 ++- src/types.ts | 14 + 4 files changed, 867 insertions(+), 12 deletions(-) create mode 100644 src/__tests__/recorder-multibyte-stream.test.ts diff --git a/src/__tests__/recorder-multibyte-stream.test.ts b/src/__tests__/recorder-multibyte-stream.test.ts new file mode 100644 index 0000000..3455ab4 --- /dev/null +++ b/src/__tests__/recorder-multibyte-stream.test.ts @@ -0,0 +1,175 @@ +import { describe, it, expect } from "vitest"; +import { StreamingFrameDecoder } from "../recorder.js"; +import { collapseOpenAISSE } from "../stream-collapse.js"; + +// --------------------------------------------------------------------------- +// Multibyte UTF-8 streaming decode — regression for fixture garbling. +// +// When aimock proxies a streamed upstream LLM response, makeUpstreamRequest +// decodes each TCP chunk to text so it can split the byte stream into SSE / +// NDJSON frames. A multibyte UTF-8 character (CJK, emoji, ...) can have its +// bytes split across a TCP chunk boundary. 
Decoding each chunk independently +// with Buffer#toString() turns the partial sequence into U+FFFD replacement +// characters, corrupting the decoded frame text (a user reported garbage like +// "官网群" in a recorded fixture). +// +// There are TWO decode paths and this file covers BOTH: +// 1. The frame-TIMING path (StreamingFrameDecoder) — used when capturing +// per-frame arrival timestamps; it decodes incrementally as TCP chunks +// arrive, so it must buffer partial multibyte sequences across chunk +// boundaries itself. The first describe-block drives that decoder +// directly (the exact code makeUpstreamRequest uses), not a reimpl. +// 2. The recorded-BODY path — the full upstream byte stream is buffered via +// Buffer.concat and decoded ONCE with rawBuffer.toString() before being +// handed to the collapse functions. The final describe-block drives that +// path end-to-end (split-chunk bytes -> concat -> toString -> collapse) +// to pin that the body a fixture is built from is also U+FFFD-free. +// --------------------------------------------------------------------------- + +/** + * Split a UTF-8 buffer at an arbitrary byte offset, guaranteeing the cut lands + * inside a multibyte sequence (so naive per-chunk decode would mangle it). + */ +function splitMidCharacter(text: string): { first: Buffer; second: Buffer } { + const full = Buffer.from(text, "utf8"); + // Locate the first non-ASCII (multibyte lead) byte and cut one byte past it, + // so the split lands inside the multibyte sequence. + let i = 0; + while (i < full.length && full[i] < 0x80) i++; + // Guard against ASCII-only misuse: without a multibyte lead byte the cut + // would not straddle a character and the test would be a degenerate no-op. 
+ if (i >= full.length) { + throw new Error("splitMidCharacter: no multibyte lead byte found (ASCII-only input)"); + } + const cut = i + 1; // one byte into the multibyte sequence + return { first: full.subarray(0, cut), second: full.subarray(cut) }; +} + +describe("StreamingFrameDecoder", () => { + it("reassembles a CJK character split across two chunks without U+FFFD", () => { + const original = 'data: {"delta":"官网群"}\n\n'; + const { first, second } = splitMidCharacter(original); + + // Sanity: the split really does straddle a multibyte boundary, so a naive + // per-chunk decode would corrupt it. This pins WHY the test is meaningful. + expect(first.toString() + second.toString()).toContain("�"); + + const decoder = new StreamingFrameDecoder(); + let out = ""; + out += decoder.write(first); + out += decoder.write(second); + out += decoder.end(); + + expect(out).toBe(original); + expect(out).not.toContain("�"); + }); + + it("reassembles a 4-byte emoji split across two chunks without U+FFFD", () => { + const original = "data: 🎉🎉\n\n"; + const { first, second } = splitMidCharacter(original); + + expect(first.toString() + second.toString()).toContain("�"); + + const decoder = new StreamingFrameDecoder(); + let out = ""; + out += decoder.write(first); + out += decoder.write(second); + out += decoder.end(); + + expect(out).toBe(original); + expect(out).not.toContain("�"); + }); + + it("handles a multibyte character split byte-by-byte across many chunks", () => { + const original = "官"; // 3 bytes: E5 AE 98 + const full = Buffer.from(original, "utf8"); + const decoder = new StreamingFrameDecoder(); + let out = ""; + for (const byte of full) { + out += decoder.write(Buffer.from([byte])); + } + out += decoder.end(); + expect(out).toBe(original); + expect(out).not.toContain("�"); + }); + + it("passes ASCII-only frames through unchanged", () => { + const decoder = new StreamingFrameDecoder(); + let out = ""; + out += decoder.write(Buffer.from("data: hello\n\n", "utf8")); + 
out += decoder.end(); + expect(out).toBe("data: hello\n\n"); + }); +}); + +// --------------------------------------------------------------------------- +// Recorded-BODY multibyte path — regression for fixture garbling on the +// collapse path (NOT the frame-timing decoder above). +// +// On the non-timing capture path, makeUpstreamRequest accumulates the raw +// upstream bytes and decodes the COMPLETE buffer once (Buffer.concat then +// rawBuffer.toString()) before handing the text to a collapse function. Even +// when a multibyte UTF-8 character is split across TCP chunk boundaries, the +// concat-then-decode order means the body the fixture is built from must be +// U+FFFD-free. This test drives that exact order through collapseOpenAISSE. +// --------------------------------------------------------------------------- + +describe("recorded-body multibyte decode (Buffer.concat -> toString -> collapse)", () => { + /** + * Build an OpenAI SSE body containing the given content, then return its raw + * UTF-8 bytes split into two chunks at an offset that lands INSIDE a + * multibyte sequence (so a naive per-chunk decode would mangle it). + */ + function sseBytesSplitMidCharacter(content: string): { first: Buffer; second: Buffer } { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-mb", choices: [{ delta: { content } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + const full = Buffer.from(body, "utf8"); + // Cut one byte into the first multibyte (non-ASCII) sequence. + let i = 0; + while (i < full.length && full[i] < 0x80) i++; + // Guard against ASCII-only misuse: without a multibyte lead byte the cut + // would not straddle a character and the test would be a degenerate no-op. 
+ if (i >= full.length) { + throw new Error( + "sseBytesSplitMidCharacter: no multibyte lead byte found (ASCII-only content)", + ); + } + const cut = i + 1; + return { first: full.subarray(0, cut), second: full.subarray(cut) }; + } + + it("decodes a CJK body split across chunks with no U+FFFD via the collapse path", () => { + const content = "官网群 says hello 🎉"; + const { first, second } = sseBytesSplitMidCharacter(content); + + // Sanity: decoding the chunks INDEPENDENTLY would corrupt the text — this + // pins that the split really straddles a multibyte boundary (so the test + // is meaningful and not trivially green). + expect(first.toString() + second.toString()).toContain("�"); + + // The recorder's actual order: buffer all bytes, decode once, then collapse. + const rawBuffer = Buffer.concat([first, second]); + const decoded = rawBuffer.toString("utf8"); + const result = collapseOpenAISSE(decoded); + + expect(result.content).toBe(content); + expect(result.content).not.toContain("�"); + }); + + it("decodes an emoji-only body split across chunks with no U+FFFD via the collapse path", () => { + const content = "🎉🎉🎉"; + const { first, second } = sseBytesSplitMidCharacter(content); + + expect(first.toString() + second.toString()).toContain("�"); + + const rawBuffer = Buffer.concat([first, second]); + const result = collapseOpenAISSE(rawBuffer.toString("utf8")); + + expect(result.content).toBe(content); + expect(result.content).not.toContain("�"); + }); +}); diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts index 25ae6e6..4d777cd 100644 --- a/src/__tests__/recorder.test.ts +++ b/src/__tests__/recorder.test.ts @@ -5006,9 +5006,8 @@ describe("multi-call fixture disambiguation (issue #185)", () => { // The haiku fixture has systemHash metadata (from whichever call won) const haikuFixture = fixtures.find((f) => f.match.model === "claude-3-5-haiku")!; expect(haikuFixture).toBeDefined(); - expect((haikuFixture as Record).metadata).toBeDefined(); 
- const meta = (haikuFixture as Record).metadata as Record; - expect(meta.systemHash).toMatch(/^[a-f0-9]{8}$/); + expect(haikuFixture.metadata).toBeDefined(); + expect(haikuFixture.metadata!.systemHash).toMatch(/^[a-f0-9]{8}$/); // Cleanup await new Promise((resolve) => recorderServer.server.close(() => resolve())); @@ -5166,3 +5165,590 @@ describe("fixture metadata recording", () => { expect(hash1).not.toBe(hash2); }); }); + +// --------------------------------------------------------------------------- +// webSearches propagation into the persisted fixture +// +// Drives a raw upstream that emits exactly the OpenAI Responses-API SSE shape +// `collapseOpenAISSE` recognizes (a completed web_search_call), then exercises +// `proxyAndRecord` end-to-end and asserts that the collapsed `webSearches` land +// in the persisted fixture. +// --------------------------------------------------------------------------- + +describe("recorder webSearches propagation", () => { + let rawServer: http.Server | undefined; + + afterEach(async () => { + if (rawServer) { + await new Promise((resolve) => rawServer!.close(() => resolve())); + rawServer = undefined; + } + }); + + // Spin up a raw upstream that replies with a fixed SSE body and a real recorder + // server pointed at it, then POST a streaming chat request through the recorder. 
+ async function recordSse(sseBody: string): Promise<{ + fixturePath: string; + response: { status: number; body: string }; + }> { + rawServer = http.createServer((_upReq, upRes) => { + upRes.writeHead(200, { "Content-Type": "text/event-stream" }); + upRes.end(sseBody); + }); + await new Promise((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve())); + const upstreamPort = (rawServer!.address() as { port: number }).port; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-collapse-prop-")); + recorder = await createServer([], { + port: 0, + record: { + providers: { openai: `http://127.0.0.1:${upstreamPort}` }, + fixturePath: tmpDir, + }, + }); + + const response = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "search the web" }], + stream: true, + }); + + return { fixturePath: tmpDir, response }; + } + + it("propagates webSearches from a collapsed Responses-API stream into the persisted fixture", async () => { + // OpenAI Responses-API SSE: a completed web_search_call followed by text output. + // collapseOpenAISSE returns { content, webSearches: ["..."] }. + const sse = [ + 'data: {"type":"response.output_item.done","item":{"type":"web_search_call","action":{"query":"weather in Paris"}}}', + 'data: {"type":"response.output_text.delta","delta":"It is sunny in Paris."}', + "data: [DONE]", + ] + .map((l) => l + "\n\n") + .join(""); + + const { fixturePath } = await recordSse(sse); + + const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"), + ) as FixtureFile; + const saved = fixtureContent.fixtures[0].response as { + content: string; + webSearches?: string[]; + }; + expect(saved.content).toBe("It is sunny in Paris."); + // The bug: webSearches was collapsed but never written to the fixture. 
+ expect(saved.webSearches).toEqual(["weather in Paris"]); + }); + + it("propagates webSearches alongside tool calls into the persisted fixture", async () => { + // web_search_call + a structured tool call → collapsed result carries both + // toolCalls and webSearches; the fixture must retain webSearches in the + // tool-call branch too. + const sse = [ + 'data: {"type":"response.output_item.done","item":{"type":"web_search_call","action":{"query":"latest news"}}}', + 'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","function":{"name":"get_news","arguments":"{}"}}]}}]}', + "data: [DONE]", + ] + .map((l) => l + "\n\n") + .join(""); + + const { fixturePath } = await recordSse(sse); + + const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"), + ) as FixtureFile; + const saved = fixtureContent.fixtures[0].response as { + toolCalls: unknown[]; + webSearches?: string[]; + }; + expect(saved.toolCalls).toHaveLength(1); + expect(saved.webSearches).toEqual(["latest news"]); + }); +}); + +// --------------------------------------------------------------------------- +// Dropped-chunk diagnostic logging +// +// A malformed SSE frame is dropped during collapse; the collapser captures a +// `firstDroppedSample` diagnostic. Assert that sample reaches the logged +// dropped-chunk warning so the loss is actionable. 
+// --------------------------------------------------------------------------- + +describe("recorder dropped-chunk diagnostic", () => { + let rawServer: http.Server | undefined; + let warnSpy: MockInstance | undefined; + + afterEach(async () => { + warnSpy?.mockRestore(); + warnSpy = undefined; + if (rawServer) { + await new Promise((resolve) => rawServer!.close(() => resolve())); + rawServer = undefined; + } + }); + + it("logs firstDroppedSample alongside the dropped-chunk warning", async () => { + // A malformed data frame increments droppedChunks and sets firstDroppedSample. + const sse = [ + 'data: {"choices":[{"delta":{"content":"Hello"}}]}', + "data: {not valid json", // malformed → dropped, captured as the first sample + "data: [DONE]", + ] + .map((l) => l + "\n\n") + .join(""); + + // Capture warnings via a real logger instance (silent suppresses output, so + // spy on the instance method directly and run it through proxyAndRecord). + const logger = new Logger("warn"); + warnSpy = vi.spyOn(logger, "warn").mockImplementation(() => {}); + + rawServer = http.createServer((_upReq, upRes) => { + upRes.writeHead(200, { "Content-Type": "text/event-stream" }); + upRes.end(sse); + }); + await new Promise((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve())); + const upstreamPort = (rawServer!.address() as { port: number }).port; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-collapse-dropped-")); + const record: RecordConfig = { + providers: { openai: `http://127.0.0.1:${upstreamPort}` }, + fixturePath: tmpDir, + }; + + const { req, res } = createMockReqRes(); + Object.assign(res, { + writeHead: () => res, + write: () => true, + end: () => res, + setHeader: () => res, + flushHeaders: () => undefined, + }); + + await proxyAndRecord( + req, + res, + { model: "gpt-4", messages: [{ role: "user", content: "drop a chunk" }] }, + "openai", + "/v1/chat/completions", + [], + { record, logger }, + ); + + const warnings = warnSpy.mock.calls.map((c) => 
String(c[0])); + const droppedWarning = warnings.find((w) => w.includes("dropped during stream collapse")); + expect(droppedWarning).toBeDefined(); + // The bug: the sample diagnostic was computed but never surfaced. + expect(droppedWarning).toContain("not valid json"); + }); +}); + +// --------------------------------------------------------------------------- +// Gemini audio-branch companion-modality propagation +// +// A single Gemini turn can interleave inlineData audio with a functionCall (and +// text/thought parts). collapseGeminiSSE returns audioB64 ALONGSIDE +// toolCalls/content/reasoning, but the recorder audio branch historically built +// only `{ audio: { b64Json, contentType } }` — silently discarding the tool +// call. These tests drive the real record path against a raw Gemini SSE upstream +// and assert the persisted fixture retains the companion modalities. +// --------------------------------------------------------------------------- + +describe("recorder Gemini audio-branch propagation", () => { + let rawServer: http.Server | undefined; + + afterEach(async () => { + if (rawServer) { + await new Promise((resolve) => rawServer!.close(() => resolve())); + rawServer = undefined; + } + }); + + // Raw Gemini upstream emitting a fixed SSE body, fronted by a real recorder + // configured with the `gemini` provider key so collapseGeminiSSE runs. 
+ async function recordGeminiSse(sseBody: string): Promise<{ + fixturePath: string; + response: { status: number; body: string }; + }> { + rawServer = http.createServer((_upReq, upRes) => { + upRes.writeHead(200, { "Content-Type": "text/event-stream" }); + upRes.end(sseBody); + }); + await new Promise((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve())); + const upstreamPort = (rawServer!.address() as { port: number }).port; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-gemini-audio-")); + recorder = await createServer([], { + port: 0, + record: { + providers: { gemini: `http://127.0.0.1:${upstreamPort}` }, + fixturePath: tmpDir, + }, + }); + + // Gemini streaming is the :streamGenerateContent endpoint with a Gemini-shaped + // request body (contents/parts), which routes to the `gemini` provider so + // collapseGeminiSSE runs over the upstream SSE. + const response = await post( + `${recorder.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`, + { + contents: [{ role: "user", parts: [{ text: "speak and call a tool" }] }], + }, + ); + + return { fixturePath: tmpDir, response }; + } + + it("retains a functionCall in the persisted fixture when audio is also present", async () => { + // Gemini SSE interleaving inlineData audio with a functionCall part. + // collapseGeminiSSE returns { audioB64, audioMimeType, toolCalls }. 
+ const sse = [ + JSON.stringify({ + candidates: [ + { + content: { + parts: [{ inlineData: { mimeType: "audio/pcm", data: "QUJD" } }], + }, + }, + ], + }), + JSON.stringify({ + candidates: [ + { + content: { + parts: [{ functionCall: { name: "get_weather", args: { city: "SF" } } }], + }, + }, + ], + }), + ] + .map((l) => `data: ${l}\n\n`) + .join(""); + + const { fixturePath } = await recordGeminiSse(sse); + + const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"), + ) as FixtureFile; + const saved = fixtureContent.fixtures[0].response as { + audio: { b64Json: string; contentType?: string }; + toolCalls?: Array<{ name: string; arguments: string }>; + }; + // Audio still persisted. + expect(saved.audio.b64Json).toBe("QUJD"); + expect(saved.audio.contentType).toBe("audio/pcm"); + // The bug: the tool call was collapsed but dropped from the fixture. + expect(saved.toolCalls).toHaveLength(1); + expect(saved.toolCalls![0].name).toBe("get_weather"); + expect(JSON.parse(saved.toolCalls![0].arguments)).toEqual({ city: "SF" }); + }); + + it("retains text content and reasoning alongside audio in the persisted fixture", async () => { + // Audio interleaved with a normal text part and a `thought` (reasoning) part. 
+ const sse = [ + JSON.stringify({ + candidates: [ + { + content: { + parts: [{ inlineData: { mimeType: "audio/pcm", data: "WFla" } }], + }, + }, + ], + }), + JSON.stringify({ + candidates: [ + { + content: { + parts: [ + { text: "Here is the weather.", thought: false }, + { text: "Thinking about it.", thought: true }, + ], + }, + }, + ], + }), + ] + .map((l) => `data: ${l}\n\n`) + .join(""); + + const { fixturePath } = await recordGeminiSse(sse); + + const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"), + ) as FixtureFile; + const saved = fixtureContent.fixtures[0].response as { + audio: { b64Json: string }; + content?: string; + reasoning?: string; + }; + expect(saved.audio.b64Json).toBe("WFla"); + // The bug: content/reasoning collapsed alongside audio were dropped. + expect(saved.content).toBe("Here is the weather."); + expect(saved.reasoning).toBe("Thinking about it."); + }); +}); + +// --------------------------------------------------------------------------- +// Harmony-unparsed recording (end-to-end) +// +// When a gpt-oss stream carries harmony channel tokens that cannot be parsed +// into a valid harmony structure, the collapser preserves the bytes VERBATIM and +// surfaces the distinct `harmonyUnparsed` signal — it is NOT transport loss. +// The recorder must therefore persist a content-bearing fixture (verbatim, not +// an error/truncated fixture) and emit a DISTINCT harmony warning, never the +// dropped-chunk or truncation warnings. 
+// --------------------------------------------------------------------------- + +describe("recorder harmony-unparsed recording", () => { + let rawServer: http.Server | undefined; + let warnSpy: MockInstance | undefined; + + afterEach(async () => { + warnSpy?.mockRestore(); + warnSpy = undefined; + if (rawServer) { + await new Promise<void>((resolve) => rawServer!.close(() => resolve())); + rawServer = undefined; + } + }); + + // A <|channel|> + <|message|> opener whose tool-call body never yields valid + // JSON — isHarmonyContent recognizes the tokens but parsing fails, so the + // collapser sets harmonyUnparsed and preserves content verbatim. + const BROKEN_HARMONY = + "<|start|>assistant<|channel|>commentary to=functions.broken<|constrain|>json<|message|>{not valid json"; + + it("persists verbatim harmony content as a content fixture (not error/truncated)", async () => { + const sse = [ + `data: ${JSON.stringify({ id: "chatcmpl-broken", choices: [{ delta: { content: BROKEN_HARMONY } }] })}`, + "data: [DONE]", + ] + .map((l) => l + "\n\n") + .join(""); + + rawServer = http.createServer((_upReq, upRes) => { + upRes.writeHead(200, { "Content-Type": "text/event-stream" }); + upRes.end(sse); + }); + await new Promise<void>((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve())); + const upstreamPort = (rawServer!.address() as { port: number }).port; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-harmony-record-")); + recorder = await createServer([], { + port: 0, + record: { + providers: { openai: `http://127.0.0.1:${upstreamPort}` }, + fixturePath: tmpDir, + }, + }); + + await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-oss", + messages: [{ role: "user", content: "use harmony" }], + stream: true, + }); + + const files = fs.readdirSync(tmpDir).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"), + ) as FixtureFile; + const saved =
fixtureContent.fixtures[0].response as { + content?: string; + error?: unknown; + toolCalls?: unknown[]; + }; + // Verbatim content, no fabricated tool call, no error fixture. + expect(saved.error).toBeUndefined(); + expect(saved.toolCalls).toBeUndefined(); + expect(saved.content).toBe(BROKEN_HARMONY); + }); + + it("emits the distinct harmony-unparsed warning, not dropped-chunk/truncation warnings", async () => { + const sse = [ + `data: ${JSON.stringify({ id: "chatcmpl-broken", choices: [{ delta: { content: BROKEN_HARMONY } }] })}`, + "data: [DONE]", + ] + .map((l) => l + "\n\n") + .join(""); + + const logger = new Logger("warn"); + warnSpy = vi.spyOn(logger, "warn").mockImplementation(() => {}); + + rawServer = http.createServer((_upReq, upRes) => { + upRes.writeHead(200, { "Content-Type": "text/event-stream" }); + upRes.end(sse); + }); + await new Promise<void>((resolve) => rawServer!.listen(0, "127.0.0.1", () => resolve())); + const upstreamPort = (rawServer!.address() as { port: number }).port; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-harmony-warn-")); + const record: RecordConfig = { + providers: { openai: `http://127.0.0.1:${upstreamPort}` }, + fixturePath: tmpDir, + }; + + const { req, res } = createMockReqRes(); + Object.assign(res, { + writeHead: () => res, + write: () => true, + end: () => res, + setHeader: () => res, + flushHeaders: () => undefined, + }); + + await proxyAndRecord( + req, + res, + { model: "gpt-oss", messages: [{ role: "user", content: "use harmony" }] }, + "openai", + "/v1/chat/completions", + [], + { record, logger }, + ); + + const warnings = warnSpy.mock.calls.map((c) => String(c[0])); + const harmonyWarning = warnings.find((w) => + w.includes("Harmony tokens present but unparseable"), + ); + expect(harmonyWarning).toBeDefined(); + // Distinct signal — NOT counted as dropped/truncated transport loss.
+ expect(warnings.some((w) => w.includes("dropped during stream collapse"))).toBe(false); + expect(warnings.some((w) => w.includes("may be truncated"))).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// Frame-timing splitter — CRLF delimiter tolerance +// +// Some upstreams/proxies emit SSE/NDJSON frames with CRLF line endings +// (\r\n\r\n for SSE, \r\n for NDJSON), which the SSE spec permits. The +// frame-timing splitter must split on these boundaries so per-frame +// timestamps are captured. An LF-only splitter sees the whole stream as a +// single frame, producing no recordedTimings. +// --------------------------------------------------------------------------- + +describe("recorder frame-timing: CRLF delimiters", () => { + it("captures per-frame timing for CRLF-delimited SSE streams", async () => { + // Anthropic-style SSE, but with CRLF (\r\n\r\n) frame boundaries. + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "text/event-stream" }); + const frames = [ + `event: message_start\r\ndata: ${JSON.stringify({ type: "message_start", message: { id: "msg_crlf", role: "assistant" } })}`, + `event: content_block_delta\r\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "CRLF " } })}`, + `event: content_block_delta\r\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "frames" } })}`, + `event: message_stop\r\ndata: ${JSON.stringify({ type: "message_stop" })}`, + ]; + // Write each frame with a CRLF/CRLF terminator on its own tick so the + // per-frame timestamps are distinguishable. 
+ let i = 0; + const writeNext = () => { + if (i >= frames.length) { + res.end(); + return; + } + res.write(`${frames[i]}\r\n\r\n`); + i++; + setTimeout(writeNext, 2); + }; + writeNext(); + }); + await new Promise<void>((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "crlf sse timing test" }], + stream: true, + }); + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"), + ) as FixtureFile; + + // Content collapse must still work across CRLF frames. + const savedResponse = fixtureContent.fixtures[0].response as { content?: string }; + expect(savedResponse.content).toBe("CRLF frames"); + + // The splitter must have seen each CRLF-terminated frame individually, + // so recordedTimings is present with one inter-chunk delay per frame gap. + const timings = fixtureContent.fixtures[0].recordedTimings; + expect(timings).toBeDefined(); + // 4 frames → 3 inter-frame delays. + expect(timings!.interChunkDelaysMs.length).toBe(3); + + await new Promise<void>((resolve) => rawServer.close(() => resolve())); + }); + + it("captures per-frame timing for CRLF-delimited NDJSON streams", async () => { + // Ollama-style NDJSON, but with CRLF (\r\n) line endings.
+ const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/x-ndjson" }); + const lines = [ + JSON.stringify({ message: { role: "assistant", content: "NDJSON " }, done: false }), + JSON.stringify({ message: { role: "assistant", content: "over " }, done: false }), + JSON.stringify({ message: { role: "assistant", content: "CRLF" }, done: true }), + ]; + let i = 0; + const writeNext = () => { + if (i >= lines.length) { + res.end(); + return; + } + res.write(`${lines[i]}\r\n`); + i++; + setTimeout(writeNext, 2); + }; + writeNext(); + }); + await new Promise<void>((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "crlf ndjson timing test" }], + stream: true, + }); + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, files[0]), "utf-8"), + ) as FixtureFile; + + // Each CRLF-terminated NDJSON line must be timestamped individually. + const timings = fixtureContent.fixtures[0].recordedTimings; + expect(timings).toBeDefined(); + // 3 frames → 2 inter-frame delays.
+ expect(timings!.interChunkDelaysMs.length).toBe(2); + + await new Promise<void>((resolve) => rawServer.close(() => resolve())); + }); +}); diff --git a/src/recorder.ts b/src/recorder.ts index a892a9d..b503ea4 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -3,6 +3,7 @@ import * as https from "node:https"; import * as fs from "node:fs"; import * as path from "node:path"; import * as crypto from "node:crypto"; +import { StringDecoder } from "node:string_decoder"; import type { ChatCompletionRequest, Fixture, @@ -366,15 +367,40 @@ export async function proxyAndRecord( defaults.logger.warn("Bedrock EventStream: CRC mismatch — response may be truncated"); } if (collapsed.droppedChunks && collapsed.droppedChunks > 0) { - defaults.logger.warn(`${collapsed.droppedChunks} chunk(s) dropped during stream collapse`); + defaults.logger.warn( + `${collapsed.droppedChunks} chunk(s) dropped during stream collapse${collapsed.firstDroppedSample ? ` — first: ${collapsed.firstDroppedSample}` : ""}`, + ); + } + if (collapsed.harmonyUnparsed) { + defaults.logger.warn( + `Harmony tokens present but unparseable — content preserved verbatim${collapsed.harmonyNote ? ` (${collapsed.harmonyNote})` : ""}`, + ); } - // Audio from streamed inlineData (e.g. Gemini SSE with audio parts) + // Audio from streamed inlineData (e.g. Gemini SSE with audio parts). + // A single Gemini turn can interleave audio with a functionCall and/or + // text/thought parts; preserve those companion modalities so the tool call + // / content / reasoning are not silently dropped when audio is present. if (collapsed.audioB64) { + const audioToolCallsSpread = + collapsed.toolCalls && collapsed.toolCalls.length > 0 + ? { + toolCalls: collapsed.toolCalls.map((tc) => ({ + ...tc, + name: tc.name ?? "", + arguments: tc.arguments ?? "{}", + })), + } + : {}; + const audioContentSpread = collapsed.content ? { content: collapsed.content } : {}; + const audioReasoningSpread = collapsed.reasoning ?
{ reasoning: collapsed.reasoning } : {}; fixtureResponse = { audio: { b64Json: collapsed.audioB64, contentType: collapsed.audioMimeType ?? "audio/mpeg", }, + ...audioToolCallsSpread, + ...audioContentSpread, + ...audioReasoningSpread, }; } else if ( collapsed.content === "" && @@ -382,9 +408,19 @@ export async function proxyAndRecord( ) { defaults.logger.warn("Stream collapse produced empty content — fixture may be incomplete"); const reasoningSpread = collapsed.reasoning ? { reasoning: collapsed.reasoning } : {}; - fixtureResponse = { content: collapsed.content ?? "", ...reasoningSpread }; + const webSearchesSpread = collapsed.webSearches?.length + ? { webSearches: collapsed.webSearches } + : {}; + fixtureResponse = { + content: collapsed.content ?? "", + ...reasoningSpread, + ...webSearchesSpread, + }; } else { const reasoningSpread = collapsed.reasoning ? { reasoning: collapsed.reasoning } : {}; + const webSearchesSpread = collapsed.webSearches?.length + ? { webSearches: collapsed.webSearches } + : {}; if (collapsed.toolCalls && collapsed.toolCalls.length > 0) { const sanitizedToolCalls = collapsed.toolCalls.map((tc) => ({ ...tc, @@ -397,12 +433,21 @@ export async function proxyAndRecord( content: collapsed.content, toolCalls: sanitizedToolCalls, ...reasoningSpread, + ...webSearchesSpread, }; } else { - fixtureResponse = { toolCalls: sanitizedToolCalls, ...reasoningSpread }; + fixtureResponse = { + toolCalls: sanitizedToolCalls, + ...reasoningSpread, + ...webSearchesSpread, + }; } } else { - fixtureResponse = { content: collapsed.content ?? "", ...reasoningSpread }; + fixtureResponse = { + content: collapsed.content ?? "", + ...reasoningSpread, + ...webSearchesSpread, + }; } } } else { @@ -572,6 +617,26 @@ export async function proxyAndRecord( // Internal helpers // --------------------------------------------------------------------------- +/** + * Decodes a sequence of byte chunks to UTF-8 text for SSE/NDJSON frame + * splitting on the streamed-capture path. 
Wraps Node's StringDecoder so a + * multibyte UTF-8 character (CJK, emoji, ...) whose bytes are split across a + * TCP chunk boundary buffers across chunks instead of decoding to U+FFFD + * replacement characters — decoding each chunk independently with + * Buffer#toString() would corrupt the recorded frame text. + */ +export class StreamingFrameDecoder { + private decoder = new StringDecoder("utf8"); + /** Decode a chunk, holding back any trailing partial multibyte sequence. */ + write(chunk: Buffer): string { + return this.decoder.write(chunk); + } + /** Flush any buffered bytes once the stream has ended. */ + end(): string { + return this.decoder.end(); + } +} + function clampTimeout(value: number | undefined, fallback: number): number { if (value == null || !Number.isFinite(value) || value <= 0) return fallback; return value; @@ -631,6 +696,10 @@ function makeUpstreamRequest( const frameTimestamps: number[] = []; const streamStartTime = Date.now(); let frameBuffer = ""; + // Decode chunks through a streaming-aware decoder so a multibyte UTF-8 + // character split across a TCP chunk boundary buffers across chunks + // instead of decoding to U+FFFD replacement characters. + const frameDecoder = new StreamingFrameDecoder(); let binaryFrameBuffer = Buffer.alloc(0); let streamedToClient = false; @@ -670,8 +739,14 @@ function makeUpstreamRequest( // TCP data events don't align with SSE frames — buffer and // split on the protocol delimiter to timestamp each complete frame. if (isSSE || isNDJSON) { - frameBuffer += chunk.toString(); - const delimiter = isNDJSON ? "\n" : "\n\n"; + frameBuffer += frameDecoder.write(chunk); + // Split on the protocol delimiter, tolerating CRLF line endings. + // The SSE spec permits CRLF, and some upstreams/proxies emit + // \r\n\r\n (SSE) or \r\n (NDJSON) frame boundaries. An LF-only + // split would see the whole CRLF stream as a single frame and + // lose per-frame timing. 
The last split element (a partial frame + // tail) stays buffered, exactly as with a string delimiter. + const delimiter = isNDJSON ? /\r?\n/ : /\r?\n\r?\n/; const parts = frameBuffer.split(delimiter); // All complete frames (everything except the last part which // may be incomplete). @@ -721,8 +796,13 @@ function makeUpstreamRequest( // the stream ended without a trailing delimiter. Binary EventStream // frames are length-prefixed so partial frames at end-of-stream are // genuinely incomplete and should not be timestamped. - if ((isSSE || isNDJSON) && frameBuffer.trim().length > 0) { - frameTimestamps.push(Date.now()); + if (isSSE || isNDJSON) { + // Drain any bytes the decoder buffered for an incomplete multibyte + // sequence so the final frame text is complete before we test it. + frameBuffer += frameDecoder.end(); + if (frameBuffer.trim().length > 0) { + frameTimestamps.push(Date.now()); + } } const rawBuffer = Buffer.concat(chunks); if ( diff --git a/src/types.ts b/src/types.ts index 5124991..bb60d30 100644 --- a/src/types.ts +++ b/src/types.ts @@ -192,9 +192,23 @@ export interface ImageResponse { images?: ImageItem[]; } +// ORDERING CONTRACT: audio fixtures MUST be discriminated by `isAudioResponse` +// BEFORE the `isContentWithToolCallsResponse` / `isToolCallResponse` / text +// guards, because the optional companion fields below make these shapes +// structurally overlap (an AudioResponse with `toolCalls`/`content` would also +// satisfy those guards otherwise). export interface AudioResponse { audio: string | { b64Json: string; contentType?: string }; format?: string; + /** + * Companion modalities that can accompany streamed audio. A single Gemini turn + * may interleave inlineData audio with a functionCall and/or text/thought + * parts; the recorder preserves them here so the tool call / content / reasoning + * are not silently discarded when audio is also present. 
+ */ + toolCalls?: ToolCall[]; + content?: string; + reasoning?: string; } export interface TranscriptionResponse { From b612db1fb6575857a8bb725042d5941ebee8073c Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 2 Jun 2026 13:27:59 -0700 Subject: [PATCH 4/5] fix: replay Gemini audio companion tool calls, content, and reasoning --- src/__tests__/gemini-audio.test.ts | 102 +++++++++++++++++++++++++++++ src/gemini.ts | 39 ++++++++--- 2 files changed, 132 insertions(+), 9 deletions(-) diff --git a/src/__tests__/gemini-audio.test.ts b/src/__tests__/gemini-audio.test.ts index 8b5c03b..fb14b4e 100644 --- a/src/__tests__/gemini-audio.test.ts +++ b/src/__tests__/gemini-audio.test.ts @@ -153,6 +153,108 @@ describe("Gemini audio responses", () => { }); }); + test("non-streaming audio turn replays companion tool call + content + reasoning", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "audio with tool call" }, + response: { + audio: "SGVsbG8=", + format: "mp3", + content: "Here is the audio you asked for.", + reasoning: "User wants audio plus a lookup.", + toolCalls: [{ id: "call_1", name: "lookup", arguments: '{"query":"weather"}' }], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1beta/models/lyria-3:generateContent`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + contents: [{ role: "user", parts: [{ text: "audio with tool call" }] }], + }), + }); + expect(res.status).toBe(200); + const data = await res.json(); + const parts = data.candidates[0].content.parts; + + // Audio (inlineData) must still be present and first. + expect(parts[0].inlineData).toEqual({ mimeType: "audio/mpeg", data: "SGVsbG8=" }); + + // Coverage pin: a tool-call-bearing turn must finish with FUNCTION_CALL, + // never STOP — guards against a regression that emits STOP with a tool call. 
+ expect(data.candidates[0].finishReason).toBe("FUNCTION_CALL"); + + // Companion modalities must NOT be dropped on replay. + const functionCallPart = parts.find((p: { functionCall?: unknown }) => p.functionCall); + expect(functionCallPart).toBeDefined(); + expect(functionCallPart.functionCall.name).toBe("lookup"); + expect(functionCallPart.functionCall.args).toEqual({ query: "weather" }); + expect(functionCallPart.functionCall.id).toBe("call_1"); + + const textPart = parts.find((p: { text?: string; thought?: boolean }) => p.text && !p.thought); + expect(textPart?.text).toBe("Here is the audio you asked for."); + + const thoughtPart = parts.find((p: { text?: string; thought?: boolean }) => p.thought); + expect(thoughtPart?.text).toBe("User wants audio plus a lookup."); + }); + + test("streaming audio turn replays companion tool call + content + reasoning", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "stream audio with tool call" }, + response: { + audio: "SGVsbG8=", + format: "mp3", + content: "Streamed text.", + reasoning: "Streamed thought.", + toolCalls: [{ id: "call_2", name: "fetch", arguments: '{"url":"x"}' }], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1beta/models/lyria-3:streamGenerateContent`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + contents: [{ role: "user", parts: [{ text: "stream audio with tool call" }] }], + }), + }); + expect(res.status).toBe(200); + + const text = await res.text(); + const chunks = text + .split("\n\n") + .filter((line) => line.startsWith("data: ")) + .map((line) => JSON.parse(line.replace("data: ", ""))); + + const allParts = chunks.flatMap((c) => c.candidates[0].content.parts); + + expect(allParts.some((p: { inlineData?: unknown }) => p.inlineData)).toBe(true); + + // Coverage pin: a tool-call-bearing turn must finish with FUNCTION_CALL, + // never STOP — guards against a regression that emits 
STOP with a tool call. + expect( + chunks.some( + (c: { candidates: Array<{ finishReason?: string }> }) => + c.candidates[0].finishReason === "FUNCTION_CALL", + ), + ).toBe(true); + + const functionCallPart = allParts.find((p: { functionCall?: unknown }) => p.functionCall); + expect(functionCallPart).toBeDefined(); + expect(functionCallPart.functionCall.name).toBe("fetch"); + expect(functionCallPart.functionCall.id).toBe("call_2"); + + const textPart = allParts.find( + (p: { text?: string; thought?: boolean }) => p.text && !p.thought, + ); + expect(textPart?.text).toBe("Streamed text."); + + const thoughtPart = allParts.find((p: { text?: string; thought?: boolean }) => p.thought); + expect(thoughtPart?.text).toBe("Streamed thought."); + }); + test("onAudio() convenience method works via Gemini", async () => { mock = new LLMock({ port: 0 }); mock.onAudio("piano loop", { audio: "SGVsbG8=" }); diff --git a/src/gemini.ts b/src/gemini.ts index a10d13e..f240f16 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -509,13 +509,35 @@ function resolveAudioInlineData(audio: AudioResponse): { mimeType: string; data: }; } -function buildGeminiAudioResponse(audio: AudioResponse): GeminiResponseChunk { +// Build the ordered Gemini parts for an audio turn: inlineData audio first, +// then any companion modalities (reasoning/thought, text content, tool calls) +// the recorder preserved on the AudioResponse. Without this, a recorded turn +// that interleaves audio with a functionCall and/or text silently drops the +// companions on replay. +// NOTE: audio companions are only re-emitted on this Gemini replay path because +// `audioB64` collapse is currently Gemini-only — a cross-provider audio fixture +// would not replay its companions. 
+function buildGeminiAudioParts(audio: AudioResponse, logger: Logger): GeminiPart[] { const inlineData = resolveAudioInlineData(audio); + const parts: GeminiPart[] = [{ inlineData }]; + if (audio.reasoning) { + parts.push({ text: audio.reasoning, thought: true }); + } + if (audio.content) { + parts.push({ text: audio.content }); + } + if (audio.toolCalls?.length) { + parts.push(...audio.toolCalls.map((tc) => parseToolCallPart(tc, logger))); + } + return parts; +} + +function buildGeminiAudioResponse(audio: AudioResponse, logger: Logger): GeminiResponseChunk { return { candidates: [ { - content: { role: "model", parts: [{ inlineData }] }, - finishReason: "STOP", + content: { role: "model", parts: buildGeminiAudioParts(audio, logger) }, + finishReason: audio.toolCalls?.length ? "FUNCTION_CALL" : "STOP", index: 0, }, ], @@ -523,14 +545,13 @@ function buildGeminiAudioResponse(audio: AudioResponse): GeminiResponseChunk { }; } -function buildGeminiAudioStreamChunks(audio: AudioResponse): GeminiResponseChunk[] { - const inlineData = resolveAudioInlineData(audio); +function buildGeminiAudioStreamChunks(audio: AudioResponse, logger: Logger): GeminiResponseChunk[] { return [ { candidates: [ { - content: { role: "model", parts: [{ inlineData }] }, - finishReason: "STOP", + content: { role: "model", parts: buildGeminiAudioParts(audio, logger) }, + finishReason: audio.toolCalls?.length ? 
"FUNCTION_CALL" : "STOP", index: 0, }, ], @@ -790,11 +811,11 @@ export async function handleGemini( response: { status: 200, fixture }, }); if (!streaming) { - const body = buildGeminiAudioResponse(response); + const body = buildGeminiAudioResponse(response, logger); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { - const chunks = buildGeminiAudioStreamChunks(response); + const chunks = buildGeminiAudioStreamChunks(response, logger); const interruption = createInterruptionSignal(fixture); const completed = await writeGeminiSSEStream(res, chunks, { latency, From 75c1066ad996cbe77d04ac1984a698cb2ad3b516 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 2 Jun 2026 14:38:03 -0700 Subject: [PATCH 5/5] chore: release v1.28.0 --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- CHANGELOG.md | 12 ++++++++++++ charts/aimock/Chart.yaml | 2 +- package.json | 2 +- 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 8d51cfa..6f7bd7f 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,7 @@ "source": { "source": "npm", "package": "@copilotkit/aimock", - "version": "^1.27.3" + "version": "^1.28.0" }, "description": "Fixture authoring skill for @copilotkit/aimock — LLM, multimedia (image/TTS/transcription/video), MCP, A2A, AG-UI, vector, embeddings, structured output, sequential responses, streaming physics, record/replay, agent loop patterns, and debugging" } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index ef930da..a92dbcd 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "aimock", - "version": "1.27.3", + "version": "1.28.0", "description": "Fixture authoring guidance for @copilotkit/aimock — LLM, multimedia, MCP, A2A, AG-UI, vector, and service mocking", "author": { "name": "CopilotKit" diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 91d17e2..f17158e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ ## [Unreleased] +## [1.28.0] - 2026-06-02 + +### Added + +- **Harmony channel format** — parse OpenAI "harmony" channel tokens (`<|channel|>… <|message|>… <|call|>`) emitted by local gpt-oss models (Ollama / vLLM / OpenRouter) so their tool calls, reasoning, and content are captured when recording (hosted OpenAI pre-parses harmony, so only local runtimes pass it through raw). Implemented as a lexer + state-machine parser with a uniform all-or-nothing verbatim fail-safe, wired as fallback-only so it never produces phantom tool calls. + +### Fixed + +- **Recorder** — decode streamed response chunks incrementally to prevent multibyte UTF-8 corruption; CRLF-tolerant frame-timing splitter; propagate `webSearches` and audio-companion fields (tool calls / content / reasoning) into recorded fixtures; log `firstDroppedSample` alongside dropped-chunk warnings. +- **Stream collapsers** — multi-line and CRLF SSE handling; missing/uncorrelated tool-call index guards with symmetric dropped-chunk accounting across OpenAI / Anthropic / Bedrock / Cohere; bound Bedrock EventStream header parsing against malformed frames. +- **Gemini** — replay audio-companion tool calls / content / reasoning on audio turns instead of dropping them. 
+ ## [1.27.3] - 2026-05-27 ### Fixed diff --git a/charts/aimock/Chart.yaml b/charts/aimock/Chart.yaml index 79c178c..1eba0cd 100644 --- a/charts/aimock/Chart.yaml +++ b/charts/aimock/Chart.yaml @@ -3,4 +3,4 @@ name: aimock description: Mock infrastructure for AI application testing (OpenAI, Anthropic, Gemini, MCP, A2A, vector) type: application version: 0.1.0 -appVersion: "1.27.3" +appVersion: "1.28.0" diff --git a/package.json b/package.json index dba0ec2..2f7263a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@copilotkit/aimock", - "version": "1.27.3", + "version": "1.28.0", "description": "Mock infrastructure for AI application testing — LLM APIs, image generation, text-to-speech, transcription, audio generation, video generation, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies.", "license": "MIT", "keywords": [