diff --git a/.changeset/fix-gemini-thought-signature-part-level.md b/.changeset/fix-gemini-thought-signature-part-level.md new file mode 100644 index 000000000..abb5c1860 --- /dev/null +++ b/.changeset/fix-gemini-thought-signature-part-level.md @@ -0,0 +1,11 @@ +--- +'@tanstack/ai-gemini': patch +--- + +fix(ai-gemini): read/write thoughtSignature at Part level for Gemini 3.x + +Gemini 3.x models emit `thoughtSignature` as a Part-level sibling of `functionCall` (per the `@google/genai` `Part` type definition), not nested inside `functionCall`. The adapter was reading from `functionCall.thoughtSignature` (which doesn't exist in the SDK types) and writing it back nested inside `functionCall`, causing the Gemini API to reject subsequent tool-call turns with `400 INVALID_ARGUMENT: "Function call is missing a thought_signature"`. + +This fix: +- **Read side:** reads `part.thoughtSignature` first, falls back to `functionCall.thoughtSignature` for older Gemini 2.x models +- **Write side:** emits `thoughtSignature` as a Part-level sibling of `functionCall` instead of nesting it inside diff --git a/packages/typescript/ai-gemini/src/adapters/text.ts b/packages/typescript/ai-gemini/src/adapters/text.ts index 4430534c0..53e654f40 100644 --- a/packages/typescript/ai-gemini/src/adapters/text.ts +++ b/packages/typescript/ai-gemini/src/adapters/text.ts @@ -289,6 +289,15 @@ export class GeminiTextAdapter< `${functionCall.name}_${Date.now()}_${nextToolIndex}` const functionArgs = functionCall.args || {} + // Gemini 3.x emits thoughtSignature as a Part-level sibling of + // functionCall (see @google/genai Part type), not nested inside + // functionCall. Read from the Part first, fall back to + // functionCall for older Gemini 2.x models. + const partThoughtSignature = + (part as any).thoughtSignature || + (functionCall as any).thoughtSignature || + undefined + let toolCallData = toolCallMap.get(toolCallId) if (!toolCallData) { toolCallData = { @@ -299,11 +308,13 @@ export class GeminiTextAdapter< : JSON.stringify(functionArgs), index: nextToolIndex++, started: false, - thoughtSignature: - (functionCall as any).thoughtSignature || undefined, + thoughtSignature: partThoughtSignature, } toolCallMap.set(toolCallId, toolCallData) } else { + if (!toolCallData.thoughtSignature && partThoughtSignature) { + toolCallData.thoughtSignature = partThoughtSignature + } try { const existingArgs = JSON.parse(toolCallData.args) const newArgs = @@ -585,14 +596,18 @@ export class GeminiTextAdapter< const thoughtSignature = toolCall.providerMetadata ?.thoughtSignature as string | undefined + // Gemini 3.x requires thoughtSignature at the Part level (sibling + // of functionCall), not nested inside functionCall. Nesting it + // causes the API to reject the next turn with + // "Function call is missing a thought_signature". parts.push({ functionCall: { id: toolCall.id, name: toolCall.function.name, args: parsedArgs, - ...(thoughtSignature && { thoughtSignature }), - } as any, - }) + }, + ...(thoughtSignature && { thoughtSignature }), + } as Part) } } diff --git a/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts b/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts index d713cfc1f..50bfec090 100644 --- a/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts +++ b/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts @@ -506,10 +506,11 @@ describe('GeminiAdapter through AI', () => { expect(textParts[0].text).toBe("what's a good electric guitar?") }) - it('preserves thoughtSignature in functionCall parts when sending history back to Gemini', async () => { + it('reads Part-level thoughtSignature from Gemini 3.x streaming response', async () => { const thoughtSig = 'base64-encoded-thought-signature-xyz' - // First stream: model returns a function call with a thoughtSignature (thinking model) + // Gemini 3.x emits thoughtSignature at the Part level, as a sibling of + // functionCall (per @google/genai Part type), not nested inside functionCall. const firstStream = [ { candidates: [ @@ -517,11 +518,11 @@ describe('GeminiAdapter through AI', () => { content: { parts: [ { + thoughtSignature: thoughtSig, functionCall: { id: 'fc_001', name: 'sum_tool', args: { numbers: [1, 2, 5] }, - thoughtSignature: thoughtSig, }, }, ], @@ -537,7 +538,6 @@ describe('GeminiAdapter through AI', () => { }, ] - // Second stream: model returns the final answer const secondStream = [ { candidates: [ @@ -591,8 +591,93 @@ describe('GeminiAdapter through AI', () => { const functionCallPart = modelTurn.parts.find((p: any) => p.functionCall) expect(functionCallPart).toBeDefined() expect(functionCallPart.functionCall.name).toBe('sum_tool') - // The thoughtSignature must be preserved in the model turn's functionCall - expect(functionCallPart.functionCall.thoughtSignature).toBe(thoughtSig) + // thoughtSignature must be at the Part level, NOT nested in functionCall + expect(functionCallPart.thoughtSignature).toBe(thoughtSig) + expect(functionCallPart.functionCall.thoughtSignature).toBeUndefined() + }) + + it('falls back to functionCall.thoughtSignature for Gemini 2.x models', async () => { + const thoughtSig = 'legacy-thought-signature' + + // Gemini 2.x nests thoughtSignature inside functionCall + const firstStream = [ + { + candidates: [ + { + content: { + parts: [ + { + functionCall: { + id: 'fc_legacy', + name: 'sum_tool', + args: { numbers: [3, 4] }, + thoughtSignature: thoughtSig, + }, + }, + ], + }, + finishReason: 'STOP', + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }, + ] + + const secondStream = [ + { + candidates: [ + { + content: { parts: [{ text: 'The sum is 7.' }] }, + finishReason: 'STOP', + }, + ], + usageMetadata: { + promptTokenCount: 20, + candidatesTokenCount: 5, + totalTokenCount: 25, + }, + }, + ] + + mocks.generateContentStreamSpy + .mockResolvedValueOnce(createStream(firstStream)) + .mockResolvedValueOnce(createStream(secondStream)) + + const adapter = createTextAdapter() + + const sumTool: Tool = { + name: 'sum_tool', + description: 'Sums an array of numbers.', + execute: async (input: any) => ({ + result: input.numbers.reduce((a: number, b: number) => a + b, 0), + }), + } + + for await (const _ of chat({ + adapter, + tools: [sumTool], + messages: [{ role: 'user', content: 'What is 3 + 4?' }], + })) { + /* consume stream */ + } + + expect(mocks.generateContentStreamSpy).toHaveBeenCalledTimes(2) + + const [secondPayload] = mocks.generateContentStreamSpy.mock.calls[1] + const modelTurn = secondPayload.contents.find( + (c: any) => c.role === 'model', + ) + expect(modelTurn).toBeDefined() + + const functionCallPart = modelTurn.parts.find((p: any) => p.functionCall) + expect(functionCallPart).toBeDefined() + // Even for legacy input, the write side should emit at Part level + expect(functionCallPart.thoughtSignature).toBe(thoughtSig) + expect(functionCallPart.functionCall.thoughtSignature).toBeUndefined() }) it('uses function name (not toolCallId) in functionResponse and preserves the call id', async () => {