Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .changeset/multimodal-tool-results.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
'@tanstack/ai': minor
'@tanstack/ai-client': minor
'@tanstack/openai-base': minor
'@tanstack/ai-anthropic': minor
'@tanstack/ai-gemini': minor
---

feat: support multimodal (image) tool results

Tools may now return an `Array<ContentPart>` (e.g. a text part plus an image part) and have it transmitted to the model as structured multimodal tool output instead of a `JSON.stringify`'d blob. This unblocks use cases like returning a screenshot from a tool so the model can see it (issue #363).

- Detection is structural and opt-in by shape: when a tool returns a non-empty array whose every element is a valid `ContentPart`, that array is passed through unchanged; strings and all other return values are serialized exactly as before, so there are no breaking changes.
- The OpenAI Responses, Anthropic, and Google Gemini adapters convert the content parts into their native multimodal tool-output formats (`function_call_output.output`, `tool_result` content blocks, and `functionResponse.parts` respectively). Providers on the Chat Completions path (Groq, Ollama, Grok, OpenRouter chat) fall back to stringifying, which their APIs require.
- AG-UI stream events (`TOOL_CALL_RESULT.content`, `TOOL_CALL_END.result`) remain string-only per the spec; the multimodal array travels on the tool message itself.
Binary file added examples/ts-react-chat/public/repro-secret.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
123 changes: 123 additions & 0 deletions examples/ts-react-chat/scripts/make-repro-image.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// One-off generator for the issue #363 repro image.
// Renders a fixed 3-digit secret number as black pixels on white.
// The number is very unlikely to be guessed (1-in-1000) and appears nowhere in any text
// the tool returns, so a model that genuinely *sees* the image can read it and
// a model that only received stringified JSON cannot. Writes the PNG to
// public/ and prints its base64 for embedding in the tool.
import { deflateSync } from 'node:zlib'
import { writeFileSync } from 'node:fs'
import { fileURLToPath } from 'node:url'
import { dirname, join } from 'node:path'

// Must stay identical to REPRO_SECRET in src/lib/image-tool-repro.ts.
const SECRET = '473'
const CELL = 16 // every font cell is drawn as a CELL x CELL square of pixels
const MARGIN = 24 // blank border, in pixels, around the digits
const GAP = CELL // horizontal space between neighbouring digits

// 3x5 bitmap glyphs for the digits 0-9. Each entry lists the five rows of one
// glyph from top to bottom, three cells per row, with '1' marking a filled
// cell. The position of an entry in the list is the digit it draws.
const FONT = Object.fromEntries(
  [
    '111 101 101 101 111',
    '010 110 010 010 111',
    '111 001 111 100 111',
    '111 001 111 001 111',
    '101 101 111 001 001',
    '111 100 111 001 111',
    '111 100 111 101 111',
    '111 001 010 010 010',
    '111 101 111 101 111',
    '111 101 111 001 111',
  ].map((rows, digit) => [digit, rows.split(' ')]),
)

const digits = Array.from(SECRET)
const digitW = CELL * 3
const digitH = CELL * 5
// Canvas size: a margin on each side, every glyph, and one gap between each
// adjacent pair of glyphs.
const width = 2 * MARGIN + digitW * digits.length + GAP * (digits.length - 1)
const height = 2 * MARGIN + digitH

// The canvas: three bytes (R, G, B) per pixel, stored row by row. Filling with
// 255 makes every pixel white to begin with.
const px = new Uint8Array(width * height * 3).fill(255)

// Turns the pixel at column x, row y black by zeroing its three channel bytes.
function setBlack(x, y) {
  const base = 3 * (x + y * width)
  for (const channel of [0, 1, 2]) {
    px[base + channel] = 0
  }
}

// Stamp each digit of the secret onto the canvas, left to right. Every '1'
// cell of a glyph becomes a solid CELL x CELL black square.
for (const [slot, digit] of digits.entries()) {
  const rows = FONT[digit]
  const left = MARGIN + slot * (digitW + GAP)
  for (let r = 0; r < 5; r++) {
    const top = MARGIN + r * CELL
    for (let c = 0; c < 3; c++) {
      if (rows[r][c] !== '1') continue
      const cellLeft = left + c * CELL
      for (let dy = 0; dy < CELL; dy++) {
        for (let dx = 0; dx < CELL; dx++) {
          setBlack(cellLeft + dx, top + dy)
        }
      }
    }
  }
}

// Lay the canvas out as PNG scanlines: each row is one filter-type byte (0)
// followed by that row's RGB bytes, copied straight from the pixel buffer.
const stride = width * 3
const raw = Buffer.alloc((stride + 1) * height)
let o = 0
for (let row = 0; row < height; row++) {
  raw[o] = 0
  raw.set(px.subarray(row * stride, (row + 1) * stride), o + 1)
  o += stride + 1
}

// CRC-32, needed because every PNG chunk ends with one.
//
// crcTable[b] is the result of pushing the single byte b through eight rounds
// of the reflected polynomial 0xedb88320, so crc32() can consume a whole byte
// per step with one table lookup.
const crcTable = new Uint32Array(256).map((_, byte) => {
  let rem = byte
  for (let bit = 0; bit < 8; bit++) {
    const shifted = rem >>> 1
    rem = rem & 1 ? 0xedb88320 ^ shifted : shifted
  }
  return rem >>> 0
})

// Returns the CRC-32 of the bytes in buf as an unsigned 32-bit number.
function crc32(buf) {
  let state = 0xffffffff
  for (const byte of buf) {
    state = crcTable[(state ^ byte) & 0xff] ^ (state >>> 8)
  }
  // Final inversion; `>>> 0` converts the signed result back to unsigned.
  return (state ^ 0xffffffff) >>> 0
}

// Builds one PNG chunk: the 4-byte big-endian length of `data`, the ASCII
// chunk type, `data` itself, then the 4-byte big-endian CRC-32 computed over
// type + data (the length field is not part of the checksum).
function chunk(type, data) {
  const typed = Buffer.concat([Buffer.from(type, 'ascii'), data])

  const lengthField = Buffer.alloc(4)
  lengthField.writeUInt32BE(data.length, 0)

  const crcField = Buffer.alloc(4)
  crcField.writeUInt32BE(crc32(typed), 0)

  return Buffer.concat([lengthField, typed, crcField])
}

// IHDR payload (13 bytes): width and height as big-endian uint32 values,
// followed by five single-byte fields.
const ihdr = Buffer.alloc(13)
ihdr.writeUInt32BE(width, 0)
ihdr.writeUInt32BE(height, 4)
ihdr.set(
  [
    8, // bit depth
    2, // colour type: truecolour RGB
    0, // compression method
    0, // filter method
    0, // interlace method
  ],
  8,
)

// The fixed 8-byte signature that opens every PNG file.
const PNG_SIGNATURE = Buffer.from([
  0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
])

// Complete file: signature, header, the deflate-compressed scanlines, and the
// empty end-of-image chunk.
const png = Buffer.concat([
  PNG_SIGNATURE,
  chunk('IHDR', ihdr),
  chunk('IDAT', deflateSync(raw)),
  chunk('IEND', Buffer.alloc(0)),
])

// Write the PNG to ../public/repro-secret.png relative to this script, report
// what was written, then print the base64 form between two marker lines so it
// can be copied into the tool source.
const scriptDir = dirname(fileURLToPath(import.meta.url))
const outPath = join(scriptDir, '..', 'public', 'repro-secret.png')
writeFileSync(outPath, png)
console.log('Wrote', outPath, `(${png.length} bytes, secret=${SECRET})`)
for (const line of ['BASE64_START', png.toString('base64'), 'BASE64_END']) {
  console.log(line)
}
54 changes: 54 additions & 0 deletions examples/ts-react-chat/src/lib/image-tool-repro.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { toolDefinition } from '@tanstack/ai'
import { z } from 'zod'
import type { ContentPart } from '@tanstack/ai'

/**
* Repro for https://github.com/TanStack/ai/issues/363
*
* `getReproImage` returns a multimodal tool result — an array of `ContentPart`
* (a text part + an image part). With multimodal tool results supported, the
* OpenAI Responses adapter sends the image as a structured `input_image` in
* `function_call_output.output`, so the model can actually see it. Before the
* fix every tool result was `JSON.stringify`'d and the model never received the
* image.
*
* The image is a PNG showing a fixed 3-digit secret number ({@link REPRO_SECRET})
* rendered as black pixels on white. The number appears nowhere in any text the
* tool returns, so a model that can genuinely see the image can read it back and
* a model that only received stringified JSON cannot — a clean pass/fail signal
* that a blind model cannot fake (1-in-1000 to guess).
*
* Generated by `scripts/make-repro-image.mjs` (also written to
* `public/repro-secret.png` for human comparison). Keep REPRO_SECRET in sync
* with the SECRET constant in that script.
*/
export const REPRO_SECRET = '473'

const REPRO_IMAGE_BASE64 =
'iVBORw0KGgoAAAANSUhEUgAAAOAAAACACAIAAACdu/LsAAAEIElEQVR4nO2SgWkAMRDDfv+l2wlSaJA5JbEG8FmHv59SxHzTBUr5iw60qOlAi5oOtKjpQIuaDrSo6UCLmg60qOlAi5oOtKjpQIuaDrSo6UCLmg60qOlAi5oOtKjpQIuaDrSo6UCLmg60qOlAi5oOtKjpQIsabKDfPzk9P81r/ZdeWFBYwJaf5rX+Sy8sKCxgy0/zWv+lFxYUFrDlp3mt/9ILCwoL2PLTvNZ/6YUFhQVs+Wle67/0woLCArb8NK/1X3phQWEBW36a1/ovvbCgsIAtP81r/ZdeWFBYwJaf5rX+Sy8sKCxgy0/zWv+lFxYUFrDlp3mt/9ILCwoL2PLTvNZ/6YUFhQVs+Wle67/0woLCArb8NK/1X3phQWEBW36a1/ovvbCgsMDUgyjaf/MuFhQWmHoQRftv3sWCwgJTD6Jo/827WFBYYOpBFO2/eRcLCgtMPYii/TfvYkFhgakHUbT/5l0sKCww9SCK9t+8iwWFBaYeRNH+m3exoLDA1IMo2n/zLhYUFph6EEX7b97FgsICUw+iaP/Nu1hQWGDqQRTtv3kXCwoLTD2Iov0372JBYYGpB1G0/+ZdLCgsMPUgivbfvIsFhQWmHkTR/pt3sSAZlBfla+uTBvPCgmRQXpSvrU8azAsLkkF5Ub62PmkwLyxIBuVF+dr6pMG8sCAZlBfla+uTBvPCgmRQXpSvrU8azAsLkkF5Ub62PmkwLyxIBuVF+dr6pMG8sCAZlBfla+uTBvPCgmRQXpSvrU8azAsLkkF5Ub62PmkwLyxIBuVF+dr6pMG8sCAZlBfla+uTBvPCgmRQXpSvrU8azAsLkkF5Ub62PmkwLyxIBuVF+dr6pMG8qKA0Uw86pU+aKd9jHmcbhK1PminfYx5nG4StT5op32MeZxuErU+aKd9jHmcbhK1PminfYx5nG4StT5op32MeZxuErU+aKd9jHmcbhK1PminfYx5nG4StT5op32MeZxuErU+aKd9jHmcbhK1PminfYx5nG4StT5op32MeZxuErU+aKd9jHmcbhK1PminfYx5nG4StT5op32MeZxuErU+aKd9jHmcbxOl90mBeVFCaqQfd2icN5kUFpZl60K190mBeVFCaqQfd2icN5kUFpZl60K190mBeVFCaqQfd2icN5kUFpZl60K190mBeVFCaqQfd2icN5kUFpZl60K190mBeVFCaqQfd2icN5kUFpZl60K190mBeVFCaqQfd2icN5kUFpZl60K190mBeVFCaqQfd2icN5kUFpZl60K190mBeVFCaqQfd2icN5kUFlZKgAy1qOtCipgMtajrQoqYDLWo60KKmAy1qOtCipgMtajrQoqYDLWo60KKmAy1qOtCipgMtajrQoqYDLWo60KKmAy1qOtCipgMtajrQoqYDLWp+ARIFD+N3ElIHAAAAAElFTkSuQmCC'

// Text shown to the model so it knows when to call the tool.
const REPRO_TOOL_DESCRIPTION =
  'Returns an image for the user to inspect. Call this whenever the user asks you to look at, view, or describe the image.'

// The tool takes no arguments, so its input is an empty object.
const reproToolInput = z.object({})

/**
 * Definition of the `getReproImage` tool: its name, the description given to
 * the model, and its (empty) input schema.
 */
export const getReproImageToolDef = toolDefinition({
  name: 'getReproImage',
  description: REPRO_TOOL_DESCRIPTION,
  inputSchema: reproToolInput,
})

/**
 * Server implementation of `getReproImage`.
 *
 * The result is an array of content parts rather than a string: a short text
 * part followed by the repro PNG as an inline base64 image part.
 */
export const getReproImage = getReproImageToolDef.server(
  (): Array<ContentPart> => {
    const caption: ContentPart = {
      type: 'text',
      content: 'Here is the image you asked to inspect. Read whatever it shows.',
    }
    const picture: ContentPart = {
      type: 'image',
      source: {
        type: 'data',
        value: REPRO_IMAGE_BASE64,
        mimeType: 'image/png',
      },
    }
    return [caption, picture]
  },
)
42 changes: 42 additions & 0 deletions examples/ts-react-chat/src/routeTree.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { Route as rootRouteImport } from './routes/__root'
import { Route as ServerFnChatRouteImport } from './routes/server-fn-chat'
import { Route as RealtimeRouteImport } from './routes/realtime'
import { Route as Issue176ToolResultRouteImport } from './routes/issue-176-tool-result'
import { Route as ImageToolReproRouteImport } from './routes/image-tool-repro'
import { Route as ImageGenRouteImport } from './routes/image-gen'
import { Route as GenerationHooksRouteImport } from './routes/generation-hooks'
import { Route as IndexRouteImport } from './routes/index'
Expand All @@ -29,6 +30,7 @@ import { Route as ApiTanchatRouteImport } from './routes/api.tanchat'
import { Route as ApiSummarizeRouteImport } from './routes/api.summarize'
import { Route as ApiStructuredOutputRouteImport } from './routes/api.structured-output'
import { Route as ApiStructuredChatRouteImport } from './routes/api.structured-chat'
import { Route as ApiImageToolReproRouteImport } from './routes/api.image-tool-repro'
import { Route as ApiImageGenRouteImport } from './routes/api.image-gen'
import { Route as ExampleGuitarsIndexRouteImport } from './routes/example.guitars/index'
import { Route as ExampleGuitarsGuitarIdRouteImport } from './routes/example.guitars/$guitarId'
Expand All @@ -52,6 +54,11 @@ const Issue176ToolResultRoute = Issue176ToolResultRouteImport.update({
path: '/issue-176-tool-result',
getParentRoute: () => rootRouteImport,
} as any)
// Route from './routes/image-tool-repro', given the id and path
// '/image-tool-repro' with the root route as its parent.
const ImageToolReproRoute = ImageToolReproRouteImport.update({
id: '/image-tool-repro',
path: '/image-tool-repro',
getParentRoute: () => rootRouteImport,
} as any)
const ImageGenRoute = ImageGenRouteImport.update({
id: '/image-gen',
path: '/image-gen',
Expand Down Expand Up @@ -140,6 +147,11 @@ const ApiStructuredChatRoute = ApiStructuredChatRouteImport.update({
path: '/api/structured-chat',
getParentRoute: () => rootRouteImport,
} as any)
// Route from './routes/api.image-tool-repro', given the id and path
// '/api/image-tool-repro' with the root route as its parent.
const ApiImageToolReproRoute = ApiImageToolReproRouteImport.update({
id: '/api/image-tool-repro',
path: '/api/image-tool-repro',
getParentRoute: () => rootRouteImport,
} as any)
const ApiImageGenRoute = ApiImageGenRouteImport.update({
id: '/api/image-gen',
path: '/api/image-gen',
Expand Down Expand Up @@ -180,10 +192,12 @@ export interface FileRoutesByFullPath {
'/': typeof IndexRoute
'/generation-hooks': typeof GenerationHooksRoute
'/image-gen': typeof ImageGenRoute
'/image-tool-repro': typeof ImageToolReproRoute
'/issue-176-tool-result': typeof Issue176ToolResultRoute
'/realtime': typeof RealtimeRoute
'/server-fn-chat': typeof ServerFnChatRoute
'/api/image-gen': typeof ApiImageGenRoute
'/api/image-tool-repro': typeof ApiImageToolReproRoute
'/api/structured-chat': typeof ApiStructuredChatRoute
'/api/structured-output': typeof ApiStructuredOutputRoute
'/api/summarize': typeof ApiSummarizeRoute
Expand All @@ -209,10 +223,12 @@ export interface FileRoutesByTo {
'/': typeof IndexRoute
'/generation-hooks': typeof GenerationHooksRoute
'/image-gen': typeof ImageGenRoute
'/image-tool-repro': typeof ImageToolReproRoute
'/issue-176-tool-result': typeof Issue176ToolResultRoute
'/realtime': typeof RealtimeRoute
'/server-fn-chat': typeof ServerFnChatRoute
'/api/image-gen': typeof ApiImageGenRoute
'/api/image-tool-repro': typeof ApiImageToolReproRoute
'/api/structured-chat': typeof ApiStructuredChatRoute
'/api/structured-output': typeof ApiStructuredOutputRoute
'/api/summarize': typeof ApiSummarizeRoute
Expand All @@ -239,10 +255,12 @@ export interface FileRoutesById {
'/': typeof IndexRoute
'/generation-hooks': typeof GenerationHooksRoute
'/image-gen': typeof ImageGenRoute
'/image-tool-repro': typeof ImageToolReproRoute
'/issue-176-tool-result': typeof Issue176ToolResultRoute
'/realtime': typeof RealtimeRoute
'/server-fn-chat': typeof ServerFnChatRoute
'/api/image-gen': typeof ApiImageGenRoute
'/api/image-tool-repro': typeof ApiImageToolReproRoute
'/api/structured-chat': typeof ApiStructuredChatRoute
'/api/structured-output': typeof ApiStructuredOutputRoute
'/api/summarize': typeof ApiSummarizeRoute
Expand Down Expand Up @@ -270,10 +288,12 @@ export interface FileRouteTypes {
| '/'
| '/generation-hooks'
| '/image-gen'
| '/image-tool-repro'
| '/issue-176-tool-result'
| '/realtime'
| '/server-fn-chat'
| '/api/image-gen'
| '/api/image-tool-repro'
| '/api/structured-chat'
| '/api/structured-output'
| '/api/summarize'
Expand All @@ -299,10 +319,12 @@ export interface FileRouteTypes {
| '/'
| '/generation-hooks'
| '/image-gen'
| '/image-tool-repro'
| '/issue-176-tool-result'
| '/realtime'
| '/server-fn-chat'
| '/api/image-gen'
| '/api/image-tool-repro'
| '/api/structured-chat'
| '/api/structured-output'
| '/api/summarize'
Expand All @@ -328,10 +350,12 @@ export interface FileRouteTypes {
| '/'
| '/generation-hooks'
| '/image-gen'
| '/image-tool-repro'
| '/issue-176-tool-result'
| '/realtime'
| '/server-fn-chat'
| '/api/image-gen'
| '/api/image-tool-repro'
| '/api/structured-chat'
| '/api/structured-output'
| '/api/summarize'
Expand All @@ -358,10 +382,12 @@ export interface RootRouteChildren {
IndexRoute: typeof IndexRoute
GenerationHooksRoute: typeof GenerationHooksRoute
ImageGenRoute: typeof ImageGenRoute
ImageToolReproRoute: typeof ImageToolReproRoute
Issue176ToolResultRoute: typeof Issue176ToolResultRoute
RealtimeRoute: typeof RealtimeRoute
ServerFnChatRoute: typeof ServerFnChatRoute
ApiImageGenRoute: typeof ApiImageGenRoute
ApiImageToolReproRoute: typeof ApiImageToolReproRoute
ApiStructuredChatRoute: typeof ApiStructuredChatRoute
ApiStructuredOutputRoute: typeof ApiStructuredOutputRoute
ApiSummarizeRoute: typeof ApiSummarizeRoute
Expand Down Expand Up @@ -407,6 +433,13 @@ declare module '@tanstack/react-router' {
preLoaderRoute: typeof Issue176ToolResultRouteImport
parentRoute: typeof rootRouteImport
}
'/image-tool-repro': {
id: '/image-tool-repro'
path: '/image-tool-repro'
fullPath: '/image-tool-repro'
preLoaderRoute: typeof ImageToolReproRouteImport
parentRoute: typeof rootRouteImport
}
'/image-gen': {
id: '/image-gen'
path: '/image-gen'
Expand Down Expand Up @@ -526,6 +559,13 @@ declare module '@tanstack/react-router' {
preLoaderRoute: typeof ApiStructuredChatRouteImport
parentRoute: typeof rootRouteImport
}
'/api/image-tool-repro': {
id: '/api/image-tool-repro'
path: '/api/image-tool-repro'
fullPath: '/api/image-tool-repro'
preLoaderRoute: typeof ApiImageToolReproRouteImport
parentRoute: typeof rootRouteImport
}
'/api/image-gen': {
id: '/api/image-gen'
path: '/api/image-gen'
Expand Down Expand Up @@ -582,10 +622,12 @@ const rootRouteChildren: RootRouteChildren = {
IndexRoute: IndexRoute,
GenerationHooksRoute: GenerationHooksRoute,
ImageGenRoute: ImageGenRoute,
ImageToolReproRoute: ImageToolReproRoute,
Issue176ToolResultRoute: Issue176ToolResultRoute,
RealtimeRoute: RealtimeRoute,
ServerFnChatRoute: ServerFnChatRoute,
ApiImageGenRoute: ApiImageGenRoute,
ApiImageToolReproRoute: ApiImageToolReproRoute,
ApiStructuredChatRoute: ApiStructuredChatRoute,
ApiStructuredOutputRoute: ApiStructuredOutputRoute,
ApiSummarizeRoute: ApiSummarizeRoute,
Expand Down
Loading
Loading