From 21bf43750c9e0a9cd7e69fde2820e3f8bbceca1a Mon Sep 17 00:00:00 2001
From: Alem Tuzlak <t.zlak@hotmail.com>
Date: Wed, 3 Jun 2026 12:02:42 +0200
Subject: [PATCH 1/7] docs: fix inaccurate code samples and expand coverage
 across guides

Audited all guide pages against the actual package APIs and fixed
copy-paste-broken / outdated samples and filled coverage gaps.

Middleware & structured outputs:
- New Built-in Middleware page (toolCache, contentGuard, otel) + new
  top-level Middleware nav section; document structured-output chunk
  transforms via onChunk + ctx.phase; fix middleware type import paths.
- Document client consumption (useChat partial/final) on the one-shot page.

Correctness fixes (verified against packages/ source):
- chat: providerOptions -> modelOptions; invalid model ids; budget_tokens
  requires maxTokens; async stream() factory -> fetcher; add missing
  imports; document default maxIterations(5) and agentLoopStrategy.
- tools: toServerSentEventsStream -> toServerSentEventsResponse; remove
  duplicate tools key; clarify tool-call vs tool-result states; fix the
  React examples and state diagram; add emitCustomEvent / runtime-context.
- media: add required model args to factory calls; fix recursive
  generateVideo; TranscriptionResult.words is top-level; speed is a
  top-level speech option; Gemini audio returns b64Json; onResult transform.
- advanced: adapter.model (not selectedModel); GeminiImageMetadata;
  source.mimeType; text.format structured-output shape; fill How It Works;
  createModel capabilities form; soften unsubstantiated bundle figures.
- protocol: rewrite SSE / HTTP-stream pages to the AG-UI event format
  (drop obsolete chunk shapes and [DONE]); use toHttpResponse/toHttpStream;
  expand chunk-definitions with TOOL_CALL_RESULT, MESSAGES_SNAPSHOT,
  REASONING_* and deprecated-alias notes.
- adapters: elevenlabs SFX model + @elevenlabs/client; ollama modelOptions
  placement; cencori AG-UI event/tools alignment.
- fix @tanstack/ai-openai/adapters -> @tanstack/ai-openai (ag-ui-compliance, otel).
---
 docs/adapters/anthropic.md                 |   2 +-
 docs/adapters/elevenlabs.md                |   6 +-
 docs/advanced/built-in-middleware.md       | 226 +++++++++++++++++++++
 docs/advanced/extend-adapter.md            |  26 ++-
 docs/advanced/middleware.md                | 198 +++++++-----------
 docs/advanced/multimodal-content.md        |  25 ++-
 docs/advanced/observability.md             |   2 +-
 docs/advanced/otel.md                      |   4 +-
 docs/advanced/per-model-type-safety.md     |  19 +-
 docs/advanced/runtime-adapter-switching.md |   6 +-
 docs/advanced/tree-shaking.md              |  14 +-
 docs/advanced/typed-options.md             |   4 +-
 docs/chat/agentic-cycle.md                 |  43 +++-
 docs/chat/streaming.md                     |  36 ++--
 docs/chat/thinking-content.md              |  38 ++--
 docs/community-adapters/cencori.md         |  39 ++--
 docs/comparison/vercel-ai-sdk.md           |   6 +-
 docs/config.json                           |  23 ++-
 docs/getting-started/quick-start.md        |  10 +-
 docs/media/audio-generation.md             |   7 +-
 docs/media/generation-hooks.md             |   2 +-
 docs/media/generations.md                  |  21 ++
 docs/media/image-generation.md             |  11 +-
 docs/media/text-to-speech.md               |  10 +-
 docs/media/transcription.md                |  32 ++-
 docs/media/video-generation.md             |  16 +-
 docs/migration/ag-ui-compliance.md         |   6 +-
 docs/protocol/chunk-definitions.md         | 226 ++++++++++++++++++---
 docs/protocol/http-stream-protocol.md      | 139 ++++++-------
 docs/protocol/sse-protocol.md              | 120 ++++++-----
 docs/structured-outputs/one-shot.md        |  71 +++++++
 docs/structured-outputs/overview.md        |   2 +-
 docs/tools/client-tools.md                 |  65 +++---
 docs/tools/server-tools.md                 |  41 ++--
 docs/tools/tool-approval.md                |  43 +++-
 docs/tools/tool-architecture.md            | 118 +++++++----
 docs/tools/tools.md                        |  34 +++-
 37 files changed, 1181 insertions(+), 510 deletions(-)
 create mode 100644 docs/advanced/built-in-middleware.md

diff --git a/docs/adapters/anthropic.md b/docs/adapters/anthropic.md
index 0fe7bdd74..b6923db0b 100644
--- a/docs/adapters/anthropic.md
+++ b/docs/adapters/anthropic.md
@@ -254,7 +254,7 @@ const stream = chat({
 });
 ```
 
-**Supported models:** every current Claude model. `claude-3-haiku` supports
+**Supported models:** every current Claude model. `claude-haiku-3` supports
 only `web_search` (not `web_fetch`). See [Provider Tools](../tools/provider-tools.md#which-models-support-which-tools).
 
 ### `webFetchTool`
diff --git a/docs/adapters/elevenlabs.md b/docs/adapters/elevenlabs.md
index 13d4dab59..ed9e8f605 100644
--- a/docs/adapters/elevenlabs.md
+++ b/docs/adapters/elevenlabs.md
@@ -22,7 +22,7 @@ The ElevenLabs adapter is **voice-focused**. It exposes four capabilities:
 
 It does not support text `chat()` or `summarize()` — use OpenAI, Anthropic, or Gemini for those.
 
-The realtime adapter uses an **agent-based architecture** where you configure your conversational AI agent in the [ElevenLabs dashboard](https://elevenlabs.io/) (voice, personality, knowledge base, tools) and then connect to it at runtime. The adapter wraps the `@11labs/client` SDK for seamless integration with `useRealtimeChat` and `RealtimeClient`.
+The realtime adapter uses an **agent-based architecture** where you configure your conversational AI agent in the [ElevenLabs dashboard](https://elevenlabs.io/) (voice, personality, knowledge base, tools) and then connect to it at runtime. The adapter wraps the `@elevenlabs/client` SDK for seamless integration with `useRealtimeChat` and `RealtimeClient`.
 
 ## Installation
 
@@ -184,7 +184,7 @@ const chat = useRealtimeChat({
 })
 ```
 
-Tool results are automatically serialized to strings and returned to the ElevenLabs agent. The adapter converts TanStack tool definitions into the `@11labs/client` clientTools format internally.
+Tool results are automatically serialized to strings and returned to the ElevenLabs agent. The adapter converts TanStack tool definitions into the `@elevenlabs/client` clientTools format internally.
 
 ## Configuration
 
@@ -293,7 +293,7 @@ const music = await generateAudio({
 
 // Sound effects
 const sfx = await generateAudio({
-  adapter: elevenlabsAudio("sound_effects_v1"),
+  adapter: elevenlabsAudio("eleven_text_to_sound_v2"),
   prompt: "A glass shattering on concrete",
 });
 ```
diff --git a/docs/advanced/built-in-middleware.md b/docs/advanced/built-in-middleware.md
new file mode 100644
index 000000000..9fa5b2ea8
--- /dev/null
+++ b/docs/advanced/built-in-middleware.md
@@ -0,0 +1,226 @@
+---
+title: Built-in Middleware
+id: built-in-middleware
+order: 2
+description: "Ready-made TanStack AI chat() middleware — toolCacheMiddleware for caching tool results, contentGuardMiddleware for redacting streamed text, and otelMiddleware for OpenTelemetry tracing."
+keywords:
+  - tanstack ai
+  - middleware
+  - built-in middleware
+  - tool cache
+  - content guard
+  - redaction
+  - opentelemetry
+---
+
+TanStack AI ships ready-made middleware so you don't have to hand-roll the common cases. Each one is an ordinary [`ChatMiddleware`](./middleware) — drop it into the `middleware` array of any `chat()` call. This page documents every built-in.
+
+| Middleware | Import | What it does |
+|------------|--------|--------------|
+| `toolCacheMiddleware` | `@tanstack/ai/middlewares` | Cache tool-call results by name + arguments |
+| `contentGuardMiddleware` | `@tanstack/ai/middlewares` | Redact / transform / block streamed text content |
+| `otelMiddleware` | `@tanstack/ai/middlewares/otel` | Emit OpenTelemetry spans + GenAI metrics |
+
+> `toolCacheMiddleware` and `contentGuardMiddleware` are exported from the main `@tanstack/ai/middlewares` barrel. `otelMiddleware` lives on its own subpath (`@tanstack/ai/middlewares/otel`) so that importing the barrel never eagerly pulls in `@opentelemetry/api` (an optional peer dependency).
+
+## toolCacheMiddleware
+
+Caches tool call results based on tool name and arguments. When a tool is called with the same name and arguments as a previous call, the cached result is returned immediately without re-executing the tool.
+
+```typescript
+import { chat } from "@tanstack/ai";
+import { toolCacheMiddleware } from "@tanstack/ai/middlewares";
+
+const stream = chat({
+  adapter: openaiText("gpt-4o"),
+  messages,
+  tools: [weatherTool, stockTool],
+  middleware: [
+    toolCacheMiddleware({
+      ttl: 60_000, // Cache entries expire after 60 seconds
+      maxSize: 50, // Keep at most 50 entries (LRU eviction)
+      toolNames: ["getWeather"], // Only cache specific tools
+    }),
+  ],
+});
+```
+
+**Options:**
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `maxSize` | `number` | `100` | Maximum cache entries. Oldest evicted first (LRU). Only applies to the default in-memory storage. |
+| `ttl` | `number` | `Infinity` | Time-to-live in milliseconds. Expired entries are not served. |
+| `toolNames` | `string[]` | All tools | Only cache these tools. Others pass through. |
+| `keyFn` | `(toolName, args) => string` | `JSON.stringify([toolName, args])` | Custom cache key derivation. |
+| `storage` | `ToolCacheStorage` | In-memory Map | Custom storage backend. When provided, `maxSize` is ignored — the storage manages its own capacity. |
+
+**Behaviors:**
+
+- Only successful tool calls are cached — errors are never stored
+- Cache hits trigger `{ type: 'skip', result }` via `onBeforeToolCall`
+- LRU eviction: when `maxSize` is reached, the least-recently-used entry is removed (default storage only)
+- Cache hits refresh the entry's LRU position (moved to most-recently-used)
+
+**Custom key function** — useful when you want to ignore certain arguments:
+
+```typescript
+toolCacheMiddleware({
+  keyFn: (toolName, args) => {
+    // Ignore pagination, cache by query only
+    const { page, ...rest } = args as Record<string, unknown>;
+    return JSON.stringify([toolName, rest]);
+  },
+});
+```
+
+### Custom Storage
+
+By default the cache lives in-memory and is scoped to a single `toolCacheMiddleware()` instance. Pass a `storage` option to use an external backend like Redis, localStorage, or a database. This also enables **sharing a cache across multiple `chat()` calls**.
+
+The storage interface:
+
+```typescript
+import type { ToolCacheStorage, ToolCacheEntry } from "@tanstack/ai/middlewares";
+
+interface ToolCacheStorage {
+  getItem: (key: string) => ToolCacheEntry | undefined | Promise<ToolCacheEntry | undefined>;
+  setItem: (key: string, value: ToolCacheEntry) => void | Promise<void>;
+  deleteItem: (key: string) => void | Promise<void>;
+}
+
+// ToolCacheEntry is { result: unknown, timestamp: number }
+```
+
+All methods may return a `Promise` for async backends. The middleware handles TTL checking — your storage just needs to store and retrieve entries.
+
+**Redis example:**
+
+```typescript
+import { createClient } from "redis";
+import { toolCacheMiddleware, type ToolCacheStorage } from "@tanstack/ai/middlewares";
+
+const redis = await createClient().connect(); // the client must be connected before issuing commands
+
+const redisStorage: ToolCacheStorage = {
+  getItem: async (key) => {
+    const raw = await redis.get(`tool-cache:${key}`);
+    return raw ? JSON.parse(raw) : undefined;
+  },
+  setItem: async (key, value) => {
+    await redis.set(`tool-cache:${key}`, JSON.stringify(value));
+  },
+  deleteItem: async (key) => {
+    await redis.del(`tool-cache:${key}`);
+  },
+};
+
+const stream = chat({
+  adapter,
+  messages,
+  tools: [weatherTool],
+  middleware: [toolCacheMiddleware({ storage: redisStorage, ttl: 60_000 })],
+});
+```
+
+**Sharing a cache across requests:**
+
+```typescript
+// Create storage once (backed here by a module-level `globalCache` Map) and reuse it across chat() calls
+const sharedStorage: ToolCacheStorage = {
+  getItem: (key) => globalCache.get(key),
+  setItem: (key, value) => { globalCache.set(key, value); },
+  deleteItem: (key) => { globalCache.delete(key); },
+};
+
+// Every request handled by this route shares the same cache
+app.post("/api/chat", async (req) => {
+  const stream = chat({
+    adapter,
+    messages: req.body.messages,
+    tools: [weatherTool],
+    middleware: [toolCacheMiddleware({ storage: sharedStorage })],
+  });
+  return toServerSentEventsResponse(stream);
+});
+```
+
+## contentGuardMiddleware
+
+Filters or transforms streamed text content as it flows through `onChunk`. Use it to redact sensitive data (SSNs, emails, API keys), enforce a profanity filter, or rewrite text on the fly. Rules are applied to `TEXT_MESSAGE_CONTENT` chunks; all other chunk types pass through untouched.
+
+```typescript
+import { chat } from "@tanstack/ai";
+import { contentGuardMiddleware } from "@tanstack/ai/middlewares";
+
+const stream = chat({
+  adapter: openaiText("gpt-4o"),
+  messages,
+  middleware: [
+    contentGuardMiddleware({
+      rules: [
+        // Regex + replacement
+        { pattern: /\b\d{3}-\d{2}-\d{4}\b/g, replacement: "[SSN REDACTED]" },
+        // Custom transform function
+        { fn: (text) => text.replaceAll("badword", "****") },
+      ],
+      strategy: "buffered",
+    }),
+  ],
+});
+```
+
+**Options:**
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `rules` | `ContentGuardRule[]` | — | **Required.** Applied in order; each rule receives the previous rule's output. A rule is either `{ pattern: RegExp; replacement: string }` or `{ fn: (text: string) => string }`. |
+| `strategy` | `'delta' \| 'buffered'` | `'buffered'` | How content is matched. See below. |
+| `bufferSize` | `number` | `50` | (Buffered only) Characters held back before emitting, so patterns spanning chunk boundaries still match. Set it ≥ the length of the longest match you expect. Flushed at stream end. |
+| `blockOnMatch` | `boolean` | `false` | When `true`, drop the entire chunk if any rule changes the content (instead of emitting the filtered version). |
+| `onFiltered` | `(info: ContentFilteredInfo) => void` | — | Callback fired whenever a rule changes content. Receives `{ messageId, original, filtered, strategy }`. |
+
+**Matching strategies:**
+
+- **`'buffered'` (default)** — Accumulates content and applies rules to the settled portion, holding back a `bufferSize` look-behind window so a pattern split across two chunks (`"...123-45"` then `"-6789..."`) is still caught. The buffer is flushed when the message or run ends. Use this for anything that can span deltas — which is most redaction.
+- **`'delta'`** — Applies rules to each delta in isolation as it arrives. Fastest and lowest-latency, but a pattern split across a chunk boundary may slip through. Use only when your patterns are guaranteed to fit within a single delta.
+
+**Behaviors:**
+
+- Only `TEXT_MESSAGE_CONTENT` chunks are inspected; every other chunk type passes through.
+- A rule that doesn't change the text is a no-op — the chunk passes through unchanged.
+- With `blockOnMatch: true`, a matched chunk is dropped entirely (returns `null` from `onChunk`) rather than emitting the redacted text.
+- The `onFiltered` callback is for observability/audit — it fires with the before/after text but does not alter what is emitted.
+
+## otelMiddleware
+
+Emits vendor-neutral OpenTelemetry traces and metrics for every `chat()` call — a root span per call, a child span per agent-loop iteration, and a grandchild span per tool execution, all tagged with [GenAI semantic-convention attributes](https://opentelemetry.io/docs/specs/semconv/gen-ai/).
+
+```typescript
+import { chat } from "@tanstack/ai";
+import { otelMiddleware } from "@tanstack/ai/middlewares/otel";
+import { trace, metrics } from "@opentelemetry/api";
+
+const otel = otelMiddleware({
+  tracer: trace.getTracer("my-app"),
+  meter: metrics.getMeter("my-app"), // optional — enables GenAI histograms
+});
+
+const result = await chat({
+  adapter: openaiText("gpt-4o"),
+  messages,
+  middleware: [otel],
+});
+```
+
+`otelMiddleware` has its own configuration surface (content capture, redaction, span-name formatting, attribute enrichment, lifecycle callbacks) and requires the optional `@opentelemetry/api` peer dependency. See the dedicated [OpenTelemetry](./otel) guide for full setup, the span/metric catalogue, and all options.
+
+## Writing your own
+
+These built-ins are just `ChatMiddleware` objects — nothing about them is privileged. To build your own, see the [Middleware](./middleware) guide for the full hook reference, the context object, and composition rules.
+
+## Next Steps
+
+- [Middleware](./middleware) — the full lifecycle and hook reference
+- [OpenTelemetry](./otel) — `otelMiddleware` in depth
+- [Observability](./observability) — event-driven observability with the event client
diff --git a/docs/advanced/extend-adapter.md b/docs/advanced/extend-adapter.md
index 145432b21..55c5b4577 100644
--- a/docs/advanced/extend-adapter.md
+++ b/docs/advanced/extend-adapter.md
@@ -66,11 +66,11 @@ const model = createModel(
  
 ## Model Definition Structure
 
-Each custom model definition has three properties:
+A custom model definition (`ExtendedModelDef`) has the required properties `name`, `input`, and `modelOptions`, plus optional `features` and `tools`. The two `createModel` overloads let you fill these in two ways.
 
 ### Defining Input Modalities
 
-The `input` array specifies which content types your model supports:
+The positional form takes a model name and an `input` array specifying which content types your model supports:
 
 ```typescript
 const models = [
@@ -80,6 +80,28 @@ const models = [
 ```
 
 Available modalities: `'text'`, `'image'`, `'audio'`, `'video'`, `'document'`
+
+### Capabilities-object form
+
+To attach typed `modelOptions`, declared `features`, or provider `tools` to a custom model, use the second `createModel` overload, which takes a capabilities object as its second argument:
+
+```typescript
+import { createModel } from '@tanstack/ai'
+import type { OpenAITextProviderOptions } from '@tanstack/ai-openai'
+
+const reasoner = createModel('my-reasoner', {
+  input: ['text'],
+  features: ['reasoning', 'structured_outputs'],
+  tools: ['web_search'],
+  // Type brand for provider options — use `{} as YourOptionsType`
+  modelOptions: {} as OpenAITextProviderOptions,
+})
+```
+
+- `input` — supported input modalities (same as the positional form).
+- `features` — declared feature flags (e.g. `'reasoning'`, `'structured_outputs'`).
+- `tools` — declared provider tools (e.g. `'web_search'`).
+- `modelOptions` — a type brand for the provider options accepted by this model; the value is unused at runtime, so pass `{} as YourOptionsType`.
  
 ## Preserving Original Factory Behavior
 
diff --git a/docs/advanced/middleware.md b/docs/advanced/middleware.md
index 3200f6cba..9dd5ef0cc 100644
--- a/docs/advanced/middleware.md
+++ b/docs/advanced/middleware.md
@@ -244,6 +244,58 @@ const redactor: ChatMiddleware = {
 
 When multiple middleware define `onChunk`, chunks flow through them in order. If one middleware drops a chunk (returns `null`), subsequent middleware never see it.
 
+#### Chunk types you'll see
+
+`onChunk` receives every [AG-UI event](../protocol/chunk-definitions) the run produces — not just text. Narrow on `chunk.type` (a discriminated union) before reading type-specific fields. The common ones:
+
+| `chunk.type` | Meaning | Key fields |
+|--------------|---------|-----------|
+| `RUN_STARTED` / `RUN_FINISHED` / `RUN_ERROR` | Run lifecycle boundaries | `runId`, `finishReason`, `usage` (on finish), `message` (on error) |
+| `TEXT_MESSAGE_START` / `TEXT_MESSAGE_CONTENT` / `TEXT_MESSAGE_END` | Assistant text streaming | `messageId`, `delta` (content) |
+| `TOOL_CALL_START` / `TOOL_CALL_ARGS` / `TOOL_CALL_END` | Tool invocation streaming | `toolCallId`, `toolCallName`, `delta` (args), result on end |
+| `STEP_STARTED` / `STEP_FINISHED` | Thinking / reasoning steps | `delta`, `signature` |
+| `STATE_SNAPSHOT` / `STATE_DELTA` | Agent state sync | `snapshot`, `delta` |
+| `CUSTOM` | Extensibility events (incl. structured-output — see below) | `name`, `value` |
+
+See [AG-UI Event Definitions](../protocol/chunk-definitions) for the full catalogue and exact field shapes.
+
+#### Transforming structured-output chunks
+
+There is **no separate `onStructuredOutputChunk` hook** — and you don't need one. When `chat()` is invoked with `outputSchema`, the structured-output chunks (the JSON `TEXT_MESSAGE_CONTENT` deltas, plus the `structured-output.start` / `structured-output.complete` CUSTOM events and any finalization `RUN_ERROR`) flow through the **same `onChunk` hook** as everything else. You transform, expand, or drop them exactly like any other chunk.
+
+How you distinguish them depends on which finalization path the adapter takes:
+
+- **Separate-finalization adapters** (the legacy path — adapters that don't declare `supportsCombinedToolsAndSchema()`): `ctx.phase === 'structuredOutput'` during the finalization call. Discriminate on the phase.
+- **Native-combined adapters** (modern OpenAI Chat Completions / Responses, Claude 4.5+, Gemini 3.x, Grok 4.x — see issue #605): the schema-constrained JSON is produced on the model's natural final turn, so **`ctx.phase` stays `'modelStream'`** — the `'structuredOutput'` phase never fires. Discriminate on the CUSTOM event name (`structured-output.start` / `structured-output.complete`) instead.
+
+```typescript
+const structuredOutputObserver: ChatMiddleware = {
+  name: "structured-output-observer",
+  onChunk: (ctx, chunk) => {
+    // Separate-finalization path: the raw JSON streams as TEXT_MESSAGE_CONTENT
+    // during the 'structuredOutput' phase. Transform it like any text delta.
+    if (
+      ctx.phase === "structuredOutput" &&
+      chunk.type === "TEXT_MESSAGE_CONTENT"
+    ) {
+      return { ...chunk, delta: redact(chunk.delta) };
+    }
+
+    // Both paths: the final parsed object arrives as a CUSTOM event. On the
+    // native-combined path this is your only signal (ctx.phase never flips
+    // to 'structuredOutput'), so key off the event name, not the phase.
+    if (chunk.type === "CUSTOM" && chunk.name === "structured-output.complete") {
+      // chunk.value carries { object, raw } — observe, log, or replace it
+      console.log("structured output:", chunk.value);
+    }
+
+    // Return void to pass everything else through unchanged.
+  },
+};
+```
+
+> Why is there `onStructuredOutputConfig` but no `onStructuredOutputChunk`? Because the **config** shape genuinely differs at the structured-output boundary — it carries an `outputSchema` field that plain `ChatMiddlewareConfig` doesn't (see [onStructuredOutputConfig](#onstructuredoutputconfig)). **Chunks** are all just `StreamChunk` regardless of phase, so one `onChunk` plus `ctx.phase` (or the CUSTOM event name) covers every case — a parallel chunk hook would be redundant.
+
 ### onBeforeToolCall
 
 Called before each tool executes. The first middleware that returns a non-void decision short-circuits — remaining middleware are skipped for that tool call.
@@ -512,127 +564,15 @@ const stream = chat({
 
 ## Built-in Middleware
 
-### toolCacheMiddleware
-
-Caches tool call results based on tool name and arguments. When a tool is called with the same name and arguments as a previous call, the cached result is returned immediately without re-executing the tool.
-
-```typescript
-import { chat } from "@tanstack/ai";
-import { toolCacheMiddleware } from "@tanstack/ai/middlewares";
-
-const stream = chat({
-  adapter: openaiText("gpt-4o"),
-  messages,
-  tools: [weatherTool, stockTool],
-  middleware: [
-    toolCacheMiddleware({
-      ttl: 60_000, // Cache entries expire after 60 seconds
-      maxSize: 50, // Keep at most 50 entries (LRU eviction)
-      toolNames: ["getWeather"], // Only cache specific tools
-    }),
-  ],
-});
-```
-
-**Options:**
-
-| Option | Type | Default | Description |
-|--------|------|---------|-------------|
-| `maxSize` | `number` | `100` | Maximum cache entries. Oldest evicted first (LRU). Only applies to the default in-memory storage. |
-| `ttl` | `number` | `Infinity` | Time-to-live in milliseconds. Expired entries are not served. |
-| `toolNames` | `string[]` | All tools | Only cache these tools. Others pass through. |
-| `keyFn` | `(toolName, args) => string` | `JSON.stringify([toolName, args])` | Custom cache key derivation. |
-| `storage` | `ToolCacheStorage` | In-memory Map | Custom storage backend. When provided, `maxSize` is ignored — the storage manages its own capacity. |
-
-**Behaviors:**
-
-- Only successful tool calls are cached — errors are never stored
-- Cache hits trigger `{ type: 'skip', result }` via `onBeforeToolCall`
-- LRU eviction: when `maxSize` is reached, the oldest entry is removed (default storage only)
-- Cache hits refresh the entry's LRU position (moved to most-recently-used)
-
-**Custom key function** — useful when you want to ignore certain arguments:
-
-```typescript
-toolCacheMiddleware({
-  keyFn: (toolName, args) => {
-    // Ignore pagination, cache by query only
-    const { page, ...rest } = args as Record<string, unknown>;
-    return JSON.stringify([toolName, rest]);
-  },
-});
-```
-
-#### Custom Storage
+TanStack AI ships ready-made middleware for common cases — caching tool results, redacting streamed text, and OpenTelemetry tracing:
 
-By default the cache lives in-memory and is scoped to a single `toolCacheMiddleware()` instance. Pass a `storage` option to use an external backend like Redis, localStorage, or a database. This also enables **sharing a cache across multiple `chat()` calls**.
+| Middleware | Import | What it does |
+|------------|--------|--------------|
+| `toolCacheMiddleware` | `@tanstack/ai/middlewares` | Cache tool-call results by name + arguments |
+| `contentGuardMiddleware` | `@tanstack/ai/middlewares` | Redact / transform / block streamed text content |
+| `otelMiddleware` | `@tanstack/ai/middlewares/otel` | Emit OpenTelemetry spans + GenAI metrics |
 
-The storage interface:
-
-```typescript
-import type { ToolCacheStorage, ToolCacheEntry } from "@tanstack/ai/middlewares";
-
-interface ToolCacheStorage {
-  getItem: (key: string) => ToolCacheEntry | undefined | Promise<ToolCacheEntry | undefined>;
-  setItem: (key: string, value: ToolCacheEntry) => void | Promise<void>;
-  deleteItem: (key: string) => void | Promise<void>;
-}
-
-// ToolCacheEntry is { result: unknown, timestamp: number }
-```
-
-All methods may return a `Promise` for async backends. The middleware handles TTL checking — your storage just needs to store and retrieve entries.
-
-**Redis example:**
-
-```typescript
-import { createClient } from "redis";
-import { toolCacheMiddleware, type ToolCacheStorage } from "@tanstack/ai/middlewares";
-
-const redis = createClient();
-
-const redisStorage: ToolCacheStorage = {
-  getItem: async (key) => {
-    const raw = await redis.get(`tool-cache:${key}`);
-    return raw ? JSON.parse(raw) : undefined;
-  },
-  setItem: async (key, value) => {
-    await redis.set(`tool-cache:${key}`, JSON.stringify(value));
-  },
-  deleteItem: async (key) => {
-    await redis.del(`tool-cache:${key}`);
-  },
-};
-
-const stream = chat({
-  adapter,
-  messages,
-  tools: [weatherTool],
-  middleware: [toolCacheMiddleware({ storage: redisStorage, ttl: 60_000 })],
-});
-```
-
-**Sharing a cache across requests:**
-
-```typescript
-// Create storage once, reuse across chat() calls
-const sharedStorage: ToolCacheStorage = {
-  getItem: (key) => globalCache.get(key),
-  setItem: (key, value) => { globalCache.set(key, value); },
-  deleteItem: (key) => { globalCache.delete(key); },
-};
-
-// Both requests share the same cache
-app.post("/api/chat", async (req) => {
-  const stream = chat({
-    adapter,
-    messages: req.body.messages,
-    tools: [weatherTool],
-    middleware: [toolCacheMiddleware({ storage: sharedStorage })],
-  });
-  return toServerSentEventsResponse(stream);
-});
-```
+See [Built-in Middleware](./built-in-middleware) for full options and examples for each. The recipes below show how to build your own.
 
 ## Recipes
 
@@ -758,7 +698,7 @@ const errorRecovery: ChatMiddleware = {
 
 ## TypeScript Types
 
-All middleware types are exported from `@tanstack/ai`:
+The core middleware types are exported from `@tanstack/ai`:
 
 ```typescript
 import type {
@@ -770,19 +710,33 @@ import type {
   ToolCallHookContext,
   BeforeToolCallDecision,
   AfterToolCallInfo,
+  IterationInfo,
+  ToolPhaseCompleteInfo,
   UsageInfo,
   FinishInfo,
   AbortInfo,
   ErrorInfo,
+} from "@tanstack/ai";
+```
+
+The option/type surfaces for the [built-in middleware](./built-in-middleware) are exported from the `@tanstack/ai/middlewares` subpath (not the root `@tanstack/ai` entry point):
+
+```typescript
+import type {
   ToolCacheMiddlewareOptions,
   ToolCacheStorage,
   ToolCacheEntry,
-} from "@tanstack/ai";
+  ContentGuardMiddlewareOptions,
+  ContentGuardRule,
+  ContentFilteredInfo,
+} from "@tanstack/ai/middlewares";
 ```
 
 ## Next Steps
 
+- [Built-in Middleware](./built-in-middleware) — `toolCacheMiddleware`, `contentGuardMiddleware`, `otelMiddleware`
+- [OpenTelemetry](./otel) — emit traces and metrics via `otelMiddleware`
+- [Observability](./observability) — Event-driven observability with the event client
 - [Tools](../tools/tools) — Learn about the isomorphic tool system
 - [Agentic Cycle](../chat/agentic-cycle) — Understand the multi-step agent loop
-- [Observability](./observability) — Event-driven observability with the event client
 - [Streaming](../chat/streaming) — How streaming works in TanStack AI
diff --git a/docs/advanced/multimodal-content.md b/docs/advanced/multimodal-content.md
index 86d561fdb..a712151e1 100644
--- a/docs/advanced/multimodal-content.md
+++ b/docs/advanced/multimodal-content.md
@@ -99,7 +99,7 @@ OpenAI supports images and audio in their vision and audio models:
 ```typescript
 import { openaiText } from '@tanstack/ai-openai'
 
-const adapter = openaiText()
+const adapter = openaiText('gpt-5.2')
 
 // Image with detail level metadata
 const message = {
@@ -117,7 +117,7 @@ const message = {
 
 **Supported modalities by model:**
 - `gpt-5.2`, `gpt-5-mini`: text, image
-- `gpt-5.2-audio-preview`: text, image, audio
+- `gpt-4o-audio`: text, audio
 
 ### Anthropic
 
@@ -126,7 +126,7 @@ Anthropic's Claude models support images and PDF documents:
 ```typescript
 import { anthropicText } from '@tanstack/ai-anthropic'
 
-const adapter = anthropicText()
+const adapter = anthropicText('claude-sonnet-4-5')
 
 // Image with mimeType in source
 const imageMessage = {
@@ -164,7 +164,7 @@ Google's Gemini models support a wide range of modalities:
 ```typescript
 import { geminiText } from '@tanstack/ai-gemini'
 
-const adapter = geminiText()
+const adapter = geminiText('gemini-2.0-flash')
 
 // Image with mimeType in source
 const message = {
@@ -180,8 +180,7 @@ const message = {
 ```
 
 **Supported modalities:**
-- `gemini-1.5-pro`, `gemini-1.5-flash`: text, image, audio, video, document
-- `gemini-2.0-flash`: text, image, audio, video, document
+- `gemini-2.0-flash`: text, image, audio, video
 
 ### Ollama
 
@@ -190,7 +189,9 @@ Ollama supports images in compatible models:
 ```typescript
 import { ollamaText } from '@tanstack/ai-ollama'
 
-const adapter = ollamaText('http://localhost:11434')
+// `ollamaText(model)` takes a model name. The host is read from the
+// `OLLAMA_HOST` environment variable (defaults to http://localhost:11434).
+const adapter = ollamaText('llama3.2-vision')
 
 // Image as base64
 const message = {
@@ -290,7 +291,7 @@ import type {
 // Provider-specific metadata types
 import type { OpenAIImageMetadata } from '@tanstack/ai-openai'
 import type { AnthropicImageMetadata } from '@tanstack/ai-anthropic'
-import type { GeminiMediaMetadata } from '@tanstack/ai-gemini'
+import type { GeminiImageMetadata } from '@tanstack/ai-gemini'
 ```
 
 ### Validating Dynamic Messages
@@ -311,7 +312,10 @@ const ContentPartSchema = z.discriminatedUnion('type', [
 ])
 
 const MessageSchema = z.object({
-  role: z.enum(['user', 'assistant', 'system']),
+  // `ModelMessage.role` is 'user' | 'assistant' | 'tool' — there is no
+  // 'system' role. System instructions are passed separately via the
+  // `systemPrompts` option on `chat()`, not as messages.
+  role: z.enum(['user', 'assistant', 'tool']),
   content: z.union([z.string(), z.array(ContentPartSchema)]),
 })
 
@@ -477,8 +481,7 @@ function ChatWithFileUpload() {
         { type: 'text', content: `Please analyze this ${type}` },
         {
           type,
-          source: { type: 'data', value: base64 },
-          metadata: { mimeType: file.type }
+          source: { type: 'data', value: base64, mimeType: file.type }
         }
       ]
     })
diff --git a/docs/advanced/observability.md b/docs/advanced/observability.md
index a9f9b0d54..8eb993f87 100644
--- a/docs/advanced/observability.md
+++ b/docs/advanced/observability.md
@@ -1,7 +1,7 @@
 ---
 title: Observability
 id: observability
-order: 2
+order: 4
 description: "Subscribe to TanStack AI events for observability and debugging — tool calls, streaming chunks, usage, and errors via the type-safe event client."
 keywords:
   - tanstack ai
diff --git a/docs/advanced/otel.md b/docs/advanced/otel.md
index a8c14673b..c6bf683e0 100644
--- a/docs/advanced/otel.md
+++ b/docs/advanced/otel.md
@@ -1,7 +1,7 @@
 ---
 title: OpenTelemetry
 id: otel
-order: 4
+order: 3
 description: "Emit vendor-neutral OpenTelemetry traces and metrics from every TanStack AI chat() call, following the OTel GenAI semantic conventions."
 keywords:
   - tanstack ai
@@ -29,7 +29,7 @@ Wire up your OTel SDK however you already do (e.g. `@opentelemetry/sdk-node`). T
 ```ts
 import { chat } from '@tanstack/ai'
 import { otelMiddleware } from '@tanstack/ai/middlewares/otel'
-import { openaiText } from '@tanstack/ai-openai/adapters'
+import { openaiText } from '@tanstack/ai-openai'
 import { trace, metrics } from '@opentelemetry/api'
 
 const otel = otelMiddleware({
diff --git a/docs/advanced/per-model-type-safety.md b/docs/advanced/per-model-type-safety.md
index 27468ac38..a7ed8e2d3 100644
--- a/docs/advanced/per-model-type-safety.md
+++ b/docs/advanced/per-model-type-safety.md
@@ -15,8 +15,16 @@ keywords:
 
 The AI SDK provides **model-specific type safety** for `modelOptions`. Each model's capabilities determine which model options are allowed, and TypeScript will enforce this at compile time.
 
+> **Tip:** For structured outputs, most users should prefer the first-class `chat({ outputSchema })` option over the raw provider `text` option shown below — it works across providers and validates the result for you. The raw `text` option is for when you need provider-specific control.
+
 ## How It Works
- 
+
+Each adapter factory captures the model literal as a type parameter — `openaiText<TModel>(model)` — so the adapter carries the exact model you selected at the type level.
+
+The `modelOptions` you pass are then resolved against a per-model map (`ResolveProviderOptions<TModel>`). Each model's entry declares only the options that model actually supports. A model without a structured-output capability simply has no `text` property in its resolved options type, so TypeScript's excess-property checking rejects `text` for that model — at compile time, with zero runtime cost.
+
+This is the same mechanism described in [Typed Pre-Configured Options](./typed-options) (which captures these resolved options in a reusable object) and [Extend Adapter](./extend-adapter) (which lets you attach the same typed `modelOptions` to custom models).
+
 ## Usage Examples
 
 ### ✅ Correct Usage
@@ -32,9 +40,12 @@ const validCall = chat({
   modelOptions: {
     // OK - text is included for gpt-5
     text: {
-      type: "json_schema",
-      json_schema: {
-        /* ... */
+      format: {
+        type: "json_schema",
+        name: "my_schema",
+        schema: {
+          /* JSON Schema object */
+        },
       },
     },
   },
diff --git a/docs/advanced/runtime-adapter-switching.md b/docs/advanced/runtime-adapter-switching.md
index 96f4cafcf..ace15588c 100644
--- a/docs/advanced/runtime-adapter-switching.md
+++ b/docs/advanced/runtime-adapter-switching.md
@@ -53,10 +53,10 @@ const adapter1 = openaiText('gpt-5.2')
 const adapter2 = new OpenAITextAdapter({ apiKey: process.env.OPENAI_API_KEY }, 'gpt-5.2')
 
 // The model is stored on the adapter
-console.log(adapter1.selectedModel) // 'gpt-5.2'
+console.log(adapter1.model) // 'gpt-5.2'
 ```
 
-When you pass an adapter to `chat()`, it uses the model from `adapter.selectedModel`. This means:
+When you pass an adapter to `chat()`, it uses the model from `adapter.model`. This means:
 
 - **Full autocomplete** - When typing the model name, TypeScript knows valid options
 - **Type validation** - Invalid model names cause compile errors
@@ -79,7 +79,7 @@ type Provider = 'openai' | 'anthropic' | 'gemini' | 'ollama'
 // Define adapters with their models
 const adapters = {
   anthropic: () => anthropicText('claude-sonnet-4-5'),
-  gemini: () => geminiText('gemini-2.0-flash-exp'),
+  gemini: () => geminiText('gemini-2.0-flash'),
   ollama: () => ollamaText('mistral:7b'),
   openai: () => openaiText('gpt-5.2'),
 }
diff --git a/docs/advanced/tree-shaking.md b/docs/advanced/tree-shaking.md
index 9f34060cc..e8107bdcb 100644
--- a/docs/advanced/tree-shaking.md
+++ b/docs/advanced/tree-shaking.md
@@ -170,7 +170,7 @@ The tree-shakeable design doesn't sacrifice type safety. Each adapter provides f
 ```ts
 import { openaiText, type OpenAIChatModel } from '@tanstack/ai-openai'
 
-const adapter = openaiText()
+const adapter = openaiText('gpt-5.2')
 
 // TypeScript knows the exact models supported
 const model: OpenAIChatModel = 'gpt-5.2' // ✓ Valid
@@ -186,6 +186,7 @@ import {
   createChatOptions,
   createImageOptions
 } from '@tanstack/ai'
+import { openaiText } from '@tanstack/ai-openai'
 
 // Only import what you need
 const chatOptions = createChatOptions({
@@ -222,12 +223,7 @@ import { openaiText } from '@tanstack/ai-openai'
 
 ### Real-World Impact
 
-For a typical chat application:
-
-- **Monolithic approach**: ~200KB+ (all activities + all adapters)
-- **Tree-shakeable approach**: ~50KB (only chat + one adapter)
-
-That's a **75% reduction** in bundle size for most applications!
+For a typical chat application, importing a single activity and one adapter pulls in substantially less code than bundling every activity and every provider adapter. Because each activity and adapter lives in its own side-effect-free module, your bundler drops everything you don't reference — so the more providers and activities the library supports, the larger the difference between a focused import and a namespace import.
 
 ## How It Works
 
@@ -282,10 +278,10 @@ Each adapter type implements a specific interface:
 All adapters have a `kind` property that indicates their type:
 
 ```ts
-const chatAdapter = openaiText()
+const chatAdapter = openaiText('gpt-5.2')
 console.log(chatAdapter.kind) // 'text'
 
-const summarizeAdapter = openaiSummarize()
+const summarizeAdapter = openaiSummarize('gpt-5-mini')
 console.log(summarizeAdapter.kind) // 'summarize'
 ```
 
diff --git a/docs/advanced/typed-options.md b/docs/advanced/typed-options.md
index c9c4392d0..000187dd7 100644
--- a/docs/advanced/typed-options.md
+++ b/docs/advanced/typed-options.md
@@ -42,7 +42,7 @@ const chatOptions = createChatOptions({
 const stream = chat({ ...chatOptions, messages })
 ```
 
-Without the helper you'd have to either inline the configuration at every call site, or type the object yourself with `TextActivityOptions<...>` and resolve the generics manually — `createChatOptions` does that for you.
+Without the helper you'd have to either inline the configuration at every call site, or hand-write the full chat options type with its adapter/model generics resolved manually — `createChatOptions` does that for you.
 
 ## When to reach for it
 
@@ -153,7 +153,7 @@ The same pattern works for `createVideoOptions`, `createSpeechOptions`, `createT
 ## What the helper does NOT do
 
 - **No runtime behavior.** `createChatOptions(opts)` is `opts`. There is no validation, freezing, cloning, or memoization. If you mutate the returned object after creation, the next call sees the mutation. Treat the result as immutable by convention.
-- **No partial typing.** The helper expects the full options shape it'll be spread into. If you need to build options up incrementally, type the intermediate state yourself (`Partial<TextActivityOptions<...>>`) and only call the helper at the boundary where the shape is complete.
+- **No partial typing.** The helper expects the full options shape it'll be spread into. If you need to build options up incrementally, type the intermediate state yourself (a `Partial<>` of the full chat options shape) and only call the helper at the boundary where the shape is complete.
 - **No request execution.** The helper does not call the model. Only the activity function (`chat`, `generateImage`, …) makes the request.
 
 ## Related
diff --git a/docs/chat/agentic-cycle.md b/docs/chat/agentic-cycle.md
index bdabba19f..43c42c977 100644
--- a/docs/chat/agentic-cycle.md
+++ b/docs/chat/agentic-cycle.md
@@ -86,7 +86,7 @@ Here's a real-world example of the agentic cycle:
 - Tool returns: `[{id: "F1", price: 450}, {id: "F2", price: 480}]`
 
 **Cycle 2**: LLM analyzes results and calls `bookFlight({flightId: "F1"})`
-- Tool requires approval (sensitive operation)
+- Tool requires approval (sensitive operation) — see [Tool Approval](../tools/tool-approval)
 - User approves
 - Tool returns: `{bookingId: "B123", confirmed: true}`
 
@@ -96,6 +96,10 @@ Here's a real-world example of the agentic cycle:
 ### Code Example: Agentic Weather Assistant
 
 ```typescript
+import { chat, toolDefinition, toServerSentEventsResponse } from "@tanstack/ai";
+import { openaiText } from "@tanstack/ai-openai";
+import { z } from "zod";
+
 // Tool definitions
 const getWeatherDef = toolDefinition({
   name: "get_weather",
@@ -148,3 +152,40 @@ export async function POST(request: Request) {
 1. LLM calls `get_weather({city: "San Francisco"})` → Returns `{temp: 62, conditions: "cloudy"}`
 2. LLM calls `get_clothing_advice({temperature: 62, conditions: "cloudy"})` → Returns `{recommendation: "Light jacket recommended"}`
 3. LLM generates: "The weather in San Francisco is 62°F and cloudy. I recommend wearing a light jacket."
+
+The loop continues only while the model's finish reason is `tool_calls` (with pending tool calls) **and** the agent loop strategy permits another iteration; it ends as soon as the model returns a normal `stop` finish reason.
+
+### Controlling the loop
+
+By default the loop is bounded by `maxIterations(5)` — after five iterations it stops even if the model would keep calling tools. Override this with the `agentLoopStrategy` option:
+
+```typescript
+import { chat, maxIterations } from "@tanstack/ai";
+import { openaiText } from "@tanstack/ai-openai";
+
+const stream = chat({
+  adapter: openaiText("gpt-5.2"),
+  messages,
+  tools: [getWeather, getClothingAdvice],
+  agentLoopStrategy: maxIterations(3), // default is 5
+});
+```
+
+Other built-in strategies:
+
+- **`untilFinishReason([...])`** — continue until the model returns one of the given finish reasons (e.g. `untilFinishReason(["stop", "length"])`).
+- **`combineStrategies([...])`** — combine multiple strategies with AND logic; the loop continues only while every strategy agrees.
+
+A strategy is just a function that receives `{ iterationCount, finishReason, messages }` and returns `true` to allow another iteration or `false` to stop, so you can also write your own:
+
+```typescript
+const stream = chat({
+  adapter: openaiText("gpt-5.2"),
+  messages,
+  tools: [getWeather, getClothingAdvice],
+  agentLoopStrategy: combineStrategies([
+    maxIterations(10),
+    ({ messages }) => messages.length < 100,
+  ]),
+});
+```
diff --git a/docs/chat/streaming.md b/docs/chat/streaming.md
index a11bd2ca2..1cd54178e 100644
--- a/docs/chat/streaming.md
+++ b/docs/chat/streaming.md
@@ -89,18 +89,17 @@ TanStack AI implements the [AG-UI Protocol](https://docs.ag-ui.com/introduction)
 
 ### Thinking Chunks
 
-Thinking/reasoning is represented by AG-UI events `STEP_STARTED` and `STEP_FINISHED`. They stream separately from the final response text:
+Adapters emit reasoning as both the canonical `REASONING_MESSAGE_*` events and the older `STEP_STARTED` / `STEP_FINISHED` events. Rather than parsing those raw events yourself, read the reconciled `ThinkingPart` from `message.parts` — the stream processor merges both event families into a single part for you:
 
 ```typescript
-for await (const chunk of stream) {
-  if (chunk.type === "STEP_FINISHED") {
-    console.log("Thinking:", chunk.content); // Accumulated thinking content
-    console.log("Delta:", chunk.delta); // Incremental thinking token
+for (const part of message.parts) {
+  if (part.type === "thinking") {
+    console.log("Thinking:", part.content); // Accumulated thinking content
   }
 }
 ```
 
-Thinking content is automatically converted to `ThinkingPart` in `UIMessage` objects. It is UI-only and excluded from messages sent back to the model.
+Thinking content is automatically converted to `ThinkingPart` in `UIMessage` objects. It is UI-only and excluded from messages sent back to the model. See [Thinking & Reasoning](./thinking-content) for the full rendering pattern.
 
 ## Connection Adapters
 
@@ -128,23 +127,21 @@ const { messages } = useChat({
 
 ### Custom Stream
 
-```typescript
-import { stream } from "@tanstack/ai-react";
+For a fully custom request, use the `fetcher` transport. The fetcher receives the request input plus an `AbortSignal`, and produces a `Response` (whose SSE body the client parses) or an `AsyncIterable<StreamChunk>`. The fetcher can be a plain function that returns that value synchronously, a function that returns a `Promise` of it, or an `async function*` that yields the chunks itself:
 
+```typescript
 const { messages } = useChat({
-  connection: stream(async (messages, data, signal) => {
-    // Custom streaming implementation
-    const response = await fetch("/api/chat", {
+  fetcher: ({ messages, data }, { signal }) =>
+    fetch("/api/chat", {
       method: "POST",
       body: JSON.stringify({ messages, ...data }),
       signal,
-    });
-    // Return async iterable
-    return processStream(response);
-  }),
+    }),
 });
 ```
 
+> **Note:** The lower-level `stream()` connection adapter takes a factory that must return an `AsyncIterable<StreamChunk>` **synchronously** (e.g. a generator) — it does not accept an `async (...) => {...}` function that returns a `Promise`. Prefer the `fetcher` transport above unless you specifically need the connection adapter.
+
 ## Monitoring Stream Progress
 
 You can monitor stream progress with callbacks:
@@ -174,6 +171,15 @@ const { stop } = useChat({
 stop();
 ```
 
+Calling `stop()` aborts the underlying fetch; the resulting `AbortError` is expected and normal. This differs from a connection being cut mid-line: a truncated stream throws a `StreamTruncatedError` and moves the client into its `error` state. See [Connection Adapters](./connection-adapters) for the underlying behavior.
+
+On the server, pass an `AbortController` to `toServerSentEventsResponse(stream, { abortController })` so the chat run is cancelled when the client disconnects:
+
+```typescript
+const abortController = new AbortController();
+return toServerSentEventsResponse(stream, { abortController });
+```
+
 ## Best Practices
 
 1. **Handle loading states** - Use `isLoading` to show loading indicators
diff --git a/docs/chat/thinking-content.md b/docs/chat/thinking-content.md
index 831f2ce4e..de39e2a6e 100644
--- a/docs/chat/thinking-content.md
+++ b/docs/chat/thinking-content.md
@@ -20,16 +20,18 @@ Thinking content is **UI-only**. It is never sent back to the model in subsequen
 
 ## How It Works
 
-When a model emits reasoning tokens, the adapter converts them into AG-UI `STEP_STARTED` and `STEP_FINISHED` events. The stream processor accumulates these into a single `ThinkingPart` on the assistant's `UIMessage`:
+When a model emits reasoning tokens, the adapter emits AG-UI events for them. Adapters emit `REASONING_MESSAGE_*` events (the preferred, canonical form) **and** the older `STEP_STARTED` / `STEP_FINISHED` events. The stream processor reconciles both into a single `ThinkingPart` on the assistant's `UIMessage`, deduplicating overlapping content. You should rely on the `ThinkingPart` in `message.parts` rather than hand-parsing the raw events:
 
 ```typescript
 interface ThinkingPart {
   type: "thinking";
   content: string;
+  stepId?: string;
+  signature?: string;
 }
 ```
 
-The `ThinkingPart` appears in `UIMessage.parts` alongside `TextPart` and `ToolCallPart` entries. Each `STEP_FINISHED` event carries an incremental `delta` and the full accumulated `content`, so you always have both the latest token and the complete thinking so far.
+The `ThinkingPart` appears in `UIMessage.parts` alongside `TextPart` and `ToolCallPart` entries. As reasoning tokens arrive, its `content` accumulates token by token.
 
 ## Enabling Thinking
 
@@ -37,34 +39,38 @@ How you enable thinking depends on the provider.
 
 ### Anthropic (Extended Thinking)
 
-Pass the `thinking` option in `providerOptions`. You must specify `budget_tokens` (minimum 1024):
+Pass the `thinking` option in `modelOptions`. You must specify `budget_tokens` (minimum 1024). Validation also requires `budget_tokens` to be **less than** `maxTokens`, so set `maxTokens` high enough to leave room for the visible response:
 
 ```typescript
 import { chat } from "@tanstack/ai";
 import { anthropicText } from "@tanstack/ai-anthropic";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-20250514"),
+  adapter: anthropicText("claude-sonnet-4-5"),
   messages,
-  providerOptions: {
+  maxTokens: 32000,
+  modelOptions: {
+    // budget_tokens must satisfy 1024 <= budget_tokens < maxTokens
     thinking: { type: "enabled", budget_tokens: 10000 },
   },
 });
 ```
 
-For Claude Opus 4.6 and later, you can use adaptive thinking, where the model decides how much to think:
+For Claude Opus 4.6 and later, you can use adaptive thinking, where the model decides how much to think. On these models you pair `thinking: { type: "adaptive" }` with an `effort` key set directly on `modelOptions` (a sibling of `thinking`, not nested inside it):
 
 ```typescript
 const stream = chat({
-  adapter: anthropicText("claude-opus-4-6-20250514"),
+  adapter: anthropicText("claude-opus-4-6"),
   messages,
-  providerOptions: {
+  modelOptions: {
     thinking: { type: "adaptive" },
     effort: "high", // 'max' | 'high' | 'medium' | 'low'
   },
 });
 ```
 
+> **Claude 4.7+:** Adaptive effort moved under `modelOptions: { output_config: { effort: "high" } }`, and `thinking: { type: "enabled", budget_tokens }` is deprecated in favor of `thinking: { type: "adaptive" }` there. The top-level `effort` field shown above is the Opus 4.6 form; earlier models continue to accept the legacy top-level `effort` / `thinking.type: "enabled"` shape.
+
 ### OpenAI (Reasoning Models)
 
 OpenAI o-series models (o1, o3, o3-mini, o3-pro) perform reasoning automatically. You can control the depth with the `reasoning` option:
@@ -76,9 +82,9 @@ import { openaiText } from "@tanstack/ai-openai";
 const stream = chat({
   adapter: openaiText("o3-mini"),
   messages,
-  providerOptions: {
+  modelOptions: {
     reasoning: {
-      effort: "medium", // 'low' | 'medium' | 'high'
+      effort: "medium", // 'none' | 'minimal' | 'low' | 'medium' | 'high'
       summary: "auto", // 'auto' | 'detailed'
     },
   },
@@ -87,13 +93,13 @@ const stream = chat({
 
 When `reasoning.summary` is set, the adapter streams reasoning summary text as thinking content. Without it, reasoning tokens are still used internally but may not be surfaced depending on the model.
 
-GPT-5 and later models also support reasoning when you set the `effort` to a non-`none` value:
+GPT-5 and later models also support reasoning. Their `reasoning.effort` accepts `"none" | "minimal" | "low" | "medium" | "high"`, and reasoning activates on any non-`none` value:
 
 ```typescript
 const stream = chat({
-  adapter: openaiText("gpt-5"),
+  adapter: openaiText("gpt-5.2"),
   messages,
-  providerOptions: {
+  modelOptions: {
     reasoning: { effort: "high" },
   },
 });
@@ -134,12 +140,12 @@ Thinking content streams **before** the final text response. As reasoning tokens
 
 The typical streaming order is:
 
-1. `STEP_STARTED` -- marks the beginning of a thinking block
-2. `STEP_FINISHED` (repeated) -- each carries a `delta` with the new token and `content` with the full thinking so far
+1. The reasoning block begins (`REASONING_MESSAGE_START`, plus a legacy `STEP_STARTED`)
+2. Reasoning tokens stream in (`REASONING_MESSAGE_CONTENT`, plus legacy `STEP_FINISHED` events), accumulating into `ThinkingPart.content`
 3. `TEXT_MESSAGE_START` -- the model begins its visible response
 4. `TEXT_MESSAGE_CONTENT` (repeated) -- the response text streams in
 
-The stream processor handles all of this for you. If you use `useChat` from `@tanstack/ai-react` (or the Solid/Vue/Svelte equivalents), your `messages` array updates automatically with both thinking and text parts as they arrive.
+Adapters emit both the canonical `REASONING_MESSAGE_*` events and the older `STEP_*` events; the stream processor reconciles them into one `ThinkingPart` so you never have to hand-parse the raw events. If you use `useChat` from `@tanstack/ai-react` (or the Solid/Vue/Svelte equivalents), your `messages` array updates automatically with both thinking and text parts as they arrive.
 
 ## Next Steps
 
diff --git a/docs/community-adapters/cencori.md b/docs/community-adapters/cencori.md
index f1de31217..64ea46197 100644
--- a/docs/community-adapters/cencori.md
+++ b/docs/community-adapters/cencori.md
@@ -33,7 +33,7 @@ for await (const chunk of chat({
   adapter,
   messages: [{ role: "user", content: "Hello!" }],
 })) {
-  if (chunk.type === "content") {
+  if (chunk.type === "TEXT_MESSAGE_CONTENT") {
     console.log(chunk.delta);
   }
 }
@@ -64,9 +64,9 @@ for await (const chunk of chat({
   adapter,
   messages: [{ role: "user", content: "Tell me a story" }],
 })) {
-  if (chunk.type === "content") {
+  if (chunk.type === "TEXT_MESSAGE_CONTENT") {
     process.stdout.write(chunk.delta);
-  } else if (chunk.type === "done") {
+  } else if (chunk.type === "RUN_FINISHED") {
     console.log("\nDone:", chunk.finishReason);
   }
 }
@@ -76,27 +76,32 @@ for await (const chunk of chat({
 ## Tool Calling
 
 ```typescript
-import { chat } from "@tanstack/ai";
+import { chat, toolDefinition } from "@tanstack/ai";
 import { cencori } from "@cencori/ai-sdk/tanstack";
+import { z } from "zod";
 
 const adapter = cencori("gpt-4o");
 
+const getWeatherDef = toolDefinition({
+  name: "getWeather",
+  description: "Get weather for a location",
+  inputSchema: z.object({ location: z.string() }),
+});
+
+const getWeather = getWeatherDef.server(async ({ location }) => {
+  // Look up the weather for `location`
+  return { temperature: 72, conditions: "Sunny" };
+});
+
 for await (const chunk of chat({
   adapter,
   messages: [{ role: "user", content: "What's the weather in NYC?" }],
-  tools: {
-    getWeather: {
-      name: "getWeather",
-      description: "Get weather for a location",
-      inputSchema: {
-        type: "object",
-        properties: { location: { type: "string" } },
-      },
-    },
-  },
+  tools: [getWeather],
 })) {
-  if (chunk.type === "tool_call") {
-    console.log("Tool call:", chunk.toolCall);
+  if (chunk.type === "TOOL_CALL_START") {
+    console.log("Tool call:", chunk.toolName);
+  } else if (chunk.type === "TOOL_CALL_END") {
+    console.log("Tool result:", chunk.result);
   }
 }
 ```
@@ -139,6 +144,8 @@ All responses use the same unified format regardless of provider.
 | DeepSeek | `deepseek-v3.2`, `deepseek-reasoner` |
 | + More | Groq, Cohere, Perplexity, Together, Qwen, OpenRouter |
 
+> **Note:** Cencori is an external package and its catalogue changes over time. Verify the model ids above against [Cencori's current catalogue](https://cencori.com/docs) before relying on them.
+
 ## Environment Variables
 
 ```bash
diff --git a/docs/comparison/vercel-ai-sdk.md b/docs/comparison/vercel-ai-sdk.md
index 415a4731c..4de745f86 100644
--- a/docs/comparison/vercel-ai-sdk.md
+++ b/docs/comparison/vercel-ai-sdk.md
@@ -310,7 +310,11 @@ The available hooks cover the full lifecycle:
 
 Middleware compose naturally. `onConfig` pipes through each middleware in order. `onChunk` pipes chunks through each middleware (if one drops a chunk, later middleware never see it). `onBeforeToolCall` uses first-win semantics: the first middleware that returns a decision short-circuits the rest.
 
-TanStack AI also ships `toolCacheMiddleware` built-in, which caches tool results by name and arguments with configurable TTL, LRU eviction, and pluggable storage backends (Redis, localStorage, etc.).
+TanStack AI also ships `toolCacheMiddleware` built-in (imported from the `@tanstack/ai/middlewares` subpath), which caches tool results by name and arguments with configurable TTL, LRU eviction, and pluggable storage backends (Redis, localStorage, etc.).
+
+```ts
+import { toolCacheMiddleware } from '@tanstack/ai/middlewares'
+```
 
 Vercel AI SDK takes a different approach: `wrapLanguageModel()` wraps a model instance with middleware that can intercept and transform calls. It ships several built-in middleware (`extractReasoningMiddleware`, `simulateStreamingMiddleware`, `defaultSettingsMiddleware`), but these operate at the model level rather than the application level. There's no equivalent to TanStack AI's tool call interception, chunk-level stream processing, or lifecycle hooks like `onBeforeToolCall` and `onAfterToolCall`.
 
diff --git a/docs/config.json b/docs/config.json
index fc80a89cb..c9ce782c5 100644
--- a/docs/config.json
+++ b/docs/config.json
@@ -194,19 +194,15 @@
       ]
     },
     {
-      "label": "Advanced",
+      "label": "Middleware",
       "children": [
         {
           "label": "Middleware",
           "to": "advanced/middleware"
         },
         {
-          "label": "Runtime Context",
-          "to": "advanced/runtime-context"
-        },
-        {
-          "label": "Debug Logging",
-          "to": "advanced/debug-logging"
+          "label": "Built-in Middleware",
+          "to": "advanced/built-in-middleware"
         },
         {
           "label": "OpenTelemetry",
@@ -215,6 +211,19 @@
         {
           "label": "Observability",
           "to": "advanced/observability"
+        }
+      ]
+    },
+    {
+      "label": "Advanced",
+      "children": [
+        {
+          "label": "Runtime Context",
+          "to": "advanced/runtime-context"
+        },
+        {
+          "label": "Debug Logging",
+          "to": "advanced/debug-logging"
         },
         {
           "label": "Multimodal Content",
diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md
index f7360af84..e078bcb4b 100644
--- a/docs/getting-started/quick-start.md
+++ b/docs/getting-started/quick-start.md
@@ -150,7 +150,7 @@ import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
 export function Chat() {
   const [input, setInput] = useState("");
 
-  const { messages, sendMessage, isLoading } = useChat({
+  const { messages, sendMessage, isLoading, error } = useChat({
     connection: fetchServerSentEvents("/api/chat"),
   });
 
@@ -198,6 +198,13 @@ export function Chat() {
         ))}
       </div>
 
+      {/* Error */}
+      {error && (
+        <p role="alert" className="px-4 text-red-600">
+          {error.message}
+        </p>
+      )}
+
       {/* Input */}
       <form onSubmit={handleSubmit} className="p-4 border-t">
         <div className="flex gap-2">
@@ -261,6 +268,7 @@ import { z } from 'zod'
 const getProductsDef = toolDefinition({
   name: 'getProducts',
   inputSchema: z.object({ query: z.string() }),
+  outputSchema: z.array(z.object({ id: z.string(), name: z.string() })),
 })
 
 const getProducts = getProductsDef.server(async ({ query }) => {
diff --git a/docs/media/audio-generation.md b/docs/media/audio-generation.md
index f4cf90db6..65937ca21 100644
--- a/docs/media/audio-generation.md
+++ b/docs/media/audio-generation.md
@@ -32,7 +32,7 @@ const result = await generateAudio({
   prompt: 'Uplifting indie pop with layered vocals and jangly guitars',
 })
 
-console.log(result.audio.url) // URL to the generated audio file
+console.log(result.audio.b64Json) // Base64-encoded audio bytes (Gemini)
 console.log(result.audio.contentType) // e.g. "audio/mpeg"
 ```
 
@@ -215,6 +215,9 @@ FAL_KEY=your-fal-api-key
 Or pass it explicitly to the adapter:
 
 ```typescript
-geminiAudio('lyria-3-pro-preview', { apiKey: 'your-key' })
+import { createGeminiAudio } from '@tanstack/ai-gemini'
+import { falAudio } from '@tanstack/ai-fal'
+
+createGeminiAudio('lyria-3-pro-preview', 'your-key')
 falAudio('fal-ai/diffrhythm', { apiKey: 'your-key' })
 ```
diff --git a/docs/media/generation-hooks.md b/docs/media/generation-hooks.md
index d9adadc6a..8120e9e9c 100644
--- a/docs/media/generation-hooks.md
+++ b/docs/media/generation-hooks.md
@@ -211,7 +211,7 @@ The `generate` function accepts a `TranscriptionGenerateInput`:
 
 | Field | Type | Description |
 |-------|------|-------------|
-| `audio` | `string \| File \| Blob` | Audio data -- base64 string, File, or Blob (required) |
+| `audio` | `string \| File \| Blob \| ArrayBuffer` | Audio data -- base64 string, File, Blob, or ArrayBuffer (required) |
 | `language` | `string` | Language in ISO-639-1 format (e.g., `"en"`) |
 | `prompt` | `string` | Optional prompt to guide the transcription |
 | `responseFormat` | `'json' \| 'text' \| 'srt' \| 'verbose_json' \| 'vtt'` | Output format |
diff --git a/docs/media/generations.md b/docs/media/generations.md
index cc94a28db..69038d12b 100644
--- a/docs/media/generations.md
+++ b/docs/media/generations.md
@@ -178,6 +178,27 @@ If the function throws, a `RUN_ERROR` event is emitted instead:
 
 This is the same event protocol used by chat streaming, so the same transport layer (`toServerSentEventsResponse`, `fetchServerSentEvents`) works for both.
 
+When the server emits `RUN_ERROR`, the client surfaces it on `error` (and sets `status` to `'error'`). Use the `onError` callback to react, and render `error?.message` in your UI:
+
+```tsx
+const { generate, result, error, status } = useGenerateImage({
+  connection: fetchServerSentEvents('/api/generate/image'),
+  onError: (err) => console.error('Generation failed:', err.message),
+})
+
+return (
+  <div>
+    <button onClick={() => generate({ prompt: 'A sunset over mountains' })}>
+      Generate
+    </button>
+    {status === 'error' && <p role="alert">Error: {error?.message}</p>}
+    {result?.images.map((img, i) => (
+      <img key={i} src={img.url || `data:image/png;base64,${img.b64Json}`} />
+    ))}
+  </div>
+)
+```
+
 ## Common Hook API
 
 All generation hooks share the same interface:
diff --git a/docs/media/image-generation.md b/docs/media/image-generation.md
index 4b884f7bd..d8af2e816 100644
--- a/docs/media/image-generation.md
+++ b/docs/media/image-generation.md
@@ -34,10 +34,7 @@ Image generation is handled by image adapters that follow the same tree-shakeabl
 import { generateImage } from '@tanstack/ai'
 import { openaiImage } from '@tanstack/ai-openai'
 
-// Create an image adapter (uses OPENAI_API_KEY from environment)
-const adapter = openaiImage()
-
-// Generate an image
+// Generate an image (the adapter uses OPENAI_API_KEY from environment)
 const result = await generateImage({
   adapter: openaiImage('dall-e-3'),
   prompt: 'A beautiful sunset over mountains',
@@ -437,7 +434,7 @@ The `useGenerateImage` hook accepts:
 | `fetcher` | `(input) => Promise<ImageGenerationResult \| Response>` | Direct async function, or server function returning an SSE `Response` |
 | `id` | `string` | Unique identifier for this instance |
 | `body` | `Record<string, any>` | Additional body parameters (connection mode) |
-| `onResult` | `(result) => void` | Callback when images are generated |
+| `onResult` | `(result) => TOutput \| null \| void` | Callback when images are generated. Optionally return a transformed value to store as `result` |
 | `onError` | `(error) => void` | Callback on error |
 | `onProgress` | `(progress, message?) => void` | Progress updates (0-100) |
 
@@ -471,8 +468,8 @@ import { createOpenaiImage } from '@tanstack/ai-openai'
 import { createGeminiImage } from '@tanstack/ai-gemini'
 
 // OpenAI
-const openaiAdapter = createOpenaiImage('your-openai-api-key')
+const openaiAdapter = createOpenaiImage('dall-e-3', 'your-openai-api-key')
 
 // Gemini
-const geminiAdapter = createGeminiImage('your-google-api-key')
+const geminiAdapter = createGeminiImage('imagen-4.0-generate-001', 'your-google-api-key')
 ```
diff --git a/docs/media/text-to-speech.md b/docs/media/text-to-speech.md
index ed960d0d4..1fdb0b1f5 100644
--- a/docs/media/text-to-speech.md
+++ b/docs/media/text-to-speech.md
@@ -94,8 +94,10 @@ console.log(result.format) // e.g. "wav"
 const result = await generateSpeech({
   adapter: falSpeech('fal-ai/elevenlabs/tts/eleven-v3'),
   text: 'Welcome to TanStack AI.',
+  // The fal adapter maps top-level `voice`/`speed` into the model input;
+  // `modelOptions` is reserved for model-specific keys.
+  voice: 'Rachel',
   modelOptions: {
-    voice: 'Rachel',
     stability: 0.5,
   },
 })
@@ -152,15 +154,17 @@ const result = await generateSpeech({
   text: 'High quality speech synthesis',
   voice: 'nova',
   format: 'mp3',
+  speed: 1.0, // top-level option, 0.25 to 4.0
   modelOptions: {
-    speed: 1.0, // 0.25 to 4.0
+    instructions: 'Speak in a calm, measured tone', // GPT-4o audio models only
   },
 })
 ```
 
+> **Note:** `voice`, `format`, and `speed` are top-level `generateSpeech` options, not `modelOptions` keys.
+
 | Option | Type | Description |
 |--------|------|-------------|
-| `speed` | `number` | Playback speed (0.25 to 4.0, default 1.0) |
 | `instructions` | `string` | Voice style instructions (GPT-4o audio models only) |
 
 > **Note:** The `instructions` and `stream_format` options are only available with `gpt-4o-audio-preview` and `gpt-4o-mini-audio-preview` models, not with `tts-1` or `tts-1-hd`.
diff --git a/docs/media/transcription.md b/docs/media/transcription.md
index eaf64dfad..471a6dece 100644
--- a/docs/media/transcription.md
+++ b/docs/media/transcription.md
@@ -33,10 +33,7 @@ Currently supported:
 import { generateTranscription } from '@tanstack/ai'
 import { openaiTranscription } from '@tanstack/ai-openai'
 
-// Create a transcription adapter (uses OPENAI_API_KEY from environment)
-const adapter = openaiTranscription()
-
-// Transcribe audio from a file
+// Transcribe audio from a file (the adapter reads OPENAI_API_KEY from the environment)
 const audioFile = new File([audioBuffer], 'audio.mp3', { type: 'audio/mpeg' })
 
 const result = await generateTranscription({
@@ -148,7 +145,8 @@ const result = await generateTranscription({
 | `response_format` | `string` | Output format: "json", "text", "srt", "verbose_json", "vtt" |
 | `temperature` | `number` | Sampling temperature (0 to 1) |
 | `prompt` | `string` | Optional text to guide transcription style |
-| `include` | `string[]` | Timestamp granularity: ["word"], ["segment"], or both |
+| `timestamp_granularities` | `Array<'word' \| 'segment'>` | Timestamp granularity to populate (requires `response_format: 'verbose_json'`) |
+| `include` | `string[]` | Additional values to include in the response (e.g., `logprobs`) |
 
 ### Response Formats
 
@@ -172,15 +170,17 @@ interface TranscriptionResult {
   language?: string    // Detected/specified language
   duration?: number    // Audio duration in seconds
   segments?: Array<{   // Timestamped segments
+    id: number         // Segment identifier
     start: number      // Start time in seconds
     end: number        // End time in seconds
     text: string       // Segment text
-    words?: Array<{    // Word-level timestamps
-      word: string
-      start: number
-      end: number
-      confidence?: number
-    }>
+    confidence?: number // Confidence score (0-1), if available
+    speaker?: string    // Speaker identifier, if diarization is enabled
+  }>
+  words?: Array<{      // Word-level timestamps (top-level)
+    word: string
+    start: number
+    end: number
   }>
 }
 ```
@@ -193,8 +193,6 @@ import { openaiTranscription } from '@tanstack/ai-openai'
 import { readFile } from 'fs/promises'
 
 async function transcribeAudio(filepath: string) {
-  const adapter = openaiTranscription()
-  
   // Read the audio file
   const audioBuffer = await readFile(filepath)
   const audioFile = new File(
@@ -210,7 +208,7 @@ async function transcribeAudio(filepath: string) {
     language: 'en',
     modelOptions: {
       response_format: 'verbose_json',
-      include: ['segment', 'word'],
+      timestamp_granularities: ['segment', 'word'],
     },
   })
 
@@ -475,7 +473,7 @@ The `useTranscription` hook accepts:
 |--------|------|-------------|
 | `connection` | `ConnectionAdapter` | Streaming transport (SSE, HTTP stream, custom) |
 | `fetcher` | `(input) => Promise<TranscriptionResult \| Response>` | Direct async function, or server function returning an SSE `Response` |
-| `onResult` | `(result) => void` | Callback when transcription completes |
+| `onResult` | `(result) => TOutput \| null \| void` | Callback when transcription completes. Optionally return a transformed value to store as `result` |
 | `onError` | `(error) => void` | Callback on error |
 | `onProgress` | `(progress, message?) => void` | Progress updates (0-100) |
 
@@ -525,7 +523,7 @@ The transcription adapter uses:
 ```typescript
 import { createOpenaiTranscription } from '@tanstack/ai-openai'
 
-const adapter = createOpenaiTranscription('your-openai-api-key')
+const adapter = createOpenaiTranscription('whisper-1', 'your-openai-api-key')
 ```
 
 ## Best Practices
@@ -540,5 +538,5 @@ const adapter = createOpenaiTranscription('your-openai-api-key')
 
 5. **Prompting**: Use the `prompt` option to provide context or expected vocabulary (e.g., technical terms, names).
 
-6. **Timestamps**: Request `verbose_json` format and enable `include: ['word', 'segment']` when you need timing information for captions or synchronization.
+6. **Timestamps**: Request `verbose_json` format and enable `timestamp_granularities: ['word', 'segment']` when you need timing information for captions or synchronization.
 
diff --git a/docs/media/video-generation.md b/docs/media/video-generation.md
index b42e88b6b..0c5985cc4 100644
--- a/docs/media/video-generation.md
+++ b/docs/media/video-generation.md
@@ -45,10 +45,7 @@ Currently supported:
 import { generateVideo } from '@tanstack/ai'
 import { openaiVideo } from '@tanstack/ai-openai'
 
-// Create a video adapter (uses OPENAI_API_KEY from environment)
-const adapter = openaiVideo()
-
-// Start a video generation job
+// Start a video generation job (the adapter reads OPENAI_API_KEY from the environment)
 const { jobId, model } = await generateVideo({
   adapter: openaiVideo('sora-2'),
   prompt: 'A golden retriever puppy playing in a field of sunflowers',
@@ -89,7 +86,6 @@ const result = await getVideoJobStatus({
 
 if (result.status === 'completed' && result.url) {
   console.log('Video URL:', result.url)
-  console.log('Expires at:', result.expiresAt)
 }
 ```
 
@@ -99,9 +95,7 @@ if (result.status === 'completed' && result.url) {
 import { generateVideo, getVideoJobStatus } from '@tanstack/ai'
 import { openaiVideo } from '@tanstack/ai-openai'
 
-async function generateVideo(prompt: string) {
-  const adapter = openaiVideo()
-
+async function createAndAwaitVideo(prompt: string) {
   // 1. Create the job
   const { jobId } = await generateVideo({
     adapter: openaiVideo('sora-2'),
@@ -145,7 +139,7 @@ async function generateVideo(prompt: string) {
 }
 
 // Usage
-const videoUrl = await generateVideo('A cat playing piano in a jazz bar')
+const videoUrl = await createAndAwaitVideo('A cat playing piano in a jazz bar')
 console.log('Video ready:', videoUrl)
 ```
 
@@ -342,7 +336,7 @@ The `useGenerateVideo` hook accepts all common options plus video-specific callb
 |--------|------|-------------|
 | `connection` | `ConnectionAdapter` | Streaming transport (SSE, HTTP stream, custom) |
 | `fetcher` | `(input) => Promise<VideoGenerateResult \| Response>` | Direct async function, or server function returning an SSE `Response` |
-| `onResult` | `(result) => void` | Callback when video is ready |
+| `onResult` | `(result) => TOutput \| null \| void` | Callback when video is ready. Optionally return a transformed value to store as `result` |
 | `onError` | `(error) => void` | Callback on error |
 | `onProgress` | `(progress, message?) => void` | Progress updates (0-100) |
 | `onJobCreated` | `(jobId: string) => void` | Callback when the job is created |
@@ -414,6 +408,8 @@ const { jobId } = await generateVideo({
 
 ## Response Types
 
+> **Note:** The interfaces below are the underlying adapter-level types. The `getVideoJobStatus()` helper returns a single merged object, `{ status, progress?, url?, error? }` — it does not return `jobId` or `expiresAt`.
+
 ### VideoJobResult (from create)
 
 ```typescript
diff --git a/docs/migration/ag-ui-compliance.md b/docs/migration/ag-ui-compliance.md
index 91d47d1f1..fe4d7bc51 100644
--- a/docs/migration/ag-ui-compliance.md
+++ b/docs/migration/ag-ui-compliance.md
@@ -106,7 +106,7 @@ Keep reading `body.messages` and pass it through. `chat()` accepts mixed `UIMess
 
 ```ts
 import { chat, toServerSentEventsResponse } from '@tanstack/ai'
-import { openaiText } from '@tanstack/ai-openai/adapters'
+import { openaiText } from '@tanstack/ai-openai'
 
 export async function POST(req: Request) {
   const body = await req.json()
@@ -139,7 +139,7 @@ import {
   chatParamsFromRequest,
   toServerSentEventsResponse,
 } from '@tanstack/ai'
-import { openaiText } from '@tanstack/ai-openai/adapters'
+import { openaiText } from '@tanstack/ai-openai'
 
 export async function POST(req: Request) {
   const params = await chatParamsFromRequest(req)
@@ -171,7 +171,7 @@ import {
   mergeAgentTools,
   toServerSentEventsResponse,
 } from '@tanstack/ai'
-import { openaiText } from '@tanstack/ai-openai/adapters'
+import { openaiText } from '@tanstack/ai-openai'
 
 export async function POST(req: Request) {
   const params = await chatParamsFromRequest(req)
diff --git a/docs/protocol/chunk-definitions.md b/docs/protocol/chunk-definitions.md
index 0ff513821..60c7cec3c 100644
--- a/docs/protocol/chunk-definitions.md
+++ b/docs/protocol/chunk-definitions.md
@@ -33,22 +33,35 @@ interface BaseAGUIEvent {
 
 ```typescript
 type AGUIEventType =
-  | 'RUN_STARTED'           // Run lifecycle begins
-  | 'RUN_FINISHED'          // Run completed successfully
-  | 'RUN_ERROR'             // Error occurred
-  | 'TEXT_MESSAGE_START'    // Text message begins
-  | 'TEXT_MESSAGE_CONTENT'  // Text content streaming
-  | 'TEXT_MESSAGE_END'      // Text message completes
-  | 'TOOL_CALL_START'       // Tool invocation begins
-  | 'TOOL_CALL_ARGS'        // Tool arguments streaming
-  | 'TOOL_CALL_END'         // Tool call completes (with result)
-  | 'STEP_STARTED'          // Thinking/reasoning step begins
-  | 'STEP_FINISHED'         // Thinking/reasoning step completes
-  | 'STATE_SNAPSHOT'        // Full state synchronization
-  | 'STATE_DELTA'           // Incremental state update
-  | 'CUSTOM';               // Custom extensibility events
+  | 'RUN_STARTED'                 // Run lifecycle begins
+  | 'RUN_FINISHED'                // Run completed successfully
+  | 'RUN_ERROR'                   // Error occurred
+  | 'TEXT_MESSAGE_START'          // Text message begins
+  | 'TEXT_MESSAGE_CONTENT'        // Text content streaming
+  | 'TEXT_MESSAGE_END'            // Text message completes
+  | 'TOOL_CALL_START'             // Tool invocation begins
+  | 'TOOL_CALL_ARGS'              // Tool arguments streaming
+  | 'TOOL_CALL_END'               // Tool call completes
+  | 'TOOL_CALL_RESULT'            // Tool execution result
+  | 'STEP_STARTED'                // Thinking/reasoning step begins
+  | 'STEP_FINISHED'               // Thinking/reasoning step completes
+  | 'REASONING_START'             // Reasoning begins for a message
+  | 'REASONING_MESSAGE_START'     // Reasoning message begins
+  | 'REASONING_MESSAGE_CONTENT'   // Reasoning content streaming
+  | 'REASONING_MESSAGE_END'       // Reasoning message completes
+  | 'REASONING_END'               // Reasoning ends for a message
+  | 'REASONING_ENCRYPTED_VALUE'   // Encrypted reasoning payload
+  | 'MESSAGES_SNAPSHOT'           // Full conversation transcript snapshot
+  | 'STATE_SNAPSHOT'              // Full state synchronization
+  | 'STATE_DELTA'                 // Incremental state update
+  | 'CUSTOM';                     // Custom extensibility events
 ```
 
+> The exported `EventType` enum (`@tanstack/ai`) carries a few additional
+> internal/transitional members (e.g. `TEXT_MESSAGE_CHUNK`, `TOOL_CALL_CHUNK`,
+> `THINKING_*`, `ACTIVITY_*`, `RAW`). The events above are the ones that appear
+> on the wire for a normal chat run.
+
 Only AG-UI event types are supported; previous legacy chunk formats are no longer accepted.
 
 ## AG-UI Event Definitions
@@ -255,22 +268,31 @@ interface TokenUsage {
 
 Emitted when an error occurs during a run.
 
+> **Canonical vs deprecated shape.** The AG-UI-canonical form carries
+> `message` and `code` at the **top level** of the event. The nested `error`
+> object is a TanStack AI backward-compatibility alias and is `@deprecated`;
+> prefer reading the top-level fields. Note that the wire emitter
+> (`toServerSentEventsStream` / `toHttpStream`) still emits the nested `error`
+> form, so consumers should accept either until the alias is removed.
+
 ```typescript
 interface RunErrorEvent extends BaseAGUIEvent {
   type: 'RUN_ERROR';
+  message: string;     // Canonical (AG-UI)
+  code?: string;       // Canonical (AG-UI)
   runId?: string;
-  error: {
+  /** @deprecated Use top-level `message`/`code`. Still emitted on the wire. */
+  error?: {
     message: string;
     code?: string;
   };
 }
 ```
 
-**Example:**
+**Example (as emitted on the wire — nested `error`):**
 ```json
 {
   "type": "RUN_ERROR",
-  "runId": "run_abc123",
   "model": "gpt-4o",
   "timestamp": 1701234567890,
   "error": {
@@ -344,8 +366,10 @@ Emitted when a tool call starts.
 interface ToolCallStartEvent extends BaseAGUIEvent {
   type: 'TOOL_CALL_START';
   toolCallId: string;
-  toolName: string;
-  index?: number;      // Index for parallel tool calls
+  toolCallName: string;  // Canonical (AG-UI)
+  /** @deprecated Use `toolCallName` instead. */
+  toolName: string;      // Deprecated alias, still emitted
+  index?: number;        // Index for parallel tool calls
 }
 ```
 
@@ -374,9 +398,41 @@ Emitted when a tool call completes.
 interface ToolCallEndEvent extends BaseAGUIEvent {
   type: 'TOOL_CALL_END';
   toolCallId: string;
-  toolName: string;
-  input?: unknown;     // Final parsed input arguments
-  result?: string;     // Tool execution result (if executed)
+  toolCallName?: string;  // Canonical (AG-UI)
+  /** @deprecated Use `toolCallName` instead. */
+  toolName?: string;      // Deprecated alias
+  input?: unknown;        // Final parsed input arguments (TanStack AI internal)
+  result?: string | ContentPart[]; // Tool execution result (TanStack AI internal)
+}
+```
+
+---
+
+### TOOL_CALL_RESULT
+
+Emitted when a tool's execution result is available. AG-UI carries this as a
+distinct event from `TOOL_CALL_END`: `TOOL_CALL_END` closes the call's
+argument stream, while `TOOL_CALL_RESULT` delivers the executed tool's output
+as a `tool`-role message.
+
+```typescript
+interface ToolCallResultEvent extends BaseAGUIEvent {
+  type: 'TOOL_CALL_RESULT';
+  messageId: string;   // ID of the resulting tool-role message
+  toolCallId: string;  // The tool call this result answers
+  content: string;     // Serialized tool result
+  role?: 'tool';
+}
+```
+
+**Example:**
+```json
+{
+  "type": "TOOL_CALL_RESULT",
+  "messageId": "msg_tool_1",
+  "toolCallId": "call_xyz",
+  "content": "{\"temperature\":72,\"conditions\":\"sunny\"}",
+  "timestamp": 1701234567894
 }
 ```
 
@@ -389,7 +445,9 @@ Emitted when a thinking/reasoning step starts.
 ```typescript
 interface StepStartedEvent extends BaseAGUIEvent {
   type: 'STEP_STARTED';
-  stepId: string;
+  stepName: string;    // Canonical (AG-UI)
+  /** @deprecated Use `stepName` instead. */
+  stepId?: string;     // Deprecated alias
   stepType?: string;   // e.g., 'thinking', 'planning'
 }
 ```
@@ -403,9 +461,113 @@ Emitted when a thinking/reasoning step finishes.
 ```typescript
 interface StepFinishedEvent extends BaseAGUIEvent {
   type: 'STEP_FINISHED';
-  stepId: string;
-  delta?: string;      // Incremental thinking content
-  content?: string;    // Full accumulated thinking content
+  stepName: string;    // Canonical (AG-UI)
+  /** @deprecated Use `stepName` instead. */
+  stepId?: string;     // Deprecated alias
+  delta?: string;      // Incremental thinking content (TanStack AI internal)
+  content?: string;    // Full accumulated thinking content (TanStack AI internal)
+}
+```
+
+---
+
+## Reasoning Events
+
+AG-UI defines a dedicated reasoning event family for thinking/reasoning models.
+**These `REASONING_MESSAGE_*` events are the AG-UI-canonical path for reasoning
+content.** During a transition period, adapters also emit `STEP_FINISHED` with
+the same thinking deltas as a backward-compatibility duplicate; the stream
+processor de-duplicates by ignoring `STEP_FINISHED` thinking deltas once it has
+seen reasoning events for a message (see
+`packages/ai/src/activities/chat/stream/processor.ts`). Prefer
+`REASONING_MESSAGE_*` in new consumers.
+
+All reasoning events extend `BaseAGUIEvent`. TanStack AI adds an optional
+`model?` field; the canonical fields come from `@ag-ui/core`.
+
+### REASONING_START
+
+Reasoning begins for a message.
+
+```typescript
+interface ReasoningStartEvent extends BaseAGUIEvent {
+  type: 'REASONING_START';
+  messageId: string;
+}
+```
+
+### REASONING_MESSAGE_START
+
+A reasoning message begins.
+
+```typescript
+interface ReasoningMessageStartEvent extends BaseAGUIEvent {
+  type: 'REASONING_MESSAGE_START';
+  messageId: string;
+  role: 'reasoning';
+}
+```
+
+### REASONING_MESSAGE_CONTENT
+
+Incremental reasoning content (streaming tokens).
+
+```typescript
+interface ReasoningMessageContentEvent extends BaseAGUIEvent {
+  type: 'REASONING_MESSAGE_CONTENT';
+  messageId: string;
+  delta: string;
+}
+```
+
+### REASONING_MESSAGE_END
+
+A reasoning message completes.
+
+```typescript
+interface ReasoningMessageEndEvent extends BaseAGUIEvent {
+  type: 'REASONING_MESSAGE_END';
+  messageId: string;
+}
+```
+
+### REASONING_END
+
+Reasoning ends for a message.
+
+```typescript
+interface ReasoningEndEvent extends BaseAGUIEvent {
+  type: 'REASONING_END';
+  messageId: string;
+}
+```
+
+### REASONING_ENCRYPTED_VALUE
+
+Carries an encrypted/opaque reasoning payload (e.g. provider-encrypted thinking
+that can be replayed but not read).
+
+```typescript
+interface ReasoningEncryptedValueEvent extends BaseAGUIEvent {
+  type: 'REASONING_ENCRYPTED_VALUE';
+  subtype: string;
+  entityId: string;
+  encryptedValue: string;
+}
+```
+
+---
+
+## MESSAGES_SNAPSHOT
+
+Delivers a full snapshot of the conversation transcript. Unlike
+`STATE_SNAPSHOT` (which carries arbitrary application state),
+`MESSAGES_SNAPSHOT` specifically carries the message list.
+
+```typescript
+interface MessagesSnapshotEvent extends BaseAGUIEvent {
+  type: 'MESSAGES_SNAPSHOT';
+  messages: Message[];  // @ag-ui/core Message[] — use converters for UIMessage
 }
 ```
 
@@ -496,11 +658,19 @@ type StreamChunk =
   | ToolCallStartEvent
   | ToolCallArgsEvent
   | ToolCallEndEvent
+  | ToolCallResultEvent
   | StepStartedEvent
   | StepFinishedEvent
+  | MessagesSnapshotEvent
   | StateSnapshotEvent
   | StateDeltaEvent
-  | CustomEvent;
+  | CustomEvent
+  | ReasoningStartEvent
+  | ReasoningMessageStartEvent
+  | ReasoningMessageContentEvent
+  | ReasoningMessageEndEvent
+  | ReasoningEndEvent
+  | ReasoningEncryptedValueEvent;
 ```
 
 This enables type-safe handling in TypeScript:
@@ -515,7 +685,7 @@ function handleChunk(chunk: StreamChunk) {
       console.log(chunk.content);
       break;
     case 'TOOL_CALL_START':
-      console.log(chunk.toolName);
+      console.log(chunk.toolCallName);
       break;
     // ... other cases
   }
diff --git a/docs/protocol/http-stream-protocol.md b/docs/protocol/http-stream-protocol.md
index 0318557bb..b0cfa4e43 100644
--- a/docs/protocol/http-stream-protocol.md
+++ b/docs/protocol/http-stream-protocol.md
@@ -20,7 +20,7 @@ HTTP streaming with newline-delimited JSON (NDJSON) is a simpler protocol than S
 
 This protocol is **less common** than SSE for TanStack AI applications, but supported for flexibility.
 
-This document describes how TanStack AI transmits StreamChunks over raw HTTP streaming (newline-delimited JSON), an alternative to Server-Sent Events.
+This document describes how TanStack AI transmits [AG-UI events](./chunk-definitions) over raw HTTP streaming (newline-delimited JSON), an alternative to Server-Sent Events.
 
 ---
 
@@ -35,17 +35,21 @@ This document describes how TanStack AI transmits StreamChunks over raw HTTP str
 Content-Type: application/json
 ```
 
-**Body:**
+**Body:** The current `@tanstack/ai-client` POSTs an AG-UI `RunAgentInput` object — `threadId`, `runId`, `messages`, `tools`, `forwardedProps`, etc. The legacy `data` field is still emitted alongside `forwardedProps` as a deprecation bridge. See [Migrating to AG-UI Client-to-Server Compliance](../migration/ag-ui-compliance) for the full wire shape.
+
 ```json
 {
+  "threadId": "thread-abc",
+  "runId": "run-123",
   "messages": [
     {
       "role": "user",
       "content": "Hello, how are you?"
     }
   ],
-  "data": {
-    // Optional additional data
+  "tools": [],
+  "forwardedProps": {
+    // Optional client-supplied options
   }
 }
 ```
@@ -66,16 +70,16 @@ Content-Type: application/json
 Transfer-Encoding: chunked
 ```
 
-**Body:** Stream of newline-delimited JSON chunks
+**Body:** Stream of newline-delimited JSON, one [AG-UI event](./chunk-definitions) per line
 
 ---
 
 ## Stream Format
 
-Each StreamChunk is transmitted as a single line of JSON followed by a newline (`\n`):
+Each [AG-UI event](./chunk-definitions) is transmitted as a single line of JSON followed by a newline (`\n`):
 
 ```
-{JSON_ENCODED_CHUNK}\n
+{JSON_ENCODED_EVENT}\n
 ```
 
 ### Key Points
@@ -83,30 +87,34 @@ Each StreamChunk is transmitted as a single line of JSON followed by a newline (
 1. **One JSON object per line**
 2. **Each line ends with `\n`**
 3. **No prefixes** (unlike SSE's `data:` prefix)
-4. **No blank lines between chunks** (unlike SSE's `\n\n`)
-5. **Stream ends when connection closes** (no `[DONE]` marker)
+4. **No blank lines between events** (unlike SSE's `\n\n`)
+5. **Stream ends when connection closes** (no `[DONE]` marker — the last event is `RUN_FINISHED` on success or `RUN_ERROR` on failure)
 
 ### Examples
 
-#### Content Chunks
+#### Text Content
 
 ```json
-{"type":"content","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567890,"delta":"Hello","content":"Hello","role":"assistant"}
-{"type":"content","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567891,"delta":" world","content":"Hello world","role":"assistant"}
-{"type":"content","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567892,"delta":"!","content":"Hello world!","role":"assistant"}
+{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}
+{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" world","timestamp":1701234567891}
+{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"!","timestamp":1701234567892}
 ```
 
 #### Tool Call
 
+A tool call streams as `TOOL_CALL_START` → `TOOL_CALL_ARGS` → `TOOL_CALL_END`, optionally followed by `TOOL_CALL_RESULT`:
+
 ```json
-{"type":"tool_call","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567893,"toolCall":{"id":"call_xyz","type":"function","function":{"name":"get_weather","arguments":"{\"location\":\"SF\"}"}},"index":0}
-{"type":"tool_result","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567894,"toolCallId":"call_xyz","content":"{\"temperature\":72,\"conditions\":\"sunny\"}"}
+{"type":"TOOL_CALL_START","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567893}
+{"type":"TOOL_CALL_ARGS","toolCallId":"call_xyz","delta":"{\"location\":\"SF\"}","timestamp":1701234567894}
+{"type":"TOOL_CALL_END","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567895}
+{"type":"TOOL_CALL_RESULT","messageId":"msg_2","toolCallId":"call_xyz","content":"{\"temperature\":72,\"conditions\":\"sunny\"}","timestamp":1701234567896}
 ```
 
-#### Stream Completion
+#### Run Completion
 
 ```json
-{"type":"done","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567895,"finishReason":"stop","usage":{"promptTokens":10,"completionTokens":15,"totalTokens":25}}
+{"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567897,"finishReason":"stop","usage":{"promptTokens":10,"completionTokens":15,"totalTokens":25}}
 ```
 
 ---
@@ -138,16 +146,17 @@ Transfer-Encoding: chunked
 The server sends newline-delimited JSON:
 
 ```json
-{"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567890,"delta":"The","content":"The"}
-{"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567891,"delta":" weather","content":"The weather"}
-{"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567892,"delta":" is","content":"The weather is"}
-{"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567893,"delta":" sunny","content":"The weather is sunny"}
-{"type":"done","id":"msg_1","model":"gpt-5.2","timestamp":1701234567894,"finishReason":"stop"}
+{"type":"RUN_STARTED","runId":"run_123","timestamp":1701234567889}
+{"type":"TEXT_MESSAGE_START","messageId":"msg_1","role":"assistant","timestamp":1701234567890}
+{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"The","timestamp":1701234567890}
+{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" weather is sunny","timestamp":1701234567891}
+{"type":"TEXT_MESSAGE_END","messageId":"msg_1","timestamp":1701234567893}
+{"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567894,"finishReason":"stop"}
 ```
 
 ### 4. Stream Completion
 
-Server closes the connection. No special marker needed (unlike SSE's `[DONE]`).
+`RUN_FINISHED` is the terminal event of a successful run; the server then closes the connection. No special marker is sent (neither transport uses a `[DONE]` sentinel).
 
 ---
 
@@ -155,13 +164,13 @@ Server closes the connection. No special marker needed (unlike SSE's `[DONE]`).
 
 ### Server-Side Errors
 
-If an error occurs during generation, send an error chunk:
+If an error occurs during generation, TanStack AI's HTTP-stream helpers emit a `RUN_ERROR` event, then close the connection:
 
 ```json
-{"type":"error","id":"msg_1","model":"gpt-5.2","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}
+{"type":"RUN_ERROR","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}
 ```
 
-Then close the connection.
+> **Canonical shape.** The AG-UI-canonical form carries `message` and `code` at the top level of the event. The wire emitter still nests them under `error` (shown above) as a backward-compatibility bridge; new consumers should prefer the top-level fields. See [Chunk Definitions → RUN_ERROR](./chunk-definitions#run_error).
 
 ### Connection Errors
 
@@ -176,51 +185,41 @@ Unlike SSE, HTTP streaming does not provide automatic reconnection:
 
 ### Server-Side (Node.js/TypeScript)
 
-#### Using TanStack AI (Custom Stream)
+#### Using TanStack AI
 
-TanStack AI doesn't provide a built-in NDJSON formatter, but you can create one easily:
+TanStack AI provides built-in NDJSON helpers — `toHttpResponse(stream, init?)` returns a ready-to-return `Response`, and `toHttpStream(stream, abortController?)` returns the raw `ReadableStream` if you need to set your own headers or wrap it. Both are exported from `@tanstack/ai`, emit one AG-UI event per line, close the connection when the stream ends (`RUN_FINISHED` is terminal), and emit a `RUN_ERROR` event on a thrown error.
 
 ```typescript
-import { chat } from '@tanstack/ai';
+import { chat, toHttpResponse } from '@tanstack/ai';
 import { openaiText } from '@tanstack/ai-openai';
 
 export async function POST(request: Request) {
   const { messages } = await request.json();
-  const encoder = new TextEncoder();
 
   const stream = chat({
     adapter: openaiText('gpt-5.2'),
     messages,
   });
 
-  const readableStream = new ReadableStream({
-    async start(controller) {
-      try {
-        for await (const chunk of stream) {
-          // Send as newline-delimited JSON
-          const line = JSON.stringify(chunk) + '\n';
-          controller.enqueue(encoder.encode(line));
-        }
-        controller.close();
-      } catch (error: any) {
-        const errorChunk = {
-          type: 'error',
-          error: {
-            message: error.message || 'Unknown error',
-            code: error.code,
-          },
-        };
-        controller.enqueue(encoder.encode(JSON.stringify(errorChunk) + '\n'));
-        controller.close();
-      }
-    },
-  });
+  // Emits newline-delimited AG-UI events; sets NDJSON-friendly defaults.
+  return toHttpResponse(stream);
+}
+```
+
+If you need the raw stream (e.g. to add custom headers), use `toHttpStream`:
 
-  return new Response(readableStream, {
-    headers: {
-      'Content-Type': 'application/x-ndjson',
-      'Cache-Control': 'no-cache',
-    },
+```typescript
+import { chat, toHttpStream } from '@tanstack/ai';
+import { openaiText } from '@tanstack/ai-openai';
+
+export async function POST(request: Request) {
+  const { messages } = await request.json();
+  const abortController = new AbortController();
+
+  const stream = chat({ adapter: openaiText('gpt-5.2'), messages });
+
+  return new Response(toHttpStream(stream, abortController), {
+    headers: { 'Content-Type': 'application/x-ndjson' },
   });
 }
 ```
@@ -252,11 +251,12 @@ app.post('/api/chat', async (req, res) => {
       res.write(JSON.stringify(chunk) + '\n');
     }
   } catch (error: any) {
-    const errorChunk = {
-      type: 'error',
+    const errorEvent = {
+      type: 'RUN_ERROR',
+      timestamp: Date.now(),
       error: { message: error.message },
     };
-    res.write(JSON.stringify(errorChunk) + '\n');
+    res.write(JSON.stringify(errorEvent) + '\n');
   } finally {
     res.end();
   }
@@ -276,11 +276,11 @@ const { messages, sendMessage } = useChat({
 ```
 
 **What `fetchHttpStream()` does:**
-1. Makes POST request with messages
-2. Reads response body as stream
+1. Makes a POST request with the AG-UI `RunAgentInput` body
+2. Reads the response body as a stream
 3. Splits by newlines
 4. Parses each line as JSON
-5. Yields StreamChunk objects
+5. Yields `StreamChunk` (AG-UI event) objects
 
 ### Manual Implementation (Advanced)
 
@@ -340,7 +340,7 @@ if (buffer.trim()) {
 | Overhead | Lower (no prefixes) | Higher (`data:` prefix) |
 | Auto-reconnect | ❌ No | ✅ Yes |
 | Browser API | ❌ No (manual) | ✅ Yes (EventSource) |
-| Completion marker | ❌ No (close connection) | ✅ Yes (`[DONE]`) |
+| Completion marker | ❌ No (close connection after `RUN_FINISHED`) | ❌ No (close connection after `RUN_FINISHED`) |
 | Debugging | Easy (plain JSON lines) | Easy (plain text) |
 | Use case | Custom protocols, lower overhead | Standard streaming, reconnection needed |
 
@@ -368,9 +368,9 @@ The `-N` flag disables buffering to see real-time output.
 
 **Example Output:**
 ```json
-{"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567890,"delta":"Hello","content":"Hello"}
-{"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567891,"delta":" there","content":"Hello there"}
-{"type":"done","id":"msg_1","model":"gpt-5.2","timestamp":1701234567892,"finishReason":"stop"}
+{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}
+{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" there","timestamp":1701234567891}
+{"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567892,"finishReason":"stop"}
 ```
 
 ### Validating NDJSON
@@ -399,8 +399,9 @@ done
 
 1. **No Auto-Reconnect** - Must implement manually
 2. **No Browser API** - Can't use EventSource
-3. **No Completion Marker** - Must rely on connection close
-4. **Less Common** - SSE is more standard for streaming
+3. **Less Common** - SSE is more standard for streaming
+
+(Both transports rely on connection close after `RUN_FINISHED`; neither uses a `[DONE]` marker.)
 
 ---
 
diff --git a/docs/protocol/sse-protocol.md b/docs/protocol/sse-protocol.md
index 1fec1b74a..64fa9e0ac 100644
--- a/docs/protocol/sse-protocol.md
+++ b/docs/protocol/sse-protocol.md
@@ -19,7 +19,7 @@ Server-Sent Events (SSE) is a standard HTTP-based protocol for server-to-client
 - ✅ **Wide support** - Works in all modern browsers
 - ✅ **Efficient** - Single long-lived HTTP connection
 
-This document describes how TanStack AI transmits StreamChunks over Server-Sent Events (SSE), the recommended protocol for most use cases.
+This document describes how TanStack AI transmits [AG-UI events](./chunk-definitions) over Server-Sent Events (SSE), the recommended protocol for most use cases.
 
 ## Protocol Specification
 
@@ -32,17 +32,21 @@ This document describes how TanStack AI transmits StreamChunks over Server-Sent
 Content-Type: application/json
 ```
 
-**Body:**
+**Body:** The current `@tanstack/ai-client` POSTs an AG-UI `RunAgentInput` object — `threadId`, `runId`, `messages`, `tools`, `forwardedProps`, etc. The legacy `data` field is still emitted alongside `forwardedProps` as a deprecation bridge. See [Migrating to AG-UI Client-to-Server Compliance](../migration/ag-ui-compliance) for the full wire shape and migration tiers.
+
 ```json
 {
+  "threadId": "thread-abc",
+  "runId": "run-123",
   "messages": [
     {
       "role": "user",
       "content": "Hello, how are you?"
     }
   ],
-  "data": {
-    // Optional additional data
+  "tools": [],
+  "forwardedProps": {
+    // Optional client-supplied options
   }
 }
 ```
@@ -58,43 +62,50 @@ Cache-Control: no-cache
 Connection: keep-alive
 ```
 
-**Body:** Stream of SSE events
+**Body:** Stream of SSE events — each event is a single [AG-UI event](./chunk-definitions) JSON object.
 
 ---
 
 ## SSE Format
 
-Each StreamChunk is transmitted as an SSE event with the following format:
+Each [AG-UI event](./chunk-definitions) is transmitted as an SSE event with the following format:
 
 ```
-data: {JSON_ENCODED_CHUNK}\n\n
+data: {JSON_ENCODED_EVENT}\n\n
 ```
 
 ### Key Points
 
 1. **Each event starts with `data: `**
-2. **Followed by the JSON-encoded chunk**
+2. **Followed by the JSON-encoded AG-UI event**
 3. **Ends with double newline `\n\n`**
 4. **No event names or IDs** (not required for our use case)
 
 ### Examples
 
-#### Content Chunk
+#### Text Content
 
 ```
-data: {"type":"content","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567890,"delta":"Hello","content":"Hello","role":"assistant"}\n\n
+data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}\n\n
 ```
 
-#### Tool Call Chunk
+#### Tool Call
+
+A tool call streams as a `TOOL_CALL_START` → `TOOL_CALL_ARGS` → `TOOL_CALL_END` sequence, optionally followed by a `TOOL_CALL_RESULT` once the tool runs:
 
 ```
-data: {"type":"tool_call","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567891,"toolCall":{"id":"call_xyz","type":"function","function":{"name":"get_weather","arguments":"{\"location\":\"SF\"}"}},"index":0}\n\n
+data: {"type":"TOOL_CALL_START","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567891}\n\n
+data: {"type":"TOOL_CALL_ARGS","toolCallId":"call_xyz","delta":"{\"location\":\"SF\"}","timestamp":1701234567892}\n\n
+data: {"type":"TOOL_CALL_END","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567893}\n\n
+data: {"type":"TOOL_CALL_RESULT","messageId":"msg_2","toolCallId":"call_xyz","content":"{\"temperature\":72,\"conditions\":\"sunny\"}","timestamp":1701234567894}\n\n
 ```
 
-#### Done Chunk
+#### Run Completion
+
+`RUN_FINISHED` is the terminal event of a successful run:
 
 ```
-data: {"type":"done","id":"chatcmpl-abc123","model":"gpt-5.2","timestamp":1701234567892,"finishReason":"stop","usage":{"promptTokens":10,"completionTokens":5,"totalTokens":15}}\n\n
+data: {"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567895,"finishReason":"stop","usage":{"promptTokens":10,"completionTokens":5,"totalTokens":15}}\n\n
 ```
 
 ---
@@ -123,27 +134,23 @@ Cache-Control: no-cache
 Connection: keep-alive
 ```
 
-### 3. Server Streams Chunks
+### 3. Server Streams Events
 
-The server sends multiple `data:` events as chunks are generated:
+The server sends multiple `data:` events as the run progresses:
 
 ```
-data: {"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567890,"delta":"The","content":"The"}\n\n
-data: {"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567891,"delta":" weather","content":"The weather"}\n\n
-data: {"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567892,"delta":" is","content":"The weather is"}\n\n
-data: {"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567893,"delta":" sunny","content":"The weather is sunny"}\n\n
-data: {"type":"done","id":"msg_1","model":"gpt-5.2","timestamp":1701234567894,"finishReason":"stop"}\n\n
+data: {"type":"RUN_STARTED","runId":"run_123","timestamp":1701234567889}\n\n
+data: {"type":"TEXT_MESSAGE_START","messageId":"msg_1","role":"assistant","timestamp":1701234567890}\n\n
+data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"The","timestamp":1701234567890}\n\n
+data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" weather","timestamp":1701234567891}\n\n
+data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" is sunny","timestamp":1701234567892}\n\n
+data: {"type":"TEXT_MESSAGE_END","messageId":"msg_1","timestamp":1701234567893}\n\n
+data: {"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567894,"finishReason":"stop"}\n\n
 ```
 
 ### 4. Stream Completion
 
-After the final chunk, the server sends a completion marker:
-
-```
-data: [DONE]\n\n
-```
-
-Then closes the connection.
+`RUN_FINISHED` is the terminal event of a successful run. There is **no** `[DONE]` sentinel — after `RUN_FINISHED` the server simply closes the connection, and the client treats connection close as end-of-stream.
 
 ---
 
@@ -151,13 +158,13 @@ Then closes the connection.
 
 ### Server-Side Errors
 
-If an error occurs during generation, send an error chunk:
+If an error occurs during generation, TanStack AI's SSE helpers emit a `RUN_ERROR` event, then close the connection:
 
 ```
-data: {"type":"error","id":"msg_1","model":"gpt-5.2","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}\n\n
+data: {"type":"RUN_ERROR","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}\n\n
 ```
 
-Then close the connection.
+> **Canonical shape.** The AG-UI-canonical form carries `message` and `code` at the top level of the event. The wire emitter still nests them under `error` (shown above) as a backward-compatibility bridge; new consumers should prefer the top-level fields. See [Chunk Definitions → RUN_ERROR](./chunk-definitions#run_error).
 
 ### Connection Errors
 
@@ -193,10 +200,10 @@ export async function POST(request: Request) {
 
 **What `toServerSentEventsResponse()` does:**
 1. Creates a `ReadableStream` from the async iterable
-2. Wraps each chunk as `data: {JSON}\n\n`
-3. Sends `data: [DONE]\n\n` at the end
+2. Wraps each AG-UI event as `data: {JSON}\n\n`
+3. Closes the connection when the stream ends (no `[DONE]` marker — `RUN_FINISHED` is the terminal event)
 4. Sets proper SSE headers
-5. Handles errors and cleanup
+5. Emits a `RUN_ERROR` event and closes the connection if the stream throws
 
 ### Client-Side (Browser/Node.js)
 
@@ -211,12 +218,12 @@ const { messages, sendMessage } = useChat({
 ```
 
 **What `fetchServerSentEvents()` does:**
-1. Makes POST request with messages
-2. Reads response body as stream
+1. Makes a POST request with the AG-UI `RunAgentInput` body
+2. Reads the response body as a stream
 3. Parses SSE format (`data:` prefix)
-4. Deserializes JSON chunks
-5. Yields StreamChunk objects
-6. Stops on `[DONE]` marker
+4. Deserializes the JSON payload of each `data:` line into an AG-UI event
+5. Yields `StreamChunk` (AG-UI event) objects
+6. Ends when the connection closes (after `RUN_FINISHED`)
 
 ### Manual Implementation (Advanced)
 
@@ -227,7 +234,7 @@ If you need custom handling:
 export async function POST(request: Request) {
   const { messages } = await request.json();
   const encoder = new TextEncoder();
-  
+
   const stream = new ReadableStream({
     async start(controller) {
       try {
@@ -235,14 +242,15 @@ export async function POST(request: Request) {
           const sseData = `data: ${JSON.stringify(chunk)}\n\n`;
           controller.enqueue(encoder.encode(sseData));
         }
-        controller.enqueue(encoder.encode('data: [DONE]\n\n'));
+        // No [DONE] marker — the stream's RUN_FINISHED event is terminal.
         controller.close();
       } catch (error) {
-        const errorChunk = {
-          type: 'error',
-          error: { message: error.message }
+        const errorEvent = {
+          type: 'RUN_ERROR',
+          timestamp: Date.now(),
+          error: { message: (error as Error).message },
         };
-        controller.enqueue(encoder.encode(`data: ${JSON.stringify(errorChunk)}\n\n`));
+        controller.enqueue(encoder.encode(`data: ${JSON.stringify(errorEvent)}\n\n`));
         controller.close();
       }
     }
@@ -281,10 +289,10 @@ while (true) {
   for (const line of lines) {
     if (line.startsWith('data: ')) {
       const data = line.slice(6);
-      if (data === '[DONE]') continue;
-      
-      const chunk = JSON.parse(data);
-      // Handle chunk...
+
+      const event = JSON.parse(data);
+      // Handle the AG-UI event...
+      // (RUN_FINISHED signals the run is complete; the stream ends on close)
     }
   }
 }
@@ -312,15 +320,15 @@ The `-N` flag disables buffering to see real-time output.
 
 **Example Output:**
 ```
-data: {"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567890,"delta":"Hello","content":"Hello"}
-
-data: {"type":"content","id":"msg_1","model":"gpt-5.2","timestamp":1701234567891,"delta":" there","content":"Hello there"}
+data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}
 
-data: {"type":"done","id":"msg_1","model":"gpt-5.2","timestamp":1701234567892,"finishReason":"stop"}
+data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" there","timestamp":1701234567891}
 
-data: [DONE]
+data: {"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567892,"finishReason":"stop"}
 ```
 
+The connection closes after `RUN_FINISHED` — there is no `[DONE]` line.
+
 ---
 
 ## Advantages of SSE
@@ -346,8 +354,8 @@ data: [DONE]
 ## Best Practices
 
 1. **Always set proper headers** - `Content-Type`, `Cache-Control`, `Connection`
-2. **Send `[DONE]` marker** - Helps client know when to close
-3. **Handle errors gracefully** - Send error chunk before closing
+2. **Treat `RUN_FINISHED` as terminal** - There is no `[DONE]` marker; close the connection after it
+3. **Handle errors gracefully** - Emit a `RUN_ERROR` event before closing
 4. **Use compression** - Enable gzip/brotli at the reverse proxy level
 5. **Set timeouts** - Prevent hanging connections
 6. **Monitor connection count** - Watch for connection leaks
diff --git a/docs/structured-outputs/one-shot.md b/docs/structured-outputs/one-shot.md
index 9aee5db78..df9e00812 100644
--- a/docs/structured-outputs/one-shot.md
+++ b/docs/structured-outputs/one-shot.md
@@ -177,6 +177,77 @@ try {
 
 Provider-level errors (auth failure, rate limit, network) throw the same way — wrap the call in `try` / `catch` to handle both.
 
+## Consuming the result on the client
+
+The `await chat({ outputSchema })` call above returns a `Promise<T>` — ideal for a server route, a script, or a CLI. There are two ways to get that typed object to a browser.
+
+### As plain JSON (no hook)
+
+If the client only needs the finished object and you don't want progressive UI, resolve the promise on the server and return it as JSON. The browser fetches it like any other endpoint — no TanStack client API, no `partial` / `final`:
+
+```typescript
+// server route
+export async function POST(request: Request) {
+  const { text } = await request.json();
+  const person = await chat({
+    adapter: openaiText("gpt-5.2"),
+    messages: [{ role: "user", content: `Extract the person info: ${text}` }],
+    outputSchema: PersonSchema,
+  });
+  return Response.json(person); // typed object → JSON
+}
+```
+
+```typescript
+// client
+const res = await fetch("/api/extract-person", {
+  method: "POST",
+  body: JSON.stringify({ text }),
+});
+const person = (await res.json()) as z.infer<typeof PersonSchema>;
+```
+
+This is the most literal one-shot shape: one request, one object back. You own the fetch and the typing; the hook isn't involved.
+
+### With `useChat` — typed `final` (and optional `partial`)
+
+When you want the hook ergonomics — managed `isLoading` state, a schema-typed result, optional field-by-field fill — read `final` off `useChat({ outputSchema })`. Because `useChat` consumes a stream, the server switches to the streaming shape (`stream: true` + `toServerSentEventsResponse`); the client still treats it as "one object, when it's ready":
+
+```tsx
+import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
+
+function PersonExtractor() {
+  // `final` is `z.infer<typeof PersonSchema> | null`.
+  // `partial` is `DeepPartial<z.infer<typeof PersonSchema>>`.
+  const { sendMessage, isLoading, final, partial } = useChat({
+    connection: fetchServerSentEvents("/api/extract-person"),
+    outputSchema: PersonSchema,
+  });
+
+  return (
+    <div>
+      <button
+        disabled={isLoading}
+        onClick={() => sendMessage("Extract: John Doe, 30, john@example.com")}
+      >
+        Extract
+      </button>
+
+      {/* One-shot UI: just render the validated object when it lands. */}
+      {final && <PersonCard person={final} />}
+    </div>
+  );
+}
+```
+
+- **`final`** — `T | null`. The validated terminal object, populated when the run completes. For a one-shot UI, render off `final` and you're done.
+- **`partial`** — `DeepPartial<T>`. The same object filling in field by field as the JSON streams. Ignore it if you only care about the finished result; reach for it when you want a progressive form. The [Streaming UIs](./streaming) guide covers that pattern in depth.
+- **The schema on `useChat`** is for client-side TypeScript inference (and progressive parsing of `partial`). Validation still runs on the server against the schema you pass to `chat({ outputSchema })`.
+
+For non-streaming adapters (Anthropic, Gemini, Ollama), the object arrives as a single event — `partial` stays `{}` and `final` snaps in one step. The consumer code above is identical regardless of adapter.
+
+> Want the result to fill in field-by-field, or to keep a history of objects across turns? Those are the [Streaming UIs](./streaming) and [Multi-Turn Chat](./multi-turn) journeys — both build on this same `useChat({ outputSchema })` surface.
+
 ## Best Practices
 
 1. **Use descriptive field names and descriptions.** The model uses them as hints.
diff --git a/docs/structured-outputs/overview.md b/docs/structured-outputs/overview.md
index e53cbc203..dddec66ad 100644
--- a/docs/structured-outputs/overview.md
+++ b/docs/structured-outputs/overview.md
@@ -66,7 +66,7 @@ Pick the journey that matches what you're building. The four guides under "Struc
 
 | You want to… | Read |
 |---|---|
-| Extract one structured object from a single prompt (script, server endpoint, CLI) | [One-Shot Extraction](./one-shot) |
+| Extract one structured object from a single prompt — and consume it server-side (script, endpoint, CLI) or in a browser via `final` | [One-Shot Extraction](./one-shot) |
 | Build a UI that fills in field-by-field as the model streams (progressive form, live card, typewriter preview) | [Streaming UIs](./streaming) |
 | Let users iterate on a structured object across multiple turns — each turn produces a new typed object and history stays renderable | [Multi-Turn Chat](./multi-turn) |
 | Combine structured output with tool calls (agent loop that runs tools first, then returns a typed object) | [With Tools](./with-tools) |
diff --git a/docs/tools/client-tools.md b/docs/tools/client-tools.md
index 34df9e71f..84cd4f626 100644
--- a/docs/tools/client-tools.md
+++ b/docs/tools/client-tools.md
@@ -101,7 +101,7 @@ To give the LLM access to client tools, pass the tool definitions (not implement
 
 ```typescript
 // api/chat/route.ts
-import { chat, toServerSentEventsStream } from "@tanstack/ai";
+import { chat, toServerSentEventsResponse } from "@tanstack/ai";
 import { openaiText } from "@tanstack/ai-openai";
 import { updateUIDef, saveToLocalStorageDef } from "@/tools/definitions";
 
@@ -114,7 +114,7 @@ export async function POST(request: Request) {
     tools: [updateUIDef, saveToLocalStorageDef], // Pass definitions
   });
 
-  return toServerSentEventsStream(stream);
+  return toServerSentEventsResponse(stream);
 }
 ```
 
@@ -128,37 +128,35 @@ import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
 import { 
   clientTools, 
   createChatClientOptions, 
-  type InferChatMessages 
+  type InferChatMessages,
+  type ToolCallPart,
 } from "@tanstack/ai-client";
 import { updateUIDef, saveToLocalStorageDef } from "@/tools/definitions";
-import { useState } from "react";
 
-function ChatComponent() {
-  const [notification, setNotification] = useState(null);
-
-  // Step 1: Create client implementations
-  const updateUI = updateUIDef.client((input) => {
-    // Update React state - fully typed!
-    setNotification({ message: input.message, type: input.type });
-    return { success: true };
-  });
+// Step 1: Create client implementations (module scope)
+const updateUI = updateUIDef.client((input) => {
+  // Update UI state via your own app-level helper (`showNotification` is not provided by TanStack AI) - input is fully typed!
+  showNotification({ message: input.message, type: input.type });
+  return { success: true };
+});
 
-  const saveToLocalStorage = saveToLocalStorageDef.client((input) => {
-    localStorage.setItem(input.key, input.value);
-    return { saved: true };
-  });
+const saveToLocalStorage = saveToLocalStorageDef.client((input) => {
+  localStorage.setItem(input.key, input.value);
+  return { saved: true };
+});
 
-  // Step 2: Create typed tools array (no 'as const' needed!)
-  const tools = clientTools(updateUI, saveToLocalStorage);
+// Step 2: Create typed tools array (no 'as const' needed!)
+const tools = clientTools(updateUI, saveToLocalStorage);
 
-  const chatOptions = createChatClientOptions({
-    connection: fetchServerSentEvents("/api/chat"),
-    tools,
-  });
+const chatOptions = createChatClientOptions({
+  connection: fetchServerSentEvents("/api/chat"),
+  tools,
+});
 
-  // Step 3: Infer message types for full type safety
-  type ChatMessages = InferChatMessages<typeof chatOptions>;
+// Step 3: Infer message types for full type safety
+type ChatMessages = InferChatMessages<typeof chatOptions>;
 
+function ChatComponent() {
   const { messages, sendMessage, isLoading } = useChat(chatOptions);
 
   // Step 4: Render with full type safety
@@ -167,11 +165,6 @@ function ChatComponent() {
       {messages.map((message) => (
         <MessageComponent key={message.id} message={message} />
       ))}
-      {notification && (
-        <div className={`notification ${notification.type}`}>
-          {notification.message}
-        </div>
-      )}
     </div>
   );
 }
@@ -273,18 +266,20 @@ messages.forEach((message) => {
 ```
 
 ## Tool States
-Client tools go through a small set of observable lifecycle states you can surface in the UI to indicate progress:
+
+A `tool-call` part moves through a small set of observable `ToolCallState` values you can surface in the UI to indicate progress:
 
 - `awaiting-input` — the model intends to call the tool but arguments haven't arrived yet.
 - `input-streaming` — the model is streaming the tool arguments (partial input may be available).
 - `input-complete` — all arguments have been received and the tool can run.
 - `approval-requested` — the tool is waiting for user approval before it can run.
 - `approval-responded` — the user has approved or denied the tool call.
-- `complete` — the tool finished; `part.output` contains the result (or error details).
 
-Use these states to show loading indicators, streaming progress, and final success/error feedback. The example below maps each state to a simple UI message.
+The `ToolCallState` union includes a `complete` value, but the runtime never transitions a tool-call part to it — a finished call settles at `input-complete` (or at `approval-responded` for approval-gated tools). Once the tool runs, the result appears two ways: `part.output` becomes populated on the tool-call part, and a sibling `tool-result` part is emitted whose own `state` is `complete` or `error` (the `error` case carries `part.error`). Use the tool-call states for loading/streaming progress and the tool-result part for final success/error feedback.
 
 ```typescript
+import type { ToolCallPart } from "@tanstack/ai-client";
+
 function ToolCallDisplay({ part }: { part: ToolCallPart }) {
   if (part.state === "awaiting-input") {
     return <div>🔄 Waiting for arguments...</div>;
@@ -298,7 +293,9 @@ function ToolCallDisplay({ part }: { part: ToolCallPart }) {
     return <div>✓ Arguments received, running tool...</div>;
   }
 
-  if (part.state === "complete") {
+  // Completion shows up as a populated `part.output` (and as a sibling
+  // `tool-result` part whose state is `complete` / `error`).
+  if (part.output) {
     return <div>✅ Tool complete</div>;
   }
   
diff --git a/docs/tools/server-tools.md b/docs/tools/server-tools.md
index 5333ffbcd..f56e0562e 100644
--- a/docs/tools/server-tools.md
+++ b/docs/tools/server-tools.md
@@ -47,16 +47,17 @@ sequenceDiagram
    - Added to the conversation history
 5. **Continuation**: The chat continues with the tool result, allowing the LLM to generate a response based on the result
   
-## Automatic vs. Manual Execution
+## Automatic Execution and Approval Pauses
 
 **Automatic (Default):**
 - Server tools with an `execute` function run automatically
 - Results are added to the conversation immediately
 - No client-side handling required
 
-**Manual (Advanced):**
-- You can handle tool calls manually by intercepting the stream
-- Useful for custom orchestration or approval flows
+**Approval-gated:**
+- Tools marked `needsApproval: true` still execute automatically — but only *after* the user approves
+- The run pauses at the `approval-requested` state and resumes (executing the tool, or skipping it on denial) once the client sends an approval response
+- See [Tool Approval Flow](./tool-approval) for the full pattern
 
 ## Server Tool Definition
 
@@ -171,7 +172,7 @@ export async function POST(request: Request) {
 Server tools can receive typed runtime context as their second argument. Use this for request-scoped dependencies like authenticated users, database clients, tenant IDs, or audit loggers.
 
 ```typescript
-import { chat, toolDefinition } from "@tanstack/ai";
+import { chat, toolDefinition, toServerSentEventsResponse } from "@tanstack/ai";
 import { openaiText } from "@tanstack/ai-openai";
 import { z } from "zod";
 
@@ -199,15 +200,25 @@ const getCurrentUser = toolDefinition({
   return { name: user?.name ?? null };
 });
 
-chat({
-  adapter: openaiText("gpt-5.2"),
-  messages,
-  tools: [getCurrentUser],
-  context: {
-    userId: session.user.id,
-    db,
-  },
-});
+export async function POST(request: Request) {
+  const { messages } = await request.json();
+  // `session` and `db` come from your own app setup (auth middleware,
+  // a DB client, etc.) — they are not provided by TanStack AI.
+  const session = await getSession(request);
+  const db = getDb();
+
+  const stream = chat({
+    adapter: openaiText("gpt-5.2"),
+    messages,
+    tools: [getCurrentUser],
+    context: {
+      userId: session.user.id,
+      db,
+    },
+  });
+
+  return toServerSentEventsResponse(stream);
+}
 ```
 
 If a server tool declares a context generic, `chat()` requires a compatible `context` value. Untyped tools keep working and receive `unknown` context.
@@ -311,6 +322,8 @@ const getUserData = getUserDataDef.server(async ({ userId }) => {
 });
 ```
 
+**Throwing vs. returning an error:** if your `.server()` function throws, the SDK catches it and surfaces it as a tool-result *error* (the model sees the failure but you lose control over the message). Returning a structured `{ error }` shape keeps the model in control of how to recover and is usually preferable. Note that when an `outputSchema` is defined, any returned value — including an `{ error }` shape — is validated against it (Zod) before being added to the conversation, so include the `error` field in your `outputSchema` if you return it.
+
 ## Using JSON Schema
 
 If you have existing JSON Schema definitions or prefer not to use Zod, you can define tool schemas using raw JSON Schema objects:
diff --git a/docs/tools/tool-approval.md b/docs/tools/tool-approval.md
index b92279b4a..b73c7c69a 100644
--- a/docs/tools/tool-approval.md
+++ b/docs/tools/tool-approval.md
@@ -20,7 +20,8 @@ The tool approval flow allows you to require user approval before executing sens
 3. **`input-complete`** — All arguments received
 4. **`approval-requested`** — Waiting for user approval (only if `needsApproval: true`)
 5. **`approval-responded`** — User approved or denied
-6. **`complete`** — Tool finished executing (result available, or denial recorded)
+
+After `approval-responded` the call executes (if approved). Although `complete` exists in the `ToolCallState` union, the runtime never transitions the tool-call part to it — the result surfaces as a populated `part.output` plus a sibling `tool-result` part whose own state is `complete` or `error`.
 
 When a tool requires approval, the typical flow is:
 
@@ -148,7 +149,17 @@ function ChatComponent() {
 Here's a more complete approval UI component:
 
 ```typescript
-function ApprovalPrompt({ part, onApprove, onDeny }) {
+import type { ToolCallPart } from "@tanstack/ai-client";
+
+function ApprovalPrompt({
+  part,
+  onApprove,
+  onDeny,
+}: {
+  part: ToolCallPart;
+  onApprove: () => void;
+  onDeny: () => void;
+}) {
   // When tools are passed via `clientTools(...)`, `part.input` is the
   // parsed, fully-typed argument object. Otherwise parse `part.arguments`.
   const args = part.input ?? JSON.parse(part.arguments);
@@ -182,11 +193,34 @@ function ApprovalPrompt({ part, onApprove, onDeny }) {
 }
 ```
 
+Wire it up from your message renderer. Note the `id` you pass is the **approval id** (`part.approval.id`), not the tool call id:
+
+```typescript
+{part.type === "tool-call" &&
+  part.state === "approval-requested" &&
+  part.approval && (
+    <ApprovalPrompt
+      part={part}
+      onApprove={() =>
+        addToolApprovalResponse({ id: part.approval!.id, approved: true })
+      }
+      onDeny={() =>
+        addToolApprovalResponse({ id: part.approval!.id, approved: false })
+      }
+    />
+  )}
+```
+
 ## Client Tools with Approval
 
 Client tools can also require approval:
 
 ```typescript
+import { toolDefinition } from "@tanstack/ai";
+import { z } from "zod";
+import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
+import { clientTools } from "@tanstack/ai-client";
+
 // tools/definitions.ts
 const deleteLocalDataDef = toolDefinition({
   name: "delete_local_data",
@@ -209,7 +243,10 @@ const deleteLocalData = deleteLocalDataDef.client((input) => {
 
 const { messages, addToolApprovalResponse } = useChat({
   connection: fetchServerSentEvents("/api/chat"),
-  tools: [deleteLocalData], // Automatic execution after approval
+  // Wrap client tools in `clientTools(...)` so literal tool-name inference is
+  // preserved — this is what lets `part.name === "delete_local_data"` narrow
+  // `part.input` / `part.output` to this tool's types.
+  tools: clientTools(deleteLocalData), // Automatic execution after approval
 });
 ```
 
diff --git a/docs/tools/tool-architecture.md b/docs/tools/tool-architecture.md
index 1ca50afbc..26a0caef8 100644
--- a/docs/tools/tool-architecture.md
+++ b/docs/tools/tool-architecture.md
@@ -99,9 +99,11 @@ export async function POST(request: Request) {
 **Client (React Component):**
 
 ```typescript
+import { useState } from "react";
 import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
 
 function ChatComponent() {
+  const [input, setInput] = useState("");
   const { messages, sendMessage, isLoading } = useChat({
     connection: fetchServerSentEvents("/api/chat"),
   });
@@ -111,7 +113,18 @@ function ChatComponent() {
       {messages.map((message) => (
         <div key={message.id}>{/* Render message */}</div>
       ))}
-      <input onSubmit={(e) => sendMessage(e.target.value)} />
+      <form
+        onSubmit={(e) => {
+          e.preventDefault();
+          sendMessage(input);
+          setInput("");
+        }}
+      >
+        <input value={input} onChange={(e) => setInput(e.target.value)} />
+        <button type="submit" disabled={isLoading}>
+          Send
+        </button>
+      </form>
     </div>
   );
 }
@@ -121,20 +134,30 @@ function ChatComponent() {
 
 Tools progress through different states during their lifecycle. Understanding these states helps build robust UIs and debug tool execution.
 
+> **Two parts, two state sets — this page is the canonical reference.** Call states (`awaiting-input`, `input-streaming`, `input-complete`, `approval-requested`, `approval-responded`) live on the **`tool-call`** part as `part.state`. There is no `complete`/`error`/`executing`/`cancelled` value on the call part. The *result* lives on a separate sibling **`tool-result`** part whose own `state` is `streaming`, `complete`, or `error`; the resolved value is also mirrored onto the call part's `part.output`.
+
+The diagram below is conceptual: the nodes outside the `tool-call part` box (`ResultComplete`, `ResultError`, `Denied`) are **not** `ToolCallState` values. `ResultComplete` and `ResultError` correspond to the sibling `tool-result` part's state (`complete` / `error`) and the call part's `output` field; `Denied` marks a rejected approval, after which the tool is not executed.
+
 ```mermaid
 stateDiagram-v2
-    [*] --> AwaitingInput: tool_call received
-    AwaitingInput --> InputStreaming: partial arguments
-    InputStreaming --> InputComplete: all arguments received
-    InputComplete --> ApprovalRequested: needsApproval=true
-    InputComplete --> Executing: needsApproval=false
-    ApprovalRequested --> Executing: user approves
-    ApprovalRequested --> Cancelled: user denies
-    Executing --> OutputAvailable: success
-    Executing --> OutputError: error
-    OutputAvailable --> [*]
-    OutputError --> [*]
-    Cancelled --> [*]
+    state "tool-call part (ToolCallState)" as Call {
+        [*] --> AwaitingInput: tool_call received
+        AwaitingInput --> InputStreaming: partial arguments
+        InputStreaming --> InputComplete: all arguments received
+        InputComplete --> ApprovalRequested: needsApproval=true
+        ApprovalRequested --> ApprovalResponded: user approves / denies
+    }
+    InputComplete --> ResultComplete: needsApproval=false, success
+    ApprovalResponded --> ResultComplete: approved + success (output set)
+    ApprovalResponded --> ResultError: approved + error
+    ApprovalResponded --> Denied: user denied (no execution)
+    state "tool-result part" as Results {
+        ResultComplete: complete
+        ResultError: error
+    }
+    ResultComplete --> [*]
+    ResultError --> [*]
+    Denied --> [*]
 ```
 
 ### Call States
@@ -158,10 +181,19 @@ stateDiagram-v2
 ### Monitoring Tool States in React
 
 ```typescript
+import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
+import { clientTools, createChatClientOptions } from "@tanstack/ai-client";
+import { getWeather, sendEmail } from "./tools";
+
+// Wiring `tools` is what lets `part.name` / `part.input` / `part.output`
+// narrow to each tool's types below.
+const chatOptions = createChatClientOptions({
+  connection: fetchServerSentEvents("/api/chat"),
+  tools: clientTools(getWeather, sendEmail),
+});
+
 function ChatComponent() {
-  const { messages } = useChat({
-    connection: fetchServerSentEvents("/api/chat"),
-  });
+  const { messages } = useChat(chatOptions);
 
   return (
     <div>
@@ -258,32 +290,34 @@ const { messages, addToolApprovalResponse } = useChat({
   connection: fetchServerSentEvents("/api/chat"),
 });
 
-// In your render:
-{part.state === "approval-requested" && (
-  <div>
-    <p>Approve sending email to {part.input.to}?</p>
-    <button
-      onClick={() =>
-        addToolApprovalResponse({
-          id: part.approval.id,
-          approved: true,
-        })
-      }
-    >
-      Approve
-    </button>
-    <button
-      onClick={() =>
-        addToolApprovalResponse({
-          id: part.approval.id,
-          approved: false,
-        })
-      }
-    >
-      Deny
-    </button>
-  </div>
-)}
+// In your render (guard `type` and `approval` so `part.approval.id` is safe):
+{part.type === "tool-call" &&
+  part.state === "approval-requested" &&
+  part.approval && (
+    <div>
+      <p>Approve sending email to {part.input.to}?</p>
+      <button
+        onClick={() =>
+          addToolApprovalResponse({
+            id: part.approval.id,
+            approved: true,
+          })
+        }
+      >
+        Approve
+      </button>
+      <button
+        onClick={() =>
+          addToolApprovalResponse({
+            id: part.approval.id,
+            approved: false,
+          })
+        }
+      >
+        Deny
+      </button>
+    </div>
+  )}
 ```
 
 ### Hybrid Tools (Server + Client)
diff --git a/docs/tools/tools.md b/docs/tools/tools.md
index 3d4ffdd56..a4b9718b6 100644
--- a/docs/tools/tools.md
+++ b/docs/tools/tools.md
@@ -289,14 +289,21 @@ const addToCartClient = addToCartDef.client((input) => {
 });
 ```
 
-On the server, pass the definition (for client execution) or server implementation:
+On the server, pass either the definition (so the client executes the tool) or the server implementation (so the server executes it). The two `chat()` calls below are alternatives — pick one per tool:
 
 ```typescript
+// Pass the definition: the client will execute the tool
 chat({
   adapter: openaiText("gpt-5.2"),
   messages,
-  tools: [addToCartDef], // Client will execute, or
-  tools: [addToCartServer], // Server will execute
+  tools: [addToCartDef],
+});
+
+// Or pass the server implementation: the server will execute the tool
+chat({
+  adapter: openaiText("gpt-5.2"),
+  messages,
+  tools: [addToCartServer],
 });
 ```
 
@@ -329,6 +336,25 @@ messages.forEach((message) => {
 4. **Result is returned** - To the model as a tool result message
 5. **Model continues** - Uses the result to generate a response
 
+## Progress Events and Runtime Context
+
+A server tool's `.server()` implementation receives a second argument, the `ToolExecutionContext` — `{ context, toolCallId, emitCustomEvent }`. Use `emitCustomEvent` to stream typed progress to the client while the tool runs, and `context` to read request-scoped dependencies (auth, DB clients, etc.):
+
+```typescript
+const importData = importDataDef.server(async (input, { context, emitCustomEvent }) => {
+  emitCustomEvent("progress", { step: 1, total: 3 });
+  const rows = await context.db.read(input.source);
+
+  emitCustomEvent("progress", { step: 2, total: 3 });
+  await context.db.write(rows);
+
+  emitCustomEvent("progress", { step: 3, total: 3 });
+  return { imported: rows.length };
+});
+```
+
+See [Server Tools](./server-tools) for the full runtime-context pattern.
+
 ## Tool States
 
 Tools go through different states during execution:
@@ -339,6 +365,8 @@ Tools go through different states during execution:
 - `approval-requested` - Tool requires user approval (if `needsApproval: true`)
 - `approval-responded` - User has approved/denied
 
+Once arguments (and approval, if required) are in, the result appears as `part.output` on the tool-call part and as a separate sibling `tool-result` part whose `state` is `complete` or `error`. See [Tool Architecture](./tool-architecture) for the full state model.
+
 > **Tip:** If your use case involves calling multiple tools with complex logic (filtering, aggregation, parallel calls), consider [Code Mode](../code-mode/code-mode) — it lets the LLM write a TypeScript program that orchestrates tools in a single execution instead of one tool call at a time.
 
 ## Next Steps

From e6b85820a48be00964adbe09dcd5322cd823b1a2 Mon Sep 17 00:00:00 2001
From: Alem Tuzlak <t.zlak@hotmail.com>
Date: Wed, 3 Jun 2026 12:29:35 +0200
Subject: [PATCH 2/7] docs: address CR-round findings (correctness + latest
 models)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A 7+1-agent confirmation review of the docs PR surfaced further
source-accuracy issues (and caught one regression the first fix pass
introduced). All verified against packages/ source:

- tools/server-tools: JSON-schema tool input is `unknown` (not `any`);
  samples now narrow/cast args.
- thinking-content: drop the adaptive-thinking / output_config.effort
  example — those option types are not wired into any model's typed
  modelOptions; document the `{ type: 'enabled', budget_tokens }` form.
- multimodal-content: correct the Anthropic modality bullets (no
  `claude-3*` ids; Claude Haiku 3 supports documents).
- comparison: fix the ImagePart (`source: { type:'url', url }`) and
  TextPart (`content`) shapes in the flagship example.
- chunk-definitions: RUN_STARTED/RUN_FINISHED `threadId` is required;
  add REASONING_MESSAGE_CHUNK to the internal-members note.
- media: createOpenaiVideo needs a model arg; video `seconds` is a
  string union; transcription `responseFormat`/`prompt` are top-level
  (not modelOptions); drop the non-existent gpt-4o-mini-audio-preview
  TTS model; add the Audio row to the generations table.
- advanced: typed-options gpt-image-1 size must be a GptImageSize.
- observability: aiEventClient imports from @tanstack/ai-event-client
  (the @tanstack/ai/event-client subpath does not exist).
- adapters: revert claude-haiku-3 -> claude-3-haiku (the id passed to
  anthropicText); clarify max_tokens auto-adjust; @elevenlabs/client
  (not @11labs/client); elevenlabs agentId optional, debug is DebugOption.
- structured-outputs: Standard JSON Schema /json-schema link; Zod v4.2+.

Model ids touched in these fixes use the latest per provider from
model-meta.ts (gpt-5.5, claude-sonnet-4-6, etc.).
---
 docs/adapters/anthropic.md          |  4 ++--
 docs/adapters/elevenlabs.md         |  6 +++---
 docs/advanced/multimodal-content.md |  5 +++--
 docs/advanced/observability.md      |  4 ++--
 docs/advanced/typed-options.md      |  2 +-
 docs/chat/thinking-content.md       | 21 +++------------------
 docs/comparison/vercel-ai-sdk.md    |  8 ++++----
 docs/media/generations.md           |  1 +
 docs/media/text-to-speech.md        |  3 +--
 docs/media/transcription.md         | 14 +++++++-------
 docs/media/video-generation.md      |  4 ++--
 docs/protocol/chunk-definitions.md  |  8 +++++---
 docs/structured-outputs/one-shot.md |  2 +-
 docs/structured-outputs/overview.md |  2 +-
 docs/tools/client-tools.md          |  2 +-
 docs/tools/server-tools.md          |  5 +++--
 docs/tools/tools.md                 |  6 +++---
 17 files changed, 43 insertions(+), 54 deletions(-)

diff --git a/docs/adapters/anthropic.md b/docs/adapters/anthropic.md
index b6923db0b..59d3c9f47 100644
--- a/docs/adapters/anthropic.md
+++ b/docs/adapters/anthropic.md
@@ -140,7 +140,7 @@ modelOptions: {
 }
 ```
 
-**Note:** `max_tokens` must be greater than `budget_tokens`. The adapter automatically adjusts `max_tokens` if needed.
+**Note:** `max_tokens` must be greater than `budget_tokens`. The adapter automatically raises the top-level `maxTokens` when your `budget_tokens` meets or exceeds it. If you instead set `max_tokens` inside `modelOptions`, keep it greater than `budget_tokens` yourself — that path is validated and throws rather than auto-adjusting.
 
 ### Prompt Caching
 
@@ -254,7 +254,7 @@ const stream = chat({
 });
 ```
 
-**Supported models:** every current Claude model. `claude-haiku-3` supports
+**Supported models:** every current Claude model. `claude-3-haiku` supports
 only `web_search` (not `web_fetch`). See [Provider Tools](../tools/provider-tools.md#which-models-support-which-tools).
 
 ### `webFetchTool`
diff --git a/docs/adapters/elevenlabs.md b/docs/adapters/elevenlabs.md
index ed9e8f605..dab794aa5 100644
--- a/docs/adapters/elevenlabs.md
+++ b/docs/adapters/elevenlabs.md
@@ -194,7 +194,7 @@ Used on the **server** to generate a signed WebSocket URL.
 
 | Option | Type | Required | Description |
 |--------|------|----------|-------------|
-| `agentId` | `string` | Yes | Agent ID configured in the ElevenLabs dashboard |
+| `agentId` | `string` | No\* | Agent ID configured in the ElevenLabs dashboard. \*Falls back to `ELEVENLABS_AGENT_ID`; required only if that env var is unset |
 | `overrides.voiceId` | `string` | No | Custom voice ID to override the agent's default voice |
 | `overrides.systemPrompt` | `string` | No | Custom system prompt to override the agent's default |
 | `overrides.firstMessage` | `string` | No | First message the agent speaks when the session starts |
@@ -207,7 +207,7 @@ Used on the **client** to establish the connection.
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
 | `connectionMode` | `'websocket' \| 'webrtc'` | auto-detect | Transport protocol for the connection |
-| `debug` | `boolean` | `false` | Enable debug logging |
+| `debug` | `boolean \| DebugConfig` | `false` | Enable debug logging — pass `true` for all categories, or a `DebugConfig` to select categories/sink |
 
 ## Differences from OpenAI Realtime
 
@@ -218,7 +218,7 @@ ElevenLabs and OpenAI take different approaches to realtime voice:
 | **Configuration** | Agent-based. Configure voice, personality, and knowledge in the ElevenLabs dashboard or via `overrides` at token time. | Session-based. Configure `instructions`, `voice`, `temperature`, etc. per session via `useRealtimeChat` options. |
 | **Token type** | Signed WebSocket URL (valid 30 minutes) | Ephemeral API token (valid ~10 minutes) |
 | **Transport** | WebSocket (default) or WebRTC | WebRTC |
-| **Audio handling** | `@11labs/client` SDK manages audio capture and playback automatically | TanStack AI manages WebRTC peer connection and audio tracks |
+| **Audio handling** | `@elevenlabs/client` SDK manages audio capture and playback automatically | TanStack AI manages WebRTC peer connection and audio tracks |
 | **VAD** | Handled by ElevenLabs server-side | Supports `server`, `semantic`, and `manual` modes |
 | **Runtime updates** | Session config is set at creation time and cannot be changed mid-session | Supports `updateSession()` for mid-session config changes |
 | **Image input** | Not supported | Supported via `sendImage()` |
diff --git a/docs/advanced/multimodal-content.md b/docs/advanced/multimodal-content.md
index a712151e1..8d6753f30 100644
--- a/docs/advanced/multimodal-content.md
+++ b/docs/advanced/multimodal-content.md
@@ -154,8 +154,9 @@ const docMessage = {
 ```
 
 **Supported modalities:**
-- Claude 3 models: text, image
-- Claude 3.5 models: text, image, document (PDF)
+- Most Claude models (e.g. `claude-haiku-3`, `claude-haiku-4-5`, `claude-sonnet-4-6`, `claude-opus-4.8`): text, image, and document (PDF)
+
+Check each model's `supports.input` in `@tanstack/ai-anthropic`'s `model-meta.ts` for the authoritative per-model list.
 
 ### Gemini
 
diff --git a/docs/advanced/observability.md b/docs/advanced/observability.md
index 8eb993f87..a036edd59 100644
--- a/docs/advanced/observability.md
+++ b/docs/advanced/observability.md
@@ -53,7 +53,7 @@ subscribe on the server/client respectfully.
 
 Here is an example for the server:
 ```ts
-import { aiEventClient } from "@tanstack/ai/event-client";
+import { aiEventClient } from "@tanstack/ai-event-client";
 
 // server.ts file or wherever the root of your server is
 aiEventClient.on("text:request:started", e => {
@@ -70,7 +70,7 @@ Listening on the client is the same approach, just subscribe to the events:
 
 ```tsx
 // App.tsx
-import { aiEventClient } from "@tanstack/ai/event-client";
+import { aiEventClient } from "@tanstack/ai-event-client";
 
 const App = () => {
   useEffect(() => {
diff --git a/docs/advanced/typed-options.md b/docs/advanced/typed-options.md
index 000187dd7..e75ffd788 100644
--- a/docs/advanced/typed-options.md
+++ b/docs/advanced/typed-options.md
@@ -138,7 +138,7 @@ import { openaiImage } from '@tanstack/ai-openai'
 
 const heroImageOptions = createImageOptions({
   adapter: openaiImage('gpt-image-1'),
-  size: '1792x1024',
+  size: '1536x1024',
   numberOfImages: 1,
 })
 
diff --git a/docs/chat/thinking-content.md b/docs/chat/thinking-content.md
index de39e2a6e..e6fb8acf9 100644
--- a/docs/chat/thinking-content.md
+++ b/docs/chat/thinking-content.md
@@ -39,38 +39,23 @@ How you enable thinking depends on the provider.
 
 ### Anthropic (Extended Thinking)
 
-Pass the `thinking` option in `modelOptions`. You must specify `budget_tokens` (minimum 1024). Validation also requires `budget_tokens` to be **less than** `maxTokens`, so set `maxTokens` high enough to leave room for the visible response:
+Pass the `thinking` option in `modelOptions` with `type: "enabled"` and a `budget_tokens` (minimum 1024). If `budget_tokens` is not below `maxTokens`, the adapter automatically raises `maxTokens` so there is room for the visible response in addition to the thinking budget:
 
 ```typescript
 import { chat } from "@tanstack/ai";
 import { anthropicText } from "@tanstack/ai-anthropic";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages,
   maxTokens: 32000,
   modelOptions: {
-    // budget_tokens must satisfy 1024 <= budget_tokens < maxTokens
+    // budget_tokens must be at least 1024
     thinking: { type: "enabled", budget_tokens: 10000 },
   },
 });
 ```
 
-For Claude Opus 4.6 and later, you can use adaptive thinking, where the model decides how much to think. On these models you pair `thinking: { type: "adaptive" }` with a top-level `effort`:
-
-```typescript
-const stream = chat({
-  adapter: anthropicText("claude-opus-4-6"),
-  messages,
-  modelOptions: {
-    thinking: { type: "adaptive" },
-    effort: "high", // 'max' | 'high' | 'medium' | 'low'
-  },
-});
-```
-
-> **Claude 4.7+:** Adaptive effort moved under `modelOptions: { output_config: { effort: "high" } }`, and `thinking: { type: "enabled", budget_tokens }` is deprecated in favor of `thinking: { type: "adaptive" }` there. The top-level `effort` field shown above is the Opus 4.6 form; earlier models continue to accept the legacy top-level `effort` / `thinking.type: "enabled"` shape.
-
 ### OpenAI (Reasoning Models)
 
 OpenAI o-series models (o1, o3, o3-mini, o3-pro) perform reasoning automatically. You can control the depth with the `reasoning` option:
diff --git a/docs/comparison/vercel-ai-sdk.md b/docs/comparison/vercel-ai-sdk.md
index 4de745f86..bba82a2d8 100644
--- a/docs/comparison/vercel-ai-sdk.md
+++ b/docs/comparison/vercel-ai-sdk.md
@@ -63,14 +63,14 @@ Each provider adapter contains a comprehensive `model-meta.ts` that maps every m
 import { chat } from '@tanstack/ai'
 import { openaiText } from '@tanstack/ai-openai'
 
-// TypeScript knows gpt-5.2 supports text + image input
+// TypeScript knows gpt-5.5 supports text + image input
 const stream = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages: [{
     role: 'user',
     content: [
-      { type: 'text', text: 'What is in this image?' },
-      { type: 'image', url: 'https://example.com/photo.jpg' },
+      { type: 'text', content: 'What is in this image?' },
+      { type: 'image', source: { type: 'url', url: 'https://example.com/photo.jpg' } },
     ],
   }],
 })
diff --git a/docs/media/generations.md b/docs/media/generations.md
index 69038d12b..f4e8627fc 100644
--- a/docs/media/generations.md
+++ b/docs/media/generations.md
@@ -249,6 +249,7 @@ const { result } = useGenerateSpeech({
 | Activity | Server Function | Client Hook (React) | Guide |
 |----------|----------------|---------------------|-------|
 | Image generation | `generateImage()` | `useGenerateImage()` | [Image Generation](./image-generation) |
+| Audio generation | `generateAudio()` | `useGenerateAudio()` | [Audio Generation](./audio-generation) |
 | Text-to-speech | `generateSpeech()` | `useGenerateSpeech()` | [Text-to-Speech](./text-to-speech) |
 | Transcription | `generateTranscription()` | `useTranscription()` | [Transcription](./transcription) |
 | Summarization | `summarize()` | `useSummarize()` | - |
diff --git a/docs/media/text-to-speech.md b/docs/media/text-to-speech.md
index 1fdb0b1f5..f04a15dcb 100644
--- a/docs/media/text-to-speech.md
+++ b/docs/media/text-to-speech.md
@@ -167,7 +167,7 @@ const result = await generateSpeech({
 |--------|------|-------------|
 | `instructions` | `string` | Voice style instructions (GPT-4o audio models only) |
 
-> **Note:** The `instructions` and `stream_format` options are only available with `gpt-4o-audio-preview` and `gpt-4o-mini-audio-preview` models, not with `tts-1` or `tts-1-hd`.
+> **Note:** The `instructions` and `stream_format` options are only available with the `gpt-4o-audio-preview` model, not with `tts-1` or `tts-1-hd`.
 
 ## Response Format
 
@@ -455,7 +455,6 @@ TypeScript automatically infers the result type from your `onResult` return valu
 | `tts-1` | Standard | Fast | Real-time applications |
 | `tts-1-hd` | High | Slower | Production audio |
 | `gpt-4o-audio-preview` | Highest | Variable | Advanced voice control |
-| `gpt-4o-mini-audio-preview` | High | Fast | Balanced quality/speed |
 
 ### Gemini Models
 
diff --git a/docs/media/transcription.md b/docs/media/transcription.md
index 471a6dece..fb6e9c66c 100644
--- a/docs/media/transcription.md
+++ b/docs/media/transcription.md
@@ -132,22 +132,22 @@ Whisper supports many languages. Common codes include:
 const result = await generateTranscription({
   adapter: openaiTranscription('whisper-1'),
   audio: audioFile,
+  responseFormat: 'verbose_json', // Top-level: detailed output with timestamps
+  prompt: 'Technical terms: API, SDK, CLI', // Top-level: guide transcription
   modelOptions: {
-    response_format: 'verbose_json', // Get detailed output with timestamps
-    temperature: 0, // Lower = more deterministic
-    prompt: 'Technical terms: API, SDK, CLI', // Guide transcription
+    temperature: 0, // Lower = more deterministic (provider option)
   },
 })
 ```
 
 | Option | Type | Description |
 |--------|------|-------------|
-| `response_format` | `string` | Output format: "json", "text", "srt", "verbose_json", "vtt" |
 | `temperature` | `number` | Sampling temperature (0 to 1) |
-| `prompt` | `string` | Optional text to guide transcription style |
-| `timestamp_granularities` | `Array<'word' \| 'segment'>` | Timestamp granularity to populate (requires `response_format: 'verbose_json'`) |
+| `timestamp_granularities` | `Array<'word' \| 'segment'>` | Timestamp granularity to populate (requires top-level `responseFormat: 'verbose_json'`) |
 | `include` | `string[]` | Additional values to include in the response (e.g., `logprobs`) |
 
+> `responseFormat` and `prompt` are **top-level** options on `generateTranscription`, not `modelOptions` keys.
+
 ### Response Formats
 
 | Format | Description |
@@ -206,8 +206,8 @@ async function transcribeAudio(filepath: string) {
     adapter: openaiTranscription('whisper-1'),
     audio: audioFile,
     language: 'en',
+    responseFormat: 'verbose_json',
     modelOptions: {
-      response_format: 'verbose_json',
       timestamp_granularities: ['segment', 'word'],
     },
   })
diff --git a/docs/media/video-generation.md b/docs/media/video-generation.md
index 0c5985cc4..4af93f020 100644
--- a/docs/media/video-generation.md
+++ b/docs/media/video-generation.md
@@ -401,7 +401,7 @@ const { jobId } = await generateVideo({
   duration: 8,           // 4, 8, or 12 seconds
   modelOptions: {
     size: '1280x720',    // Alternative way to specify size
-    seconds: 8,          // Alternative way to specify duration
+    seconds: '8',        // Alternative way to specify duration ('4' | '8' | '12')
   }
 })
 ```
@@ -504,7 +504,7 @@ For production use or when you need explicit control:
 ```typescript
 import { createOpenaiVideo } from '@tanstack/ai-openai'
 
-const adapter = createOpenaiVideo('your-openai-api-key')
+const adapter = createOpenaiVideo('sora-2', 'your-openai-api-key')
 ```
 
 ## Differences from Image Generation
diff --git a/docs/protocol/chunk-definitions.md b/docs/protocol/chunk-definitions.md
index 60c7cec3c..10bb8f0e8 100644
--- a/docs/protocol/chunk-definitions.md
+++ b/docs/protocol/chunk-definitions.md
@@ -59,7 +59,7 @@ type AGUIEventType =
 
 > The exported `EventType` enum (`@tanstack/ai`) carries a few additional
 > internal/transitional members (e.g. `TEXT_MESSAGE_CHUNK`, `TOOL_CALL_CHUNK`,
-> `THINKING_*`, `ACTIVITY_*`, `RAW`). The events above are the ones that appear
+> `REASONING_MESSAGE_CHUNK`, `THINKING_*`, `ACTIVITY_*`, `RAW`). The events above are the ones that appear
 > on the wire for a normal chat run.
 
 Only AG-UI event types are supported; previous legacy chunk formats are no longer accepted.
@@ -74,7 +74,7 @@ Emitted when a run begins. This is the first event in any streaming response.
 interface RunStartedEvent extends BaseAGUIEvent {
   type: 'RUN_STARTED';
   runId: string;           // Unique identifier for this run
-  threadId?: string;       // Optional thread/conversation ID
+  threadId: string;        // Thread/conversation ID (required; always emitted)
 }
 ```
 
@@ -83,7 +83,8 @@ interface RunStartedEvent extends BaseAGUIEvent {
 {
   "type": "RUN_STARTED",
   "runId": "run_abc123",
-  "model": "gpt-4o",
+  "threadId": "thread_abc123",
+  "model": "gpt-5.5",
   "timestamp": 1701234567890
 }
 ```
@@ -107,6 +108,7 @@ Emitted when a run completes successfully.
 interface RunFinishedEvent extends BaseAGUIEvent {
   type: 'RUN_FINISHED';
   runId: string;
+  threadId: string;                                                           // required (inherited from AG-UI base)
   finishReason?: 'stop' | 'length' | 'content_filter' | 'tool_calls' | null; // TanStack AI addition
   usage?: TokenUsage;                                                         // TanStack AI addition
 }
diff --git a/docs/structured-outputs/one-shot.md b/docs/structured-outputs/one-shot.md
index df9e00812..ecdf9a996 100644
--- a/docs/structured-outputs/one-shot.md
+++ b/docs/structured-outputs/one-shot.md
@@ -66,7 +66,7 @@ The TypeScript type of `person` above is `{ name: string; age: number; email: st
 
 ## Field Descriptions
 
-Field descriptions tell the model what data to extract. They become part of the JSON Schema sent to the provider — the model sees them as hints. In Zod 4+ use `.meta()`:
+Field descriptions tell the model what data to extract. They become part of the JSON Schema sent to the provider — the model sees them as hints. In Zod v4.2+ use `.meta()`:
 
 ```typescript
 const ProductSchema = z.object({
diff --git a/docs/structured-outputs/overview.md b/docs/structured-outputs/overview.md
index dddec66ad..d7f8bd84a 100644
--- a/docs/structured-outputs/overview.md
+++ b/docs/structured-outputs/overview.md
@@ -37,7 +37,7 @@ person.age;  // number
 
 ## Schema Libraries
 
-TanStack AI accepts any library that implements [Standard JSON Schema](https://standardschema.dev/):
+TanStack AI accepts any library that implements [Standard JSON Schema](https://standardschema.dev/json-schema):
 
 - [Zod](https://zod.dev/) (v4.2+)
 - [ArkType](https://arktype.io/)
diff --git a/docs/tools/client-tools.md b/docs/tools/client-tools.md
index 84cd4f626..52a76424c 100644
--- a/docs/tools/client-tools.md
+++ b/docs/tools/client-tools.md
@@ -365,6 +365,6 @@ chat({ adapter: openaiText('gpt-5.2'), messages: [], tools: [addToCartServer] })
 
 ## Next Steps
 
-- [How Tools Work](./tools) - Deep dive into the tool architecture
+- [How Tools Work](./tool-architecture) - Deep dive into the tool architecture
 - [Server Tools](./server-tools) - Learn about server-side tool execution
 - [Tool Approval Flow](./tool-approval) - Add approval workflows for sensitive operations
diff --git a/docs/tools/server-tools.md b/docs/tools/server-tools.md
index f56e0562e..dc38ab141 100644
--- a/docs/tools/server-tools.md
+++ b/docs/tools/server-tools.md
@@ -359,9 +359,10 @@ const getUserDataDef = toolDefinition({
   outputSchema,
 });
 
-// When using JSON Schema, args is typed as `any`
+// When using JSON Schema, args is typed as `unknown` — narrow or cast before use
 const getUserData = getUserDataDef.server(async (args) => {
-  const user = await db.users.findUnique({ where: { id: args.userId } });
+  const { userId } = args as { userId: string };
+  const user = await db.users.findUnique({ where: { id: userId } });
   return { name: user.name, email: user.email };
 });
 ```
diff --git a/docs/tools/tools.md b/docs/tools/tools.md
index a4b9718b6..9b2cdd178 100644
--- a/docs/tools/tools.md
+++ b/docs/tools/tools.md
@@ -89,7 +89,7 @@ const inputSchema: JSONSchema = {
 };
 ```
 
-> **Note:** When using JSON Schema, TypeScript will infer `any` for input/output types since JSON Schema cannot provide compile-time type information. Zod schemas are recommended for full type safety.
+> **Note:** When using JSON Schema, TypeScript infers `unknown` for input/output types (it cannot derive types from a JSON Schema at compile time), so you must narrow or cast `args` before use. Zod schemas are recommended for full type safety.
 
 ## Tool Definition
 
@@ -172,9 +172,9 @@ const getWeatherDef = toolDefinition({
   outputSchema,
 });
 
-// Create server implementation (args is typed as `any` with JSON Schema)
+// Create server implementation (args is typed as `unknown` with JSON Schema)
 const getWeatherServer = getWeatherDef.server(async (args) => {
-  const { location, unit } = args;
+  const { location, unit } = args as { location: string; unit?: string };
   const response = await fetch(
     `https://api.weather.com/v1/current?location=${location}&unit=${unit || "fahrenheit"}`
   );

From 769a4b20eb74dca02a81dd8662cf056a02a01b54 Mon Sep 17 00:00:00 2001
From: Alem Tuzlak <t.zlak@hotmail.com>
Date: Wed, 3 Jun 2026 12:36:30 +0200
Subject: [PATCH 3/7] docs: use latest per-provider models in examples

Sweep example model ids across the PR's docs to the latest available
per provider, sourced from each adapter's model-meta.ts:
- OpenAI: gpt-5.2 -> gpt-5.5, gpt-5-mini -> gpt-5.4-mini
- Anthropic: claude-sonnet-4-5 -> claude-sonnet-4-6,
  claude-opus-4-6 -> claude-opus-4.8
- Gemini: gemini-2.0-flash -> gemini-3-flash-preview,
  image -> gemini-3.1-flash-image-preview, tts -> gemini-3.1-flash-tts-preview

Every replacement id was verified present in model-meta.ts. Intentional
cases preserved: negative/capability-contrast examples
(per-model-type-safety), the claude-3-haiku web_search note, model
enumeration/availability tables, DALL-E and o-series demos, and the
Cencori pass-through ids (external provider, no in-repo model-meta).
---
 docs/adapters/anthropic.md                 | 34 +++++++++++-----------
 docs/advanced/multimodal-content.md        | 10 +++----
 docs/advanced/runtime-adapter-switching.md | 30 +++++++++----------
 docs/advanced/tree-shaking.md              | 18 ++++++------
 docs/advanced/typed-options.md             |  4 +--
 docs/chat/agentic-cycle.md                 |  6 ++--
 docs/chat/streaming.md                     |  4 +--
 docs/chat/thinking-content.md              |  2 +-
 docs/comparison/vercel-ai-sdk.md           |  8 ++---
 docs/getting-started/quick-start.md        |  6 ++--
 docs/media/text-to-speech.md               |  4 +--
 docs/protocol/http-stream-protocol.md      |  6 ++--
 docs/protocol/sse-protocol.md              |  4 +--
 docs/structured-outputs/multi-turn.md      |  2 +-
 docs/structured-outputs/one-shot.md        | 10 +++----
 docs/structured-outputs/overview.md        |  2 +-
 docs/structured-outputs/streaming.md       |  4 +--
 docs/structured-outputs/with-tools.md      |  2 +-
 docs/tools/client-tools.md                 |  6 ++--
 docs/tools/provider-tools.md               |  2 +-
 docs/tools/server-tools.md                 |  6 ++--
 docs/tools/tool-approval.md                |  2 +-
 docs/tools/tool-architecture.md            |  2 +-
 docs/tools/tools.md                        |  6 ++--
 24 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/docs/adapters/anthropic.md b/docs/adapters/anthropic.md
index 59d3c9f47..787afb2e8 100644
--- a/docs/adapters/anthropic.md
+++ b/docs/adapters/anthropic.md
@@ -28,7 +28,7 @@ import { chat } from "@tanstack/ai";
 import { anthropicText } from "@tanstack/ai-anthropic";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Hello!" }],
 });
 ```
@@ -39,7 +39,7 @@ const stream = chat({
 import { chat } from "@tanstack/ai";
 import { createAnthropicChat } from "@tanstack/ai-anthropic";
 
-const adapter = createAnthropicChat("claude-sonnet-4-5", process.env.ANTHROPIC_API_KEY!, {
+const adapter = createAnthropicChat("claude-sonnet-4-6", process.env.ANTHROPIC_API_KEY!, {
   // ... your config options
 });
 
@@ -58,7 +58,7 @@ const config: Omit<AnthropicTextConfig, "apiKey"> = {
   baseURL: "https://api.anthropic.com", // Optional, for custom endpoints
 };
 
-const adapter = createAnthropicChat("claude-sonnet-4-5", process.env.ANTHROPIC_API_KEY!, config);
+const adapter = createAnthropicChat("claude-sonnet-4-6", process.env.ANTHROPIC_API_KEY!, config);
 ```
  
 
@@ -72,7 +72,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: anthropicText("claude-sonnet-4-5"),
+    adapter: anthropicText("claude-sonnet-4-6"),
     messages,
   });
 
@@ -101,7 +101,7 @@ const searchDatabase = searchDatabaseDef.server(async ({ query }) => {
 });
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages,
   tools: [searchDatabase],
 });
@@ -113,7 +113,7 @@ Anthropic supports various provider-specific options. Sampling parameters live h
 
 ```typescript
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages,
   modelOptions: {
     max_tokens: 4096,
@@ -148,7 +148,7 @@ Cache prompts for better performance and reduced costs:
 
 ```typescript
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [
     {
       role: "user",
@@ -177,7 +177,7 @@ import { summarize } from "@tanstack/ai";
 import { anthropicSummarize } from "@tanstack/ai-anthropic";
 
 const result = await summarize({
-  adapter: anthropicSummarize("claude-sonnet-4-5"),
+  adapter: anthropicSummarize("claude-sonnet-4-6"),
   text: "Your long text to summarize...",
   maxLength: 100,
   style: "concise", // "concise" | "bullet-points" | "paragraph"
@@ -204,7 +204,7 @@ Creates an Anthropic chat adapter.
 
 **Parameters:**
 
-- `model` - Claude model id (e.g. `"claude-sonnet-4-5"`, `"claude-opus-4-6"`)
+- `model` - Claude model id (e.g. `"claude-sonnet-4-6"`, `"claude-opus-4.8"`)
 - `config?.baseURL` - Custom base URL (optional)
 
 ### `anthropicSummarize(model, config?)` / `createAnthropicSummarize(model, apiKey, config?)`
@@ -242,7 +242,7 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 import { webSearchTool } from "@tanstack/ai-anthropic/tools";
 
 const stream = chat({
-  adapter: anthropicText("claude-opus-4-6"),
+  adapter: anthropicText("claude-opus-4.8"),
   messages: [{ role: "user", content: "What's new in AI this week?" }],
   tools: [
     webSearchTool({
@@ -269,7 +269,7 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 import { webFetchTool } from "@tanstack/ai-anthropic/tools";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Summarise https://example.com" }],
   tools: [webFetchTool()],
 });
@@ -289,7 +289,7 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 import { codeExecutionTool } from "@tanstack/ai-anthropic/tools";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Plot a histogram of [1,2,2,3,3,3]" }],
   tools: [
     codeExecutionTool({ name: "code_execution", type: "code_execution_20250825" }),
@@ -311,7 +311,7 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 import { computerUseTool } from "@tanstack/ai-anthropic/tools";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Open the browser and go to example.com" }],
   tools: [
     computerUseTool({
@@ -338,7 +338,7 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 import { bashTool } from "@tanstack/ai-anthropic/tools";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "List all TypeScript files in src/" }],
   tools: [bashTool({ name: "bash", type: "bash_20250124" })],
 });
@@ -358,7 +358,7 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 import { textEditorTool } from "@tanstack/ai-anthropic/tools";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Fix the bug in src/index.ts" }],
   tools: [
     textEditorTool({ type: "text_editor_20250124", name: "str_replace_editor" }),
@@ -380,7 +380,7 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 import { memoryTool } from "@tanstack/ai-anthropic/tools";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Remember that I prefer metric units" }],
   tools: [memoryTool()],
 });
@@ -402,7 +402,7 @@ import { customTool } from "@tanstack/ai-anthropic/tools";
 import { z } from "zod";
 
 const stream = chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Look up user 42" }],
   tools: [
     customTool(
diff --git a/docs/advanced/multimodal-content.md b/docs/advanced/multimodal-content.md
index 8d6753f30..eb68ebc7f 100644
--- a/docs/advanced/multimodal-content.md
+++ b/docs/advanced/multimodal-content.md
@@ -71,7 +71,7 @@ import { chat } from '@tanstack/ai'
 import { openaiText } from '@tanstack/ai-openai'
 
 const response = await chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages: [
     {
       role: 'user',
@@ -99,7 +99,7 @@ OpenAI supports images and audio in their vision and audio models:
 ```typescript
 import { openaiText } from '@tanstack/ai-openai'
 
-const adapter = openaiText('gpt-5.2')
+const adapter = openaiText('gpt-5.5')
 
 // Image with detail level metadata
 const message = {
@@ -126,7 +126,7 @@ Anthropic's Claude models support images and PDF documents:
 ```typescript
 import { anthropicText } from '@tanstack/ai-anthropic'
 
-const adapter = anthropicText('claude-sonnet-4-5')
+const adapter = anthropicText('claude-sonnet-4-6')
 
 // Image with mimeType in source
 const imageMessage = {
@@ -165,7 +165,7 @@ Google's Gemini models support a wide range of modalities:
 ```typescript
 import { geminiText } from '@tanstack/ai-gemini'
 
-const adapter = geminiText('gemini-2.0-flash')
+const adapter = geminiText('gemini-3-flash-preview')
 
 // Image with mimeType in source
 const message = {
@@ -326,7 +326,7 @@ const BodySchema = z.object({ messages: z.array(MessageSchema) })
 const { messages } = BodySchema.parse(await request.json())
 
 const stream = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages,
 })
 ```
diff --git a/docs/advanced/runtime-adapter-switching.md b/docs/advanced/runtime-adapter-switching.md
index ace15588c..c26044f3e 100644
--- a/docs/advanced/runtime-adapter-switching.md
+++ b/docs/advanced/runtime-adapter-switching.md
@@ -29,8 +29,8 @@ type Provider = 'openai' | 'anthropic'
 
 // Define adapters with their models - autocomplete works here!
 const adapters = {
-  anthropic: () => anthropicText('claude-sonnet-4-5'),  // ✅ Autocomplete!
-  openai: () => openaiText('gpt-5.2'),  // ✅ Autocomplete!
+  anthropic: () => anthropicText('claude-sonnet-4-6'),  // ✅ Autocomplete!
+  openai: () => openaiText('gpt-5.5'),  // ✅ Autocomplete!
 }
 
 // In your request handler:
@@ -49,11 +49,11 @@ Each adapter factory function accepts a model name as its first argument and ret
 
 ```typescript
 // These are equivalent:
-const adapter1 = openaiText('gpt-5.2')
-const adapter2 = new OpenAITextAdapter({ apiKey: process.env.OPENAI_API_KEY }, 'gpt-5.2')
+const adapter1 = openaiText('gpt-5.5')
+const adapter2 = new OpenAITextAdapter({ apiKey: process.env.OPENAI_API_KEY }, 'gpt-5.5')
 
 // The model is stored on the adapter
-console.log(adapter1.model) // 'gpt-5.2'
+console.log(adapter1.model) // 'gpt-5.5'
 ```
 
 When you pass an adapter to `chat()`, it uses the model from `adapter.model`. This means:
@@ -78,10 +78,10 @@ type Provider = 'openai' | 'anthropic' | 'gemini' | 'ollama'
 
 // Define adapters with their models
 const adapters = {
-  anthropic: () => anthropicText('claude-sonnet-4-5'),
-  gemini: () => geminiText('gemini-2.0-flash'),
+  anthropic: () => anthropicText('claude-sonnet-4-6'),
+  gemini: () => geminiText('gemini-3-flash-preview'),
   ollama: () => ollamaText('mistral:7b'),
-  openai: () => openaiText('gpt-5.2'),
+  openai: () => openaiText('gpt-5.5'),
 }
 
 export const Route = createFileRoute('/api/chat')({
@@ -121,7 +121,7 @@ import { geminiImage } from '@tanstack/ai-gemini'
 
 const imageAdapters = {
   openai: () => openaiImage('gpt-image-1'),
-  gemini: () => geminiImage('gemini-2.0-flash-preview-image-generation'),
+  gemini: () => geminiImage('gemini-3.1-flash-image-preview'),
 }
 
 // Usage
@@ -142,8 +142,8 @@ import { openaiSummarize } from '@tanstack/ai-openai'
 import { anthropicSummarize } from '@tanstack/ai-anthropic'
 
 const summarizeAdapters = {
-  openai: () => openaiSummarize('gpt-5-mini'),
-  anthropic: () => anthropicSummarize('claude-sonnet-4-5'),
+  openai: () => openaiSummarize('gpt-5.4-mini'),
+  anthropic: () => anthropicSummarize('claude-sonnet-4-6'),
 }
 
 // Usage
@@ -168,12 +168,12 @@ let model
 switch (provider) {
   case 'anthropic':
     adapter = anthropicText()
-    model = 'claude-sonnet-4-5'
+    model = 'claude-sonnet-4-6'
     break
   case 'openai':
   default:
     adapter = openaiText()
-    model = 'gpt-5.2'
+    model = 'gpt-5.5'
     break
 }
 
@@ -188,8 +188,8 @@ const stream = chat({
 
 ```typescript
 const adapters = {
-  anthropic: () => anthropicText('claude-sonnet-4-5'),
-  openai: () => openaiText('gpt-5.2'),
+  anthropic: () => anthropicText('claude-sonnet-4-6'),
+  openai: () => openaiText('gpt-5.5'),
 }
 
 const stream = chat({
diff --git a/docs/advanced/tree-shaking.md b/docs/advanced/tree-shaking.md
index e8107bdcb..0018ca9ce 100644
--- a/docs/advanced/tree-shaking.md
+++ b/docs/advanced/tree-shaking.md
@@ -51,7 +51,7 @@ import { chat } from '@tanstack/ai'
 import { openaiText } from '@tanstack/ai-openai'
 
 const stream = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages: [{ role: 'user', content: 'Hello!' }],
 })
 ```
@@ -118,7 +118,7 @@ import { openaiText } from '@tanstack/ai-openai'
 
 // Chat generation - returns AsyncIterable<StreamChunk>
 const chatResult = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages: [{ role: 'user', content: 'Hello!' }],
 })
 
@@ -151,12 +151,12 @@ import {
 
 // Each activity is independent
 const chatResult = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages: [{ role: 'user', content: 'Hello!' }],
 })
 
 const summarizeResult = await summarize({
-  adapter: openaiSummarize('gpt-5-mini'),
+  adapter: openaiSummarize('gpt-5.4-mini'),
   text: 'Long text to summarize...',
 })
 ```
@@ -170,10 +170,10 @@ The tree-shakeable design doesn't sacrifice type safety. Each adapter provides f
 ```ts
 import { openaiText, type OpenAIChatModel } from '@tanstack/ai-openai'
 
-const adapter = openaiText('gpt-5.2')
+const adapter = openaiText('gpt-5.5')
 
 // TypeScript knows the exact models supported
-const model: OpenAIChatModel = 'gpt-5.2' // ✓ Valid
+const model: OpenAIChatModel = 'gpt-5.5' // ✓ Valid
 const model2: OpenAIChatModel = 'invalid' // ✗ Type error
 ```
 
@@ -190,7 +190,7 @@ import { openaiText } from '@tanstack/ai-openai'
 
 // Only import what you need
 const chatOptions = createChatOptions({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
 })
 ```
 
@@ -278,10 +278,10 @@ Each adapter type implements a specific interface:
 All adapters have a `kind` property that indicates their type:
 
 ```ts
-const chatAdapter = openaiText('gpt-5.2')
+const chatAdapter = openaiText('gpt-5.5')
 console.log(chatAdapter.kind) // 'text'
 
-const summarizeAdapter = openaiSummarize('gpt-5-mini')
+const summarizeAdapter = openaiSummarize('gpt-5.4-mini')
 console.log(summarizeAdapter.kind) // 'summarize'
 ```
 
diff --git a/docs/advanced/typed-options.md b/docs/advanced/typed-options.md
index e75ffd788..7ff2d4d8f 100644
--- a/docs/advanced/typed-options.md
+++ b/docs/advanced/typed-options.md
@@ -27,7 +27,7 @@ import { chat, createChatOptions } from '@tanstack/ai'
 import { openaiText } from '@tanstack/ai-openai'
 
 const chatOptions = createChatOptions({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   // modelOptions, systemPrompts, tools — all type-checked against the
   // adapter+model pair above. Sampling params (temperature, top_p,
   // max_output_tokens, …) live inside modelOptions, under each provider's
@@ -89,7 +89,7 @@ const lookupOrder = lookupOrderDef.server(async ({ orderId }) => {
 })
 
 export const supportChatOptions = createChatOptions({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   systemPrompts: ['You are a customer-support assistant for Acme Corp.'],
   tools: [lookupOrder],
   modelOptions: {
diff --git a/docs/chat/agentic-cycle.md b/docs/chat/agentic-cycle.md
index 43c42c977..430991a28 100644
--- a/docs/chat/agentic-cycle.md
+++ b/docs/chat/agentic-cycle.md
@@ -137,7 +137,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     tools: [getWeather, getClothingAdvice],
   });
@@ -164,7 +164,7 @@ import { chat } from "@tanstack/ai";
 import { maxIterations } from "@tanstack/ai";
 
 const stream = chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   tools: [getWeather, getClothingAdvice],
   agentLoopStrategy: maxIterations(3), // default is 5
@@ -180,7 +180,7 @@ A strategy is just a function that receives `{ iterationCount, finishReason, mes
 
 ```typescript
 const stream = chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   tools: [getWeather, getClothingAdvice],
   agentLoopStrategy: combineStrategies([
diff --git a/docs/chat/streaming.md b/docs/chat/streaming.md
index 1cd54178e..9c6fc0456 100644
--- a/docs/chat/streaming.md
+++ b/docs/chat/streaming.md
@@ -24,7 +24,7 @@ import { chat } from "@tanstack/ai";
 import { openaiText } from "@tanstack/ai-openai";
 
 const stream = chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages,
 });
 
@@ -46,7 +46,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
   });
 
diff --git a/docs/chat/thinking-content.md b/docs/chat/thinking-content.md
index e6fb8acf9..939699eec 100644
--- a/docs/chat/thinking-content.md
+++ b/docs/chat/thinking-content.md
@@ -82,7 +82,7 @@ GPT-5 and later models also support reasoning. Their `reasoning.effort` accepts
 
 ```typescript
 const stream = chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   modelOptions: {
     reasoning: { effort: "high" },
diff --git a/docs/comparison/vercel-ai-sdk.md b/docs/comparison/vercel-ai-sdk.md
index bba82a2d8..12e735b37 100644
--- a/docs/comparison/vercel-ai-sdk.md
+++ b/docs/comparison/vercel-ai-sdk.md
@@ -57,7 +57,7 @@ This article compares the two SDKs from TanStack AI's perspective, with honest a
 
 When you select a provider and model, TypeScript narrows the exact options, capabilities, and input modalities available for that specific model - not a union of everything the provider supports.
 
-Each provider adapter contains a comprehensive `model-meta.ts` that maps every model to its capabilities: supported input modalities, context windows, and provider-specific options. When you write `openaiText('gpt-5.2')`, the type system knows exactly what that model can do.
+Each provider adapter contains a comprehensive `model-meta.ts` that maps every model to its capabilities: supported input modalities, context windows, and provider-specific options. When you write `openaiText('gpt-5.5')`, the type system knows exactly what that model can do.
 
 ```ts
 import { chat } from '@tanstack/ai'
@@ -145,7 +145,7 @@ import { chat, maxIterations, untilFinishReason, combineStrategies } from '@tans
 import { openaiText } from '@tanstack/ai-openai'
 
 const stream = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages,
   tools,
   agentLoopStrategy: combineStrategies([
@@ -290,7 +290,7 @@ const logger: ChatMiddleware = {
 }
 
 const stream = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages,
   middleware: [logger],
 })
@@ -491,7 +491,7 @@ The TanStack approach separates the tool contract from its implementation, makin
 import { chat, combineStrategies, maxIterations, untilFinishReason } from '@tanstack/ai'
 
 const stream = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages,
   tools,
   agentLoopStrategy: combineStrategies([
diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md
index e078bcb4b..0cccc6256 100644
--- a/docs/getting-started/quick-start.md
+++ b/docs/getting-started/quick-start.md
@@ -68,7 +68,7 @@ export const Route = createFileRoute("/api/chat")({
           // Create a streaming chat response. `chat()` reads the AG-UI
           // `threadId` for devtools correlation when available.
           const stream = chat({
-            adapter: openaiText("gpt-5.2"),
+            adapter: openaiText("gpt-5.5"),
             messages: body.messages,
           });
 
@@ -118,7 +118,7 @@ export async function POST(request: Request) {
     // Create a streaming chat response. `chat()` reads the AG-UI
     // `threadId` for devtools correlation when available.
     const stream = chat({
-      adapter: openaiText("gpt-5.2"),
+      adapter: openaiText("gpt-5.5"),
       messages: body.messages,
     });
 
@@ -276,7 +276,7 @@ const getProducts = getProductsDef.server(async ({ query }) => {
 })
 
 const stream = chat({
-  adapter: openaiText('gpt-5.2'),
+  adapter: openaiText('gpt-5.5'),
   messages: [{ role: 'user', content: 'Find products' }],
   tools: [getProducts],
 })
diff --git a/docs/media/text-to-speech.md b/docs/media/text-to-speech.md
index f04a15dcb..8e5457e8f 100644
--- a/docs/media/text-to-speech.md
+++ b/docs/media/text-to-speech.md
@@ -53,7 +53,7 @@ import { geminiSpeech } from '@tanstack/ai-gemini'
 
 // Generate speech from text (uses GOOGLE_API_KEY or GEMINI_API_KEY from environment)
 const result = await generateSpeech({
-  adapter: geminiSpeech('gemini-2.5-flash-preview-tts'),
+  adapter: geminiSpeech('gemini-3.1-flash-tts-preview'),
   text: 'Hello from Gemini TTS!',
 })
 
@@ -504,7 +504,7 @@ import { createGeminiSpeech } from '@tanstack/ai-gemini'
 const openaiAdapter = createOpenaiSpeech('tts-1', 'your-openai-api-key')
 
 // Gemini
-const geminiAdapter = createGeminiSpeech('gemini-2.5-flash-preview-tts', 'your-google-api-key')
+const geminiAdapter = createGeminiSpeech('gemini-3.1-flash-tts-preview', 'your-google-api-key')
 ```
 
 ## Best Practices
diff --git a/docs/protocol/http-stream-protocol.md b/docs/protocol/http-stream-protocol.md
index b0cfa4e43..1d2e4e90a 100644
--- a/docs/protocol/http-stream-protocol.md
+++ b/docs/protocol/http-stream-protocol.md
@@ -197,7 +197,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText('gpt-5.2'),
+    adapter: openaiText('gpt-5.5'),
     messages,
   });
 
@@ -216,7 +216,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
   const abortController = new AbortController();
 
-  const stream = chat({ adapter: openaiText('gpt-5.2'), messages });
+  const stream = chat({ adapter: openaiText('gpt-5.5'), messages });
 
   return new Response(toHttpStream(stream, abortController), {
     headers: { 'Content-Type': 'application/x-ndjson' },
@@ -243,7 +243,7 @@ app.post('/api/chat', async (req, res) => {
 
   try {
     const stream = chat({
-      adapter: openaiText('gpt-5.2'),
+      adapter: openaiText('gpt-5.5'),
       messages,
     });
 
diff --git a/docs/protocol/sse-protocol.md b/docs/protocol/sse-protocol.md
index 64fa9e0ac..e8f1c83aa 100644
--- a/docs/protocol/sse-protocol.md
+++ b/docs/protocol/sse-protocol.md
@@ -189,7 +189,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText('gpt-5.2'),
+    adapter: openaiText('gpt-5.5'),
     messages,
   });
 
@@ -238,7 +238,7 @@ export async function POST(request: Request) {
   const stream = new ReadableStream({
     async start(controller) {
       try {
-        for await (const chunk of chat({ adapter: openaiText('gpt-5.2'), messages })) {
+        for await (const chunk of chat({ adapter: openaiText('gpt-5.5'), messages })) {
           const sseData = `data: ${JSON.stringify(chunk)}\n\n`;
           controller.enqueue(encoder.encode(sseData));
         }
diff --git a/docs/structured-outputs/multi-turn.md b/docs/structured-outputs/multi-turn.md
index f66602c5a..f8719be82 100644
--- a/docs/structured-outputs/multi-turn.md
+++ b/docs/structured-outputs/multi-turn.md
@@ -83,7 +83,7 @@ const SYSTEM_PROMPT = `You are a chef assistant. Always respond with a single re
 export async function POST(request: Request) {
   const { messages } = await request.json();
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     systemPrompts: [SYSTEM_PROMPT],
     outputSchema: RecipeSchema,
diff --git a/docs/structured-outputs/one-shot.md b/docs/structured-outputs/one-shot.md
index ecdf9a996..8c63b7896 100644
--- a/docs/structured-outputs/one-shot.md
+++ b/docs/structured-outputs/one-shot.md
@@ -35,7 +35,7 @@ const PersonSchema = z.object({
 });
 
 const person = await chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages: [
     {
       role: "user",
@@ -118,7 +118,7 @@ const CompanySchema = z.object({
 });
 
 const company = await chat({
-  adapter: anthropicText("claude-sonnet-4-5"),
+  adapter: anthropicText("claude-sonnet-4-6"),
   messages: [{ role: "user", content: "Extract company info from this article: ..." }],
   outputSchema: CompanySchema,
 });
@@ -145,7 +145,7 @@ const schema: JSONSchema = {
 };
 
 const result = await chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages: [{ role: "user", content: "Extract: John is 25 years old" }],
   outputSchema: schema,
 });
@@ -163,7 +163,7 @@ If the model's response doesn't satisfy your schema, `chat()` throws a validatio
 ```typescript
 try {
   const result = await chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages: [{ role: "user", content: "..." }],
     outputSchema: MySchema,
   });
@@ -190,7 +190,7 @@ If the client only needs the finished object and you don't want progressive UI,
 export async function POST(request: Request) {
   const { text } = await request.json();
   const person = await chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages: [{ role: "user", content: `Extract the person info: ${text}` }],
     outputSchema: PersonSchema,
   });
diff --git a/docs/structured-outputs/overview.md b/docs/structured-outputs/overview.md
index d7f8bd84a..2699f71a4 100644
--- a/docs/structured-outputs/overview.md
+++ b/docs/structured-outputs/overview.md
@@ -26,7 +26,7 @@ import { z } from "zod";
 const Person = z.object({ name: z.string(), age: z.number() });
 
 const person = await chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages: [{ role: "user", content: "John Doe, 30" }],
   outputSchema: Person,
 });
diff --git a/docs/structured-outputs/streaming.md b/docs/structured-outputs/streaming.md
index 7d005f30e..79c8b62e8 100644
--- a/docs/structured-outputs/streaming.md
+++ b/docs/structured-outputs/streaming.md
@@ -38,7 +38,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     outputSchema: PersonSchema,
     stream: true,
@@ -184,7 +184,7 @@ const PersonSchema = z.object({
 });
 
 const stream = chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages: [{ role: "user", content: "Extract: John Doe is 30, john@example.com" }],
   outputSchema: PersonSchema,
   stream: true,
diff --git a/docs/structured-outputs/with-tools.md b/docs/structured-outputs/with-tools.md
index 2ba67f7be..ddf10365b 100644
--- a/docs/structured-outputs/with-tools.md
+++ b/docs/structured-outputs/with-tools.md
@@ -43,7 +43,7 @@ const RecommendationSchema = z.object({
 });
 
 const recommendation = await chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages: [{ role: "user", content: "Recommend a product for a developer" }],
   tools: [getProductPrice],
   outputSchema: RecommendationSchema,
diff --git a/docs/tools/client-tools.md b/docs/tools/client-tools.md
index 52a76424c..4cc8cc53a 100644
--- a/docs/tools/client-tools.md
+++ b/docs/tools/client-tools.md
@@ -109,7 +109,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     tools: [updateUIDef, saveToLocalStorageDef], // Pass definitions
   });
@@ -339,10 +339,10 @@ const addToCartClient = addToCartDef.client((input) => {
 });
 
 // Server: Pass definition for client execution
-chat({ adapter: openaiText('gpt-5.2'), messages: [], tools: [addToCartDef] }); // Client will execute
+chat({ adapter: openaiText('gpt-5.5'), messages: [], tools: [addToCartDef] }); // Client will execute
 
 // Or pass server implementation for server execution
-chat({ adapter: openaiText('gpt-5.2'), messages: [], tools: [addToCartServer] }); // Server will execute
+chat({ adapter: openaiText('gpt-5.5'), messages: [], tools: [addToCartServer] }); // Server will execute
 ```
 
 ## Best Practices
diff --git a/docs/tools/provider-tools.md b/docs/tools/provider-tools.md
index 69cda2c2a..a40eb2d23 100644
--- a/docs/tools/provider-tools.md
+++ b/docs/tools/provider-tools.md
@@ -33,7 +33,7 @@ import { anthropicText } from '@tanstack/ai-anthropic'
 import { webSearchTool } from '@tanstack/ai-anthropic/tools'
 
 const stream = chat({
-  adapter: anthropicText('claude-opus-4-6'),
+  adapter: anthropicText('claude-opus-4.8'),
   messages: [{ role: 'user', content: "Summarize today's AI news." }],
   tools: [
     webSearchTool({
diff --git a/docs/tools/server-tools.md b/docs/tools/server-tools.md
index dc38ab141..0349d3299 100644
--- a/docs/tools/server-tools.md
+++ b/docs/tools/server-tools.md
@@ -158,7 +158,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     tools: [getUserData, searchProducts],
   });
@@ -208,7 +208,7 @@ export async function POST(request: Request) {
   const db = getDb();
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     tools: [getCurrentUser],
     context: {
@@ -274,7 +274,7 @@ import { openaiText } from "@tanstack/ai-openai";
 import { getUserData, searchProducts } from "@/tools/server";
 
 const stream = chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   tools: [getUserData, searchProducts],
 });
diff --git a/docs/tools/tool-approval.md b/docs/tools/tool-approval.md
index b73c7c69a..e0c5c6e64 100644
--- a/docs/tools/tool-approval.md
+++ b/docs/tools/tool-approval.md
@@ -76,7 +76,7 @@ export async function POST(request: Request) {
   const { messages } = await request.json();
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     tools: [sendEmail],
   });
diff --git a/docs/tools/tool-architecture.md b/docs/tools/tool-architecture.md
index 26a0caef8..b4002bea4 100644
--- a/docs/tools/tool-architecture.md
+++ b/docs/tools/tool-architecture.md
@@ -87,7 +87,7 @@ export async function POST(request: Request) {
 
   // Create streaming chat with tools
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     tools: [getWeather, sendEmail], // Tool definitions passed here
   });
diff --git a/docs/tools/tools.md b/docs/tools/tools.md
index 9b2cdd178..f4aa270f5 100644
--- a/docs/tools/tools.md
+++ b/docs/tools/tools.md
@@ -201,7 +201,7 @@ export async function POST(request: Request) {
   });
 
   const stream = chat({
-    adapter: openaiText("gpt-5.2"),
+    adapter: openaiText("gpt-5.5"),
     messages,
     tools: [getWeather], // Pass server tools
   });
@@ -294,14 +294,14 @@ On the server, pass either the definition (for client execution) or the server i
 ```typescript
 // Pass the definition: the client will execute the tool
 chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   tools: [addToCartDef],
 });
 
 // Or pass the server implementation: the server will execute the tool
 chat({
-  adapter: openaiText("gpt-5.2"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   tools: [addToCartServer],
 });

From 5017df73517635e7c78b2696b5feea7dbddc831f Mon Sep 17 00:00:00 2001
From: Alem Tuzlak <t.zlak@hotmail.com>
Date: Wed, 3 Jun 2026 12:55:33 +0200
Subject: [PATCH 4/7] docs: reconcile thinking examples with modelOptions
 sampling convention

After rebasing onto main (which merged #660 moving temperature/topP/
maxTokens into modelOptions), fix the two spots where our edits still
assumed top-level sampling:
- thinking-content.md: max_tokens now lives in modelOptions alongside
  the thinking budget (was a top-level maxTokens).
- anthropic.md: drop the stale "auto-raises top-level maxTokens" note;
  budget_tokens must be below modelOptions.max_tokens.

(ollama.md and typed-options.md conflicts were resolved to main's new
convention during the rebase.)
---
 docs/adapters/anthropic.md    | 2 +-
 docs/chat/thinking-content.md | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/adapters/anthropic.md b/docs/adapters/anthropic.md
index 787afb2e8..4f45e3b80 100644
--- a/docs/adapters/anthropic.md
+++ b/docs/adapters/anthropic.md
@@ -140,7 +140,7 @@ modelOptions: {
 }
 ```
 
-**Note:** `max_tokens` must be greater than `budget_tokens`. The adapter automatically raises the top-level `maxTokens` when your `budget_tokens` meets or exceeds it. If you instead set `max_tokens` inside `modelOptions`, keep it greater than `budget_tokens` yourself — that path is validated and throws rather than auto-adjusting.
+**Note:** `budget_tokens` must be less than `modelOptions.max_tokens`; otherwise the request is rejected. Set `max_tokens` high enough to leave room for the visible response alongside the thinking budget.
 
 ### Prompt Caching
 
diff --git a/docs/chat/thinking-content.md b/docs/chat/thinking-content.md
index 939699eec..2402970cf 100644
--- a/docs/chat/thinking-content.md
+++ b/docs/chat/thinking-content.md
@@ -39,7 +39,7 @@ How you enable thinking depends on the provider.
 
 ### Anthropic (Extended Thinking)
 
-Pass the `thinking` option in `modelOptions` with `type: "enabled"` and a `budget_tokens` (minimum 1024). If `budget_tokens` is not below `maxTokens`, the adapter automatically raises `maxTokens` so there is room for the visible response in addition to the thinking budget:
+Pass the `thinking` option in `modelOptions` with `type: "enabled"` and a `budget_tokens` (minimum 1024). Keep `budget_tokens` below `modelOptions.max_tokens` so there is room for the visible response in addition to the thinking budget:
 
 ```typescript
 import { chat } from "@tanstack/ai";
@@ -48,9 +48,9 @@ import { anthropicText } from "@tanstack/ai-anthropic";
 const stream = chat({
   adapter: anthropicText("claude-sonnet-4-6"),
   messages,
-  maxTokens: 32000,
   modelOptions: {
-    // budget_tokens must be at least 1024
+    max_tokens: 32000,
+    // budget_tokens must be at least 1024 and below max_tokens
     thinking: { type: "enabled", budget_tokens: 10000 },
   },
 });

From 9184463392509dcebbe3d94341e732f84526ec23 Mon Sep 17 00:00:00 2001
From: Alem Tuzlak <t.zlak@hotmail.com>
Date: Wed, 3 Jun 2026 13:09:54 +0200
Subject: [PATCH 5/7] docs: migrate ag-ui-compliance forwardedProps allowlist
 to modelOptions

The "safe allowlist" example still forwarded temperature/maxTokens as
top-level chat() options (missed by #660's sampling-into-modelOptions
migration). Map them into modelOptions under OpenAI's native keys
(temperature / max_output_tokens) so the example type-checks.
---
 docs/migration/ag-ui-compliance.md | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/docs/migration/ag-ui-compliance.md b/docs/migration/ag-ui-compliance.md
index fe4d7bc51..ec92e9804 100644
--- a/docs/migration/ag-ui-compliance.md
+++ b/docs/migration/ag-ui-compliance.md
@@ -204,19 +204,22 @@ chat({
 Always destructure the specific fields you intend to forward:
 
 ```ts
-// ✅ SAFE — explicit allowlist
+// ✅ SAFE — explicit allowlist. Sampling params live in modelOptions under
+// each provider's native key (OpenAI: temperature / max_output_tokens).
 chat({
   adapter: openaiText('gpt-4o'),
   messages: params.messages,
   tools: mergeAgentTools(serverTools, params.tools),
-  temperature:
-    typeof params.forwardedProps.temperature === 'number'
-      ? params.forwardedProps.temperature
-      : undefined,
-  maxTokens:
-    typeof params.forwardedProps.maxTokens === 'number'
-      ? params.forwardedProps.maxTokens
-      : undefined,
+  modelOptions: {
+    temperature:
+      typeof params.forwardedProps.temperature === 'number'
+        ? params.forwardedProps.temperature
+        : undefined,
+    max_output_tokens:
+      typeof params.forwardedProps.maxTokens === 'number'
+        ? params.forwardedProps.maxTokens
+        : undefined,
+  },
 })
 ```
 

From f74d5ebc6fe3436fc6b59d53c5181b40cfc32aa7 Mon Sep 17 00:00:00 2001
From: Alem Tuzlak <t.zlak@hotmail.com>
Date: Wed, 3 Jun 2026 13:56:16 +0200
Subject: [PATCH 6/7] docs: remove deprecated observability + protocol pages,
 drop all casts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove the deprecated Observability page (event-client observability is
  superseded; otelMiddleware is the supported path) and its nav entry +
  inbound links.
- Remove the protocol pages (chunk-definitions, sse-protocol,
  http-stream-protocol) — TanStack AI implements AG-UI, whose protocol is
  documented upstream; repoint the few inbound links to docs.ag-ui.com.
- Fix the broken ToolCacheStorage snippet (it imported the type then
  re-declared it) and verify the shape against source.
- Remove every `as <Type>` assertion cast from the docs (JSON-schema tool
  inputs, JSON.parse, formData, custom-event values, type brands, …),
  replacing them with typeof/in guards, type guards, typed annotations, or
  schema validation. `createModel`'s provider-option brand now uses a typed
  const instead of `{} as X`.
- CLAUDE.md / AGENTS.md: codify the docs conventions — no `as` casts in
  samples, use the latest model per provider from model-meta.ts, and show
  both server and client sides when a doc spans both.
---
 AGENTS.md                             |  14 +
 CLAUDE.md                             |  12 +
 docs/adapters/gemini.md               |  13 +-
 docs/advanced/built-in-middleware.md  |  18 +-
 docs/advanced/debug-logging.md        |   2 +-
 docs/advanced/extend-adapter.md       |   8 +-
 docs/advanced/middleware.md           |   8 +-
 docs/advanced/observability.md        |  99 ----
 docs/advanced/otel.md                 |   3 +-
 docs/chat/connection-adapters.md      |   8 +-
 docs/chat/persistence.md              |   3 +-
 docs/code-mode/client-integration.md  |   7 +-
 docs/config.json                      |   4 -
 docs/media/transcription.md           |   5 +-
 docs/protocol/chunk-definitions.md    | 703 --------------------------
 docs/protocol/http-stream-protocol.md | 438 ----------------
 docs/protocol/sse-protocol.md         | 370 --------------
 docs/structured-outputs/multi-turn.md |   8 +-
 docs/structured-outputs/one-shot.md   |   6 +-
 docs/tools/server-tools.md            |   8 +-
 docs/tools/tool-architecture.md       |   2 +-
 docs/tools/tools.md                   |  11 +-
 22 files changed, 94 insertions(+), 1656 deletions(-)
 delete mode 100644 docs/advanced/observability.md
 delete mode 100644 docs/protocol/chunk-definitions.md
 delete mode 100644 docs/protocol/http-stream-protocol.md
 delete mode 100644 docs/protocol/sse-protocol.md

diff --git a/AGENTS.md b/AGENTS.md
index 09a23a352..1dd7ecce1 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -47,6 +47,20 @@ each of these and confirm they're green before pushing:
 
 Do **not** rely on CI as your first signal. Run locally, fix, then push.
 
+## Documentation
+
+When editing docs under `docs/`:
+
+- **No `as` type-assertion casts in code samples.** Examples must type-check
+  without `as SomeType` — narrow `unknown` values with `typeof` / `in`
+  checks, type guards, or Standard Schema validation instead. (`as const` is
+  fine — it's a const assertion, not a type cast.)
+- **Show both sides of the coin.** When a doc spans server and client,
+  include snippets for both halves (server endpoint AND client consumption).
+- **Use the latest model per provider**, sourced from each adapter's
+  `model-meta.ts` (newest `gpt-*`, `claude-*`, `gemini-*`, …), in example code.
+- Run `pnpm test:docs` (link verification) before pushing.
+
 ## Everything Else
 
 For package manager (`pnpm@10.17.0`), monorepo layout, adapter architecture,
diff --git a/CLAUDE.md b/CLAUDE.md
index 225ae34e2..a6ece3b8f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -321,6 +321,18 @@ OPENAI_API_KEY=sk-... pnpm --filter @tanstack/ai-e2e record
 - Docs are in `docs/` directory (Markdown)
 - Auto-generated docs via `pnpm generate-docs` (TypeDoc)
 - Link verification via `pnpm test:docs`
+- **No `as` type-assertion casts in doc code samples.** Examples must
+  type-check without `as SomeType`. To use a value typed `unknown` (a raw
+  JSON Schema tool input, `request.json()`, `JSON.parse`, custom-event
+  values, etc.), narrow it with a `typeof` / `in` check or a type guard, or
+  validate it with a Standard Schema library — never `as`. (`as const` is
+  allowed; it's a const assertion, not a type cast.)
+- **Show both sides of the coin.** When a doc spans both server and client,
+  include snippets for **both** halves (the server endpoint AND the client
+  consumption), not just one.
+- **Use the latest model per provider in examples**, sourced from each
+  adapter's `model-meta.ts` (the newest `gpt-*`, `claude-*`, `gemini-*`,
+  etc.). Don't introduce superseded model ids in new or edited samples.
 
 ## Key Dependencies
 
diff --git a/docs/adapters/gemini.md b/docs/adapters/gemini.md
index f7ff43954..4014a8810 100644
--- a/docs/adapters/gemini.md
+++ b/docs/adapters/gemini.md
@@ -134,11 +134,14 @@ for await (const chunk of chat({
   adapter: geminiTextInteractions("gemini-3.5-flash"),
   messages: [{ role: "user", content: "Hi, my name is Amir." }],
 })) {
-  if (chunk.type === "CUSTOM" && chunk.name === "gemini.interactionId") {
-    const value = chunk.value as GeminiInteractionsCustomEventValue<
-      "gemini.interactionId"
-    >;
-    interactionId = value.interactionId;
+  if (
+    chunk.type === "CUSTOM" &&
+    chunk.name === "gemini.interactionId" &&
+    chunk.value &&
+    typeof chunk.value === "object" &&
+    "interactionId" in chunk.value
+  ) {
+    interactionId = String(chunk.value.interactionId);
   }
 }
 
diff --git a/docs/advanced/built-in-middleware.md b/docs/advanced/built-in-middleware.md
index 9fa5b2ea8..3275059b2 100644
--- a/docs/advanced/built-in-middleware.md
+++ b/docs/advanced/built-in-middleware.md
@@ -65,10 +65,16 @@ const stream = chat({
 **Custom key function** — useful when you want to ignore certain arguments:
 
 ```typescript
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null;
+}
+
 toolCacheMiddleware({
   keyFn: (toolName, args) => {
-    // Ignore pagination, cache by query only
-    const { page, ...rest } = args as Record<string, unknown>;
+    // Ignore pagination, cache by query only. `args` is `unknown`, so
+    // narrow it with a type guard before destructuring.
+    if (!isRecord(args)) return JSON.stringify([toolName, args]);
+    const { page, ...rest } = args;
     return JSON.stringify([toolName, rest]);
   },
 });
@@ -81,15 +87,14 @@ By default the cache lives in-memory and is scoped to a single `toolCacheMiddlew
 The storage interface:
 
 ```typescript
-import type { ToolCacheStorage, ToolCacheEntry } from "@tanstack/ai/middlewares";
-
+// Implement this interface (exported from `@tanstack/ai/middlewares`):
 interface ToolCacheStorage {
   getItem: (key: string) => ToolCacheEntry | undefined | Promise<ToolCacheEntry | undefined>;
   setItem: (key: string, value: ToolCacheEntry) => void | Promise<void>;
   deleteItem: (key: string) => void | Promise<void>;
 }
 
-// ToolCacheEntry is { result: unknown, timestamp: number }
+// ToolCacheEntry is { result: unknown; timestamp: number }
 ```
 
 All methods may return a `Promise` for async backends. The middleware handles TTL checking — your storage just needs to store and retrieve entries.
@@ -222,5 +227,4 @@ These built-ins are just `ChatMiddleware` objects — nothing about them is priv
 ## Next Steps
 
 - [Middleware](./middleware) — the full lifecycle and hook reference
 - [OpenTelemetry](./otel) — `otelMiddleware` in depth
-- [Observability](./observability) — event-driven observability with the event client
diff --git a/docs/advanced/debug-logging.md b/docs/advanced/debug-logging.md
index 3ebec740b..ec850c010 100644
--- a/docs/advanced/debug-logging.md
+++ b/docs/advanced/debug-logging.md
@@ -164,4 +164,4 @@ The chat-only categories (`middleware`, `tools`, `agentLoop`, `config`) simply n
 
 ## Related
 
-If you're building middleware and want to see chunks flow through it, `debug: { middleware: true }` is faster than writing a logging middleware. See [Middleware](./middleware) for writing your own middleware, or [Observability](./observability) for the programmatic event client.
+If you're building middleware and want to see chunks flow through it, `debug: { middleware: true }` is faster than writing a logging middleware. See [Middleware](./middleware) for writing your own middleware.
diff --git a/docs/advanced/extend-adapter.md b/docs/advanced/extend-adapter.md
index 55c5b4577..1e1abaa94 100644
--- a/docs/advanced/extend-adapter.md
+++ b/docs/advanced/extend-adapter.md
@@ -89,19 +89,21 @@ To attach typed `modelOptions`, declared `features`, or provider `tools` to a cu
 import { createModel } from '@tanstack/ai'
 import type { OpenAITextProviderOptions } from '@tanstack/ai-openai'
 
+// Type brand for provider options — the value is unused at runtime.
+const modelOptions: OpenAITextProviderOptions = {}
+
 const reasoner = createModel('my-reasoner', {
   input: ['text'],
   features: ['reasoning', 'structured_outputs'],
   tools: ['web_search'],
-  // Type brand for provider options — use `{} as YourOptionsType`
-  modelOptions: {} as OpenAITextProviderOptions,
+  modelOptions,
 })
 ```
 
 - `input` — supported input modalities (same as the positional form).
 - `features` — declared feature flags (e.g. `'reasoning'`, `'structured_outputs'`).
 - `tools` — declared provider tools (e.g. `'web_search'`).
-- `modelOptions` — a type brand for the provider options accepted by this model; the value is unused at runtime, so pass `{} as YourOptionsType`.
+- `modelOptions` — a type brand for the provider options accepted by this model; the value is unused at runtime, so declare an empty object typed as the provider options (e.g. `const modelOptions: OpenAITextProviderOptions = {}`).
  
 ## Preserving Original Factory Behavior
 
diff --git a/docs/advanced/middleware.md b/docs/advanced/middleware.md
index 9dd5ef0cc..9977d12a3 100644
--- a/docs/advanced/middleware.md
+++ b/docs/advanced/middleware.md
@@ -246,7 +246,7 @@ When multiple middleware define `onChunk`, chunks flow through them in order. If
 
 #### Chunk types you'll see
 
-`onChunk` receives every [AG-UI event](../protocol/chunk-definitions) the run produces — not just text. Narrow on `chunk.type` (a discriminated union) before reading type-specific fields. The common ones:
+`onChunk` receives every [AG-UI event](https://docs.ag-ui.com/introduction) the run produces — not just text. Narrow on `chunk.type` (a discriminated union) before reading type-specific fields. The common ones:
 
 | `chunk.type` | Meaning | Key fields |
 |--------------|---------|-----------|
@@ -257,7 +257,7 @@ When multiple middleware define `onChunk`, chunks flow through them in order. If
 | `STATE_SNAPSHOT` / `STATE_DELTA` | Agent state sync | `snapshot`, `delta` |
 | `CUSTOM` | Extensibility events (incl. structured-output — see below) | `name`, `value` |
 
-See [AG-UI Event Definitions](../protocol/chunk-definitions) for the full catalogue and exact field shapes.
+See the [AG-UI protocol docs](https://docs.ag-ui.com/introduction) for the full event catalogue and exact field shapes.
 
 #### Transforming structured-output chunks
 
@@ -735,8 +735,7 @@ import type {
 ## Next Steps
 
 - [Built-in Middleware](./built-in-middleware) — `toolCacheMiddleware`, `contentGuardMiddleware`, `otelMiddleware`
 - [OpenTelemetry](./otel) — emit traces and metrics via `otelMiddleware`
-- [Observability](./observability) — Event-driven observability with the event client
 - [Tools](../tools/tools) — Learn about the isomorphic tool system
 - [Agentic Cycle](../chat/agentic-cycle) — Understand the multi-step agent loop
 - [Streaming](../chat/streaming) — How streaming works in TanStack AI
diff --git a/docs/advanced/observability.md b/docs/advanced/observability.md
deleted file mode 100644
index a036edd59..000000000
--- a/docs/advanced/observability.md
+++ /dev/null
@@ -1,99 +0,0 @@
----
-title: Observability
-id: observability
-order: 4
-description: "Subscribe to TanStack AI events for observability and debugging — tool calls, streaming chunks, usage, and errors via the type-safe event client."
-keywords:
-  - tanstack ai
-  - observability
-  - event client
-  - telemetry
-  - debugging
-  - tracing
-  - devtools
----
-
-# Event client
-
-The `@tanstack/ai` package offers you an event client for observability and debugging purposes.
-It's a fully type-safe decoupled event-driven system that emits events whenever they are internally
-triggered and you can subscribe to those events for observability.
-
-> **Looking for quick diagnostic console output instead of a programmatic event stream?**
-> See [Debug Logging](./debug-logging) for turning on category-toggleable logging across every adapter and middleware hook.
-
-Because the same event client is used for both the TanStack Devtools system and observability locally it will work
-by subscribing to the event bus and emitting events to/from the event bus into the listeners by default. If you 
-want to subscribe to events in production as well you need to pass in a third argument to the `on` function,
-the `{ withEventTarget: true }` option.
-
-This will not only emit to the event bus (which is not present in production), but to the current eventTarget that
-you will be able to listen to. 
-
-## Event naming scheme
-
-Events follow the format `<system-part>:<what-it-does>`.
-
-- Text: `text:request:started`, `text:message:created`, `text:chunk:content`, `text:usage`
-- Tools: `tools:approval:requested`, `tools:call:completed`, `tools:result:added`
-- Summarize: `summarize:request:started`, `summarize:usage`
-- Image: `image:request:started`, `image:usage`
-- Speech: `speech:request:started`, `speech:usage`
-- Transcription: `transcription:request:started`, `transcription:usage`
-- Video: `video:request:started`, `video:usage`
-- Client: `client:created`, `client:loading:changed`, `client:messages:cleared`
-
-Every event includes all metadata available at the time of emission (model, provider,
-system prompts, request and message IDs, options, and tool names).
-
-## Server events
-
-There are both events that happen on the server and on the client, if you want to listen to either side you just need to
-subscribe on the server/client respectfully. 
-
-Here is an example for the server:
-```ts
-import { aiEventClient } from "@tanstack/ai-event-client";
-
-// server.ts file or wherever the root of your server is
-aiEventClient.on("text:request:started", e => {
-  // implement whatever you need to here
-})
-// rest of your server logic
-const app = new Server();
-app.get()
-```
-
-## Client events
-
-Listening on the client is the same approach, just subscribe to the events:
-
-```tsx
-// App.tsx
-import { aiEventClient } from "@tanstack/ai-event-client";
-
-const App = () => {
-  useEffect(() => {
-    const cleanup = aiEventClient.on("tools:call:updated", e => {
-      // do whatever you need to do
-    })
-    return cleanup;
-  },[])
-  return <div></div>
-}
-```
-
-## Reconstructing chat
-
-To rebuild a chat timeline from events, listen for:
-
-- `text:message:created` (full message content)
-- `text:message:user` (explicit user message events)
-- `text:chunk:*` (streaming content, tool calls, tool results, thinking)
-- `tools:*` (approvals, input availability, call completion)
-- `text:request:completed` (final completion + usage)
-
-This set is sufficient to replay the conversation end-to-end for observability and
-telemetry systems.
-
- 
\ No newline at end of file
diff --git a/docs/advanced/otel.md b/docs/advanced/otel.md
index c6bf683e0..cf5f3ac2c 100644
--- a/docs/advanced/otel.md
+++ b/docs/advanced/otel.md
@@ -167,5 +167,4 @@ otelMiddleware({
 ## Related
 
 - [Middleware](./middleware) — the lifecycle this middleware hooks into
 - [Debug Logging](./debug-logging) — quick console-output diagnostics, complementary to OTel
-- [Observability](./observability) — TanStack AI's built-in event client
diff --git a/docs/chat/connection-adapters.md b/docs/chat/connection-adapters.md
index 62532170d..ae11db128 100644
--- a/docs/chat/connection-adapters.md
+++ b/docs/chat/connection-adapters.md
@@ -272,7 +272,8 @@ function websocketConnection(url: string): SubscribeConnectionAdapter {
   }
 
   ws.addEventListener("message", (event) => {
-    deliver(JSON.parse(event.data) as StreamChunk);
+    const chunk: StreamChunk = JSON.parse(event.data);
+    deliver(chunk);
   });
   ws.addEventListener("close", () => {
     closed = true;
@@ -391,7 +392,10 @@ const myAdapter: ConnectConnectionAdapter = {
       const lines = buffer.split("\n");
       buffer = lines.pop() ?? "";
       for (const line of lines) {
-        if (line.trim()) yield JSON.parse(line) as StreamChunk;
+        if (line.trim()) {
+          const chunk: StreamChunk = JSON.parse(line);
+          yield chunk;
+        }
       }
     }
   },
diff --git a/docs/chat/persistence.md b/docs/chat/persistence.md
index db877be0c..7fb5d3a89 100644
--- a/docs/chat/persistence.md
+++ b/docs/chat/persistence.md
@@ -101,7 +101,8 @@ const localStoragePersistence: ChatClientPersistence = {
   getItem: (id) => {
     const raw = window.localStorage.getItem(id);
     if (!raw) return null;
-    return (JSON.parse(raw) as Array<UIMessage>).map((message) => ({
+    const stored: Array<UIMessage> = JSON.parse(raw);
+    return stored.map((message) => ({
       ...message,
       createdAt:
         typeof message.createdAt === "string"
diff --git a/docs/code-mode/client-integration.md b/docs/code-mode/client-integration.md
index 1a4a69c15..ff16d96e9 100644
--- a/docs/code-mode/client-integration.md
+++ b/docs/code-mode/client-integration.md
@@ -211,8 +211,13 @@ function CodeExecutionPanel({
   );
 }
 
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null;
+}
+
 function EventLine({ event }: { event: VMEvent }) {
-  const data = event.data as Record<string, unknown>;
+  if (!isRecord(event.data)) return null;
+  const data = event.data;
 
   switch (event.eventType) {
     case "code_mode:console":
diff --git a/docs/config.json b/docs/config.json
index c9ce782c5..50686e44e 100644
--- a/docs/config.json
+++ b/docs/config.json
@@ -207,10 +207,6 @@
         {
           "label": "OpenTelemetry",
           "to": "advanced/otel"
-        },
-        {
-          "label": "Observability",
-          "to": "advanced/observability"
         }
       ]
     },
diff --git a/docs/media/transcription.md b/docs/media/transcription.md
index fb6e9c66c..40747ed79 100644
--- a/docs/media/transcription.md
+++ b/docs/media/transcription.md
@@ -303,7 +303,10 @@ import { openaiTranscription } from '@tanstack/ai-openai'
 
 export async function POST(request: Request) {
   const formData = await request.formData()
-  const audioFile = formData.get('audio') as File
+  const audioFile = formData.get('audio')
+  if (!(audioFile instanceof File)) {
+    throw new Error('Expected an audio file under "audio"')
+  }
 
   const result = await generateTranscription({
     adapter: openaiTranscription('whisper-1'),
diff --git a/docs/protocol/chunk-definitions.md b/docs/protocol/chunk-definitions.md
deleted file mode 100644
index 10bb8f0e8..000000000
--- a/docs/protocol/chunk-definitions.md
+++ /dev/null
@@ -1,703 +0,0 @@
----
-title: AG-UI Event Definitions
-id: chunk-definitions
-description: "TanStack AI implements the AG-UI protocol — full event definitions, types, and streaming semantics for agent-to-UI communication."
-keywords:
-  - tanstack ai
-  - ag-ui
-  - ag-ui protocol
-  - events
-  - stream chunks
-  - streaming protocol
-  - agent protocol
----
-
-TanStack AI implements the [AG-UI (Agent-User Interaction) Protocol](https://docs.ag-ui.com/introduction), an open, lightweight, event-based protocol that standardizes how AI agents connect to user-facing applications.
-
-All streaming responses in TanStack AI consist of a series of **AG-UI Events** - discrete JSON objects representing different stages of the conversation lifecycle. These events enable real-time updates for content generation, tool calls, thinking/reasoning, and completion signals.
-
-## Base Structure
-
-All AG-UI events share a common base structure:
-
-```typescript
-interface BaseAGUIEvent {
-  type: AGUIEventType;
-  timestamp: number;      // Unix timestamp in milliseconds
-  model?: string;         // Model identifier (TanStack AI addition)
-  rawEvent?: unknown;     // Original provider event for debugging
-}
-```
-
-### AG-UI Event Types
-
-```typescript
-type AGUIEventType =
-  | 'RUN_STARTED'                 // Run lifecycle begins
-  | 'RUN_FINISHED'                // Run completed successfully
-  | 'RUN_ERROR'                   // Error occurred
-  | 'TEXT_MESSAGE_START'          // Text message begins
-  | 'TEXT_MESSAGE_CONTENT'        // Text content streaming
-  | 'TEXT_MESSAGE_END'            // Text message completes
-  | 'TOOL_CALL_START'             // Tool invocation begins
-  | 'TOOL_CALL_ARGS'              // Tool arguments streaming
-  | 'TOOL_CALL_END'               // Tool call completes
-  | 'TOOL_CALL_RESULT'            // Tool execution result
-  | 'STEP_STARTED'                // Thinking/reasoning step begins
-  | 'STEP_FINISHED'               // Thinking/reasoning step completes
-  | 'REASONING_START'             // Reasoning begins for a message
-  | 'REASONING_MESSAGE_START'     // Reasoning message begins
-  | 'REASONING_MESSAGE_CONTENT'   // Reasoning content streaming
-  | 'REASONING_MESSAGE_END'       // Reasoning message completes
-  | 'REASONING_END'               // Reasoning ends for a message
-  | 'REASONING_ENCRYPTED_VALUE'   // Encrypted reasoning payload
-  | 'MESSAGES_SNAPSHOT'           // Full conversation transcript snapshot
-  | 'STATE_SNAPSHOT'              // Full state synchronization
-  | 'STATE_DELTA'                 // Incremental state update
-  | 'CUSTOM';                     // Custom extensibility events
-```
-
-> The exported `EventType` enum (`@tanstack/ai`) carries a few additional
-> internal/transitional members (e.g. `TEXT_MESSAGE_CHUNK`, `TOOL_CALL_CHUNK`,
-> `REASONING_MESSAGE_CHUNK`, `THINKING_*`, `ACTIVITY_*`, `RAW`). The events above are the ones that appear
-> on the wire for a normal chat run.
-
-Only AG-UI event types are supported; previous legacy chunk formats are no longer accepted.
-
-## AG-UI Event Definitions
-
-### RUN_STARTED
-
-Emitted when a run begins. This is the first event in any streaming response.
-
-```typescript
-interface RunStartedEvent extends BaseAGUIEvent {
-  type: 'RUN_STARTED';
-  runId: string;           // Unique identifier for this run
-  threadId: string;        // Thread/conversation ID (required; always emitted)
-}
-```
-
-**Example:**
-```json
-{
-  "type": "RUN_STARTED",
-  "runId": "run_abc123",
-  "threadId": "thread_abc123",
-  "model": "gpt-5.5",
-  "timestamp": 1701234567890
-}
-```
-
----
-
-### RUN_FINISHED
-
-Emitted when a run completes successfully.
-
-> **AG-UI vs TanStack AI:** AG-UI's `RUN_FINISHED` event only defines
-> `threadId`, `runId`, and an optional `result`. The `finishReason` and `usage`
-> fields below are **TanStack AI extensions** — they ride along on the event
-> (AG-UI event schemas are `passthrough`) but are not part of the AG-UI protocol
-> itself. `usage` is typed as `TokenUsage`, defined by `@tanstack/ai` and
-> mirrored under the same name by `@tanstack/ai-event-client` for wire/devtools
-> consumers. (`@tanstack/ai` also exports `UsageTotals` as a deprecated alias of
-> `TokenUsage` for backward compatibility.)
-
-```typescript
-interface RunFinishedEvent extends BaseAGUIEvent {
-  type: 'RUN_FINISHED';
-  runId: string;
-  threadId: string;                                                           // required (inherited from AG-UI base)
-  finishReason?: 'stop' | 'length' | 'content_filter' | 'tool_calls' | null; // TanStack AI addition
-  usage?: TokenUsage;                                                         // TanStack AI addition
-}
-
-// TanStack AI extension — not an AG-UI primitive.
-interface TokenUsage {
-  // Core token counts (always present when usage is available)
-  promptTokens: number;
-  completionTokens: number;
-  totalTokens: number;
-
-  // Detailed prompt token breakdown (provider-dependent)
-  promptTokensDetails?: {
-    cachedTokens?: number;       // Tokens read from cache (prompt cache hits)
-    cacheWriteTokens?: number;   // Tokens written to cache (Anthropic cache creation)
-    audioTokens?: number;        // Audio input tokens
-    videoTokens?: number;        // Video input tokens
-    imageTokens?: number;        // Image input tokens
-    textTokens?: number;         // Text input tokens
-    documentTokens?: number;     // Document input tokens (e.g. PDF inputs on Gemini)
-  };
-
-  // Detailed completion token breakdown (provider-dependent)
-  completionTokensDetails?: {
-    reasoningTokens?: number;    // Reasoning/thinking tokens (o1, Claude thinking)
-    audioTokens?: number;        // Audio output tokens
-    videoTokens?: number;        // Video output tokens
-    imageTokens?: number;        // Image output tokens
-    textTokens?: number;         // Text output tokens
-    documentTokens?: number;     // Document output tokens
-  };
-
-  // Duration in seconds for duration-billed models (e.g. Whisper transcription)
-  durationSeconds?: number;
-
-  // Provider-specific fields not covered by the standard schema (e.g. OpenRouter
-  // accepted/rejectedPredictionTokens, Anthropic serverToolUse)
-  providerUsageDetails?: Record<string, unknown>;
-
-  // Provider-reported cost, when available (e.g. OpenRouter)
-  cost?: number;
-  costDetails?: {
-    upstreamCost?: number;        // Total cost the gateway paid upstream
-    upstreamInputCost?: number;   // Upstream cost for input (prompt) tokens
-    upstreamOutputCost?: number;  // Upstream cost for output (completion) tokens
-  };
-}
-```
-
-**Example (basic usage):**
-```json
-{
-  "type": "RUN_FINISHED",
-  "runId": "run_abc123",
-  "model": "gpt-4o",
-  "timestamp": 1701234567900,
-  "finishReason": "stop",
-  "usage": {
-    "promptTokens": 100,
-    "completionTokens": 50,
-    "totalTokens": 150
-  }
-}
-```
-
-**Example (with cached tokens - OpenAI):**
-```json
-{
-  "type": "RUN_FINISHED",
-  "runId": "run_abc123",
-  "model": "gpt-4o",
-  "timestamp": 1701234567892,
-  "finishReason": "stop",
-  "usage": {
-    "promptTokens": 150,
-    "completionTokens": 75,
-    "totalTokens": 225,
-    "promptTokensDetails": {
-      "cachedTokens": 100
-    }
-  }
-}
-```
-
-**Example (with reasoning tokens - o1):**
-```json
-{
-  "type": "RUN_FINISHED",
-  "runId": "run_abc123",
-  "model": "o1-preview",
-  "timestamp": 1701234567892,
-  "finishReason": "stop",
-  "usage": {
-    "promptTokens": 150,
-    "completionTokens": 500,
-    "totalTokens": 650,
-    "completionTokensDetails": {
-      "reasoningTokens": 425
-    }
-  }
-}
-```
-
-**Example (Anthropic with cache):**
-```json
-{
-  "type": "RUN_FINISHED",
-  "runId": "run_abc123",
-  "model": "claude-3-5-sonnet",
-  "timestamp": 1701234567892,
-  "finishReason": "stop",
-  "usage": {
-    "promptTokens": 150,
-    "completionTokens": 75,
-    "totalTokens": 225,
-    "promptTokensDetails": {
-      "cacheWriteTokens": 50,
-      "cachedTokens": 100
-    }
-  }
-}
-```
-
-**Example (OpenRouter with cost):**
-```json
-{
-  "type": "RUN_FINISHED",
-  "runId": "run_abc123",
-  "model": "openai/gpt-4o",
-  "timestamp": 1701234567892,
-  "finishReason": "stop",
-  "usage": {
-    "promptTokens": 150,
-    "completionTokens": 75,
-    "totalTokens": 225,
-    "cost": 0.0012,
-    "costDetails": {
-      "upstreamInputCost": 0.0008,
-      "upstreamOutputCost": 0.0004
-    }
-  }
-}
-```
-
-**Token Usage Notes:**
-- `usage` is a TanStack AI extension to the AG-UI `RUN_FINISHED` event; all fields beyond the three core counts are optional and provider-dependent.
-- `promptTokensDetails.cachedTokens` - Tokens read from cache (OpenAI/Anthropic prompt caching)
-- `promptTokensDetails.cacheWriteTokens` - Tokens written to cache (Anthropic prompt caching)
-- `completionTokensDetails.reasoningTokens` - Internal reasoning tokens (o1, Claude thinking)
-- `durationSeconds` - Set by duration-billed models (e.g. Whisper transcription) instead of token counts
-- `providerUsageDetails` - Provider-specific fields not in the standard schema (e.g. OpenRouter's accepted/rejected prediction tokens, Anthropic's server tool use)
-- `cost` / `costDetails` - Provider-reported per-request cost, populated only by gateways that return it (e.g. OpenRouter)
-- For Gemini, modality-specific token counts (audio, video, image, text) are extracted from the response
-
-
----
-
-### RUN_ERROR
-
-Emitted when an error occurs during a run.
-
-> **Canonical vs deprecated shape.** The AG-UI-canonical form carries
-> `message` and `code` at the **top level** of the event. The nested `error`
-> object is a TanStack AI backward-compatibility alias and is `@deprecated`;
-> prefer reading the top-level fields. Note that the wire emitter
-> (`toServerSentEventsStream` / `toHttpStream`) still emits the nested `error`
-> form, so consumers should accept either until the alias is removed.
-
-```typescript
-interface RunErrorEvent extends BaseAGUIEvent {
-  type: 'RUN_ERROR';
-  message: string;     // Canonical (AG-UI)
-  code?: string;       // Canonical (AG-UI)
-  runId?: string;
-  /** @deprecated Use top-level `message`/`code`. Still emitted on the wire. */
-  error?: {
-    message: string;
-    code?: string;
-  };
-}
-```
-
-**Example (as emitted on the wire — nested `error`):**
-```json
-{
-  "type": "RUN_ERROR",
-  "model": "gpt-4o",
-  "timestamp": 1701234567890,
-  "error": {
-    "message": "Rate limit exceeded",
-    "code": "rate_limit"
-  }
-}
-```
-
----
-
-### TEXT_MESSAGE_START
-
-Emitted when a text message starts.
-
-```typescript
-interface TextMessageStartEvent extends BaseAGUIEvent {
-  type: 'TEXT_MESSAGE_START';
-  messageId: string;
-  role: 'assistant';
-}
-```
-
----
-
-### TEXT_MESSAGE_CONTENT
-
-Emitted when text content is generated (streaming tokens).
-
-```typescript
-interface TextMessageContentEvent extends BaseAGUIEvent {
-  type: 'TEXT_MESSAGE_CONTENT';
-  messageId: string;
-  delta: string;       // The incremental content token
-  content?: string;    // Full accumulated content so far
-}
-```
-
-**Example:**
-```json
-{
-  "type": "TEXT_MESSAGE_CONTENT",
-  "messageId": "msg_abc123",
-  "model": "gpt-4o",
-  "timestamp": 1701234567890,
-  "delta": "Hello",
-  "content": "Hello"
-}
-```
-
----
-
-### TEXT_MESSAGE_END
-
-Emitted when a text message completes.
-
-```typescript
-interface TextMessageEndEvent extends BaseAGUIEvent {
-  type: 'TEXT_MESSAGE_END';
-  messageId: string;
-}
-```
-
----
-
-### TOOL_CALL_START
-
-Emitted when a tool call starts.
-
-```typescript
-interface ToolCallStartEvent extends BaseAGUIEvent {
-  type: 'TOOL_CALL_START';
-  toolCallId: string;
-  toolCallName: string;  // Canonical (AG-UI)
-  /** @deprecated Use `toolCallName` instead. */
-  toolName: string;      // Deprecated alias, still emitted
-  index?: number;        // Index for parallel tool calls
-}
-```
-
----
-
-### TOOL_CALL_ARGS
-
-Emitted when tool call arguments are streaming.
-
-```typescript
-interface ToolCallArgsEvent extends BaseAGUIEvent {
-  type: 'TOOL_CALL_ARGS';
-  toolCallId: string;
-  delta: string;       // Incremental JSON arguments delta
-  args?: string;       // Full accumulated arguments so far
-}
-```
-
----
-
-### TOOL_CALL_END
-
-Emitted when a tool call completes.
-
-```typescript
-interface ToolCallEndEvent extends BaseAGUIEvent {
-  type: 'TOOL_CALL_END';
-  toolCallId: string;
-  toolCallName?: string;  // Canonical (AG-UI)
-  /** @deprecated Use `toolCallName` instead. */
-  toolName?: string;      // Deprecated alias
-  input?: unknown;        // Final parsed input arguments (TanStack AI internal)
-  result?: string | ContentPart[]; // Tool execution result (TanStack AI internal)
-}
-```
-
----
-
-### TOOL_CALL_RESULT
-
-Emitted when a tool's execution result is available. AG-UI carries this as a
-distinct event from `TOOL_CALL_END`: `TOOL_CALL_END` closes the call's
-argument stream, while `TOOL_CALL_RESULT` delivers the executed tool's output
-as a `tool`-role message.
-
-```typescript
-interface ToolCallResultEvent extends BaseAGUIEvent {
-  type: 'TOOL_CALL_RESULT';
-  messageId: string;   // ID of the resulting tool-role message
-  toolCallId: string;  // The tool call this result answers
-  content: string;     // Serialized tool result
-  role?: 'tool';
-}
-```
-
-**Example:**
-```json
-{
-  "type": "TOOL_CALL_RESULT",
-  "messageId": "msg_tool_1",
-  "toolCallId": "call_xyz",
-  "content": "{\"temperature\":72,\"conditions\":\"sunny\"}",
-  "timestamp": 1701234567894
-}
-```
-
----
-
-### STEP_STARTED
-
-Emitted when a thinking/reasoning step starts.
-
-```typescript
-interface StepStartedEvent extends BaseAGUIEvent {
-  type: 'STEP_STARTED';
-  stepName: string;    // Canonical (AG-UI)
-  /** @deprecated Use `stepName` instead. */
-  stepId?: string;     // Deprecated alias
-  stepType?: string;   // e.g., 'thinking', 'planning'
-}
-```
-
----
-
-### STEP_FINISHED
-
-Emitted when a thinking/reasoning step finishes.
-
-```typescript
-interface StepFinishedEvent extends BaseAGUIEvent {
-  type: 'STEP_FINISHED';
-  stepName: string;    // Canonical (AG-UI)
-  /** @deprecated Use `stepName` instead. */
-  stepId?: string;     // Deprecated alias
-  delta?: string;      // Incremental thinking content (TanStack AI internal)
-  content?: string;    // Full accumulated thinking content (TanStack AI internal)
-}
-```
-
----
-
-## Reasoning Events
-
-AG-UI defines a dedicated reasoning event family for thinking/reasoning models.
-**These `REASONING_MESSAGE_*` events are the AG-UI-canonical path for reasoning
-content.** During a transition period, adapters also emit `STEP_FINISHED` with
-the same thinking deltas as a backward-compatibility duplicate; the stream
-processor de-duplicates by ignoring `STEP_FINISHED` thinking deltas once it has
-seen reasoning events for a message (see
-`packages/ai/src/activities/chat/stream/processor.ts`). Prefer
-`REASONING_MESSAGE_*` in new consumers.
-
-All reasoning events extend `BaseAGUIEvent`. TanStack AI adds an optional
-`model?` field; the canonical fields come from `@ag-ui/core`.
-
-### REASONING_START
-
-Reasoning begins for a message.
-
-```typescript
-interface ReasoningStartEvent extends BaseAGUIEvent {
-  type: 'REASONING_START';
-  messageId: string;
-}
-```
-
-### REASONING_MESSAGE_START
-
-A reasoning message begins.
-
-```typescript
-interface ReasoningMessageStartEvent extends BaseAGUIEvent {
-  type: 'REASONING_MESSAGE_START';
-  messageId: string;
-  role: 'reasoning';
-}
-```
-
-### REASONING_MESSAGE_CONTENT
-
-Incremental reasoning content (streaming tokens).
-
-```typescript
-interface ReasoningMessageContentEvent extends BaseAGUIEvent {
-  type: 'REASONING_MESSAGE_CONTENT';
-  messageId: string;
-  delta: string;
-}
-```
-
-### REASONING_MESSAGE_END
-
-A reasoning message completes.
-
-```typescript
-interface ReasoningMessageEndEvent extends BaseAGUIEvent {
-  type: 'REASONING_MESSAGE_END';
-  messageId: string;
-}
-```
-
-### REASONING_END
-
-Reasoning ends for a message.
-
-```typescript
-interface ReasoningEndEvent extends BaseAGUIEvent {
-  type: 'REASONING_END';
-  messageId: string;
-}
-```
-
-### REASONING_ENCRYPTED_VALUE
-
-Carries an encrypted/opaque reasoning payload (e.g. provider-encrypted thinking
-that can be replayed but not read).
-
-```typescript
-interface ReasoningEncryptedValueEvent extends BaseAGUIEvent {
-  type: 'REASONING_ENCRYPTED_VALUE';
-  subtype: string;
-  entityId: string;
-  encryptedValue: string;
-}
-```
-
----
-
-## MESSAGES_SNAPSHOT
-
-Delivers a full snapshot of the conversation transcript. Unlike
-`STATE_SNAPSHOT` (which carries arbitrary application state),
-`MESSAGES_SNAPSHOT` specifically carries the message list.
-
-```typescript
-interface MessagesSnapshotEvent extends BaseAGUIEvent {
-  type: 'MESSAGES_SNAPSHOT';
-  messages: Message[];  // @ag-ui/core Message[] — use converters for UIMessage
-}
-```
-
----
-
-## Chunk Ordering and Relationships
-
-### Typical Flow
-
-1. **Content Generation:**
-   ```
-   RUN_STARTED
-   TEXT_MESSAGE_START
-   TEXT_MESSAGE_CONTENT (delta: "Hello")
-   TEXT_MESSAGE_CONTENT (delta: " world")
-   TEXT_MESSAGE_CONTENT (delta: "!")
-   TEXT_MESSAGE_END
-   RUN_FINISHED (finishReason: "stop")
-   ```
-
-2. **With Thinking:**
-   ```
-   RUN_STARTED
-   STEP_STARTED (stepType: "thinking")
-   STEP_FINISHED (delta: "I need to...")
-   STEP_FINISHED (delta: " check the weather")
-   TEXT_MESSAGE_START
-   TEXT_MESSAGE_CONTENT (delta: "Let me check")
-   TEXT_MESSAGE_END
-   RUN_FINISHED (finishReason: "stop")
-   ```
-
-3. **Tool Usage:**
-   ```
-   RUN_STARTED
-   TOOL_CALL_START (name: "get_weather")
-   TOOL_CALL_ARGS / TOOL_CALL_END (result: "{...}")
-   TEXT_MESSAGE_START
-   TEXT_MESSAGE_CONTENT (delta: "The weather is...")
-   TEXT_MESSAGE_END
-   RUN_FINISHED (finishReason: "stop")
-   ```
-
-4. **Client Tool with Approval:**
-   ```
-   RUN_STARTED
-   TOOL_CALL_START (name: "send_email")
-   TOOL_CALL_ARGS / TOOL_CALL_END
-   CUSTOM (name: "approval-requested")
-   [User approves]
-   [Client executes]
-   TEXT_MESSAGE_START
-   TEXT_MESSAGE_CONTENT (delta: "Email sent successfully")
-   TEXT_MESSAGE_END
-   RUN_FINISHED (finishReason: "stop")
-   ```
-
-### Multiple Tool Calls
-
-When the model calls multiple tools in parallel:
-
-```
-RUN_STARTED
-TOOL_CALL_START (index: 0, name: "get_weather")
-TOOL_CALL_START (index: 1, name: "get_time")
-TOOL_CALL_END (toolCallId: "call_1", result: "...")
-TOOL_CALL_END (toolCallId: "call_2", result: "...")
-TEXT_MESSAGE_START
-TEXT_MESSAGE_CONTENT (delta: "Based on the data...")
-TEXT_MESSAGE_END
-RUN_FINISHED (finishReason: "stop")
-```
-
----
-
-## TypeScript Union Type
-
-All chunks are represented as the AG-UI event union (`StreamChunk = AGUIEvent`):
-
-```typescript
-type StreamChunk =
-  | RunStartedEvent
-  | RunFinishedEvent
-  | RunErrorEvent
-  | TextMessageStartEvent
-  | TextMessageContentEvent
-  | TextMessageEndEvent
-  | ToolCallStartEvent
-  | ToolCallArgsEvent
-  | ToolCallEndEvent
-  | ToolCallResultEvent
-  | StepStartedEvent
-  | StepFinishedEvent
-  | MessagesSnapshotEvent
-  | StateSnapshotEvent
-  | StateDeltaEvent
-  | CustomEvent
-  | ReasoningStartEvent
-  | ReasoningMessageStartEvent
-  | ReasoningMessageContentEvent
-  | ReasoningMessageEndEvent
-  | ReasoningEndEvent
-  | ReasoningEncryptedValueEvent;
-```
-
-This enables type-safe handling in TypeScript:
-
-```typescript
-function handleChunk(chunk: StreamChunk) {
-  switch (chunk.type) {
-    case 'TEXT_MESSAGE_CONTENT':
-      console.log(chunk.delta);
-      break;
-    case 'STEP_FINISHED':
-      console.log(chunk.content);
-      break;
-    case 'TOOL_CALL_START':
-      console.log(chunk.toolCallName);
-      break;
-    // ... other cases
-  }
-}
-```
-
----
-
-## See Also
-
-- [SSE Protocol](./sse-protocol) - How chunks are transmitted via Server-Sent Events
-- [HTTP Stream Protocol](./http-stream-protocol) - How chunks are transmitted via HTTP streaming
-- [Connection Adapters Guide](../chat/connection-adapters) - Client implementation
diff --git a/docs/protocol/http-stream-protocol.md b/docs/protocol/http-stream-protocol.md
deleted file mode 100644
index 1d2e4e90a..000000000
--- a/docs/protocol/http-stream-protocol.md
+++ /dev/null
@@ -1,438 +0,0 @@
----
-title: HTTP Stream Protocol
-id: http-stream-protocol
-description: "TanStack AI's HTTP streaming protocol spec using newline-delimited JSON (NDJSON) — an alternative to SSE for simpler line-based transport."
-keywords:
-  - tanstack ai
-  - http stream
-  - ndjson
-  - newline-delimited json
-  - streaming protocol
-  - protocol spec
----
-
-HTTP streaming with newline-delimited JSON (NDJSON) is a simpler protocol than SSE that sends one JSON object per line. It's useful when:
-
-- SSE event prefixes add unwanted overhead
-- You need more control over the streaming format
-- You are working in environments that don't support SSE well
-- You are building custom protocols on top of the stream
-
-This protocol is **less common** than SSE for TanStack AI applications, but it is supported for flexibility.
-
-This document describes how TanStack AI transmits [AG-UI events](./chunk-definitions) over raw HTTP streaming (newline-delimited JSON), an alternative to Server-Sent Events.
-
----
-
-## Protocol Specification
-
-### HTTP Request
-
-**Method:** `POST`
-
-**Headers:**
-```http
-Content-Type: application/json
-```
-
-**Body:** The current `@tanstack/ai-client` POSTs an AG-UI `RunAgentInput` object — `threadId`, `runId`, `messages`, `tools`, `forwardedProps`, etc. The legacy `data` field is still emitted alongside `forwardedProps` as a deprecation bridge. See [Migrating to AG-UI Client-to-Server Compliance](../migration/ag-ui-compliance) for the full wire shape.
-
-```json
-{
-  "threadId": "thread-abc",
-  "runId": "run-123",
-  "messages": [
-    {
-      "role": "user",
-      "content": "Hello, how are you?"
-    }
-  ],
-  "tools": [],
-  "forwardedProps": {
-    // Optional client-supplied options
-  }
-}
-```
-
-### HTTP Response
-
-**Status:** `200 OK`
-
-**Headers:**
-```http
-Content-Type: application/x-ndjson
-Transfer-Encoding: chunked
-```
-
-Alternatively:
-```http
-Content-Type: application/json
-Transfer-Encoding: chunked
-```
-
-**Body:** Stream of newline-delimited JSON, one [AG-UI event](./chunk-definitions) per line
-
----
-
-## Stream Format
-
-Each [AG-UI event](./chunk-definitions) is transmitted as a single line of JSON followed by a newline (`\n`):
-
-```
-{JSON_ENCODED_EVENT}\n
-```
-
-### Key Points
-
-1. **One JSON object per line**
-2. **Each line ends with `\n`**
-3. **No prefixes** (unlike SSE's `data:` prefix)
-4. **No blank lines between events** (unlike SSE's `\n\n`)
-5. **Stream ends when connection closes** (no `[DONE]` marker — `RUN_FINISHED` is the terminal event)
-
-### Examples
-
-#### Text Content
-
-```json
-{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}
-{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" world","timestamp":1701234567891}
-{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"!","timestamp":1701234567892}
-```
-
-#### Tool Call
-
-A tool call streams as `TOOL_CALL_START` → `TOOL_CALL_ARGS` → `TOOL_CALL_END`, optionally followed by `TOOL_CALL_RESULT`:
-
-```json
-{"type":"TOOL_CALL_START","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567893}
-{"type":"TOOL_CALL_ARGS","toolCallId":"call_xyz","delta":"{\"location\":\"SF\"}","timestamp":1701234567894}
-{"type":"TOOL_CALL_END","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567895}
-{"type":"TOOL_CALL_RESULT","messageId":"msg_2","toolCallId":"call_xyz","content":"{\"temperature\":72,\"conditions\":\"sunny\"}","timestamp":1701234567896}
-```
-
-#### Run Completion
-
-```json
-{"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567897,"finishReason":"stop","usage":{"promptTokens":10,"completionTokens":15,"totalTokens":25}}
-```
-
----
-
-## Stream Lifecycle
-
-### 1. Client Initiates Connection
-
-```typescript
-const response = await fetch('/api/chat', {
-  method: 'POST',
-  headers: {
-    'Content-Type': 'application/json',
-  },
-  body: JSON.stringify({ messages }),
-});
-```
-
-### 2. Server Sends Response Header
-
-```http
-HTTP/1.1 200 OK
-Content-Type: application/x-ndjson
-Transfer-Encoding: chunked
-```
-
-### 3. Server Streams Chunks
-
-The server sends newline-delimited JSON:
-
-```json
-{"type":"RUN_STARTED","runId":"run_123","timestamp":1701234567889}
-{"type":"TEXT_MESSAGE_START","messageId":"msg_1","role":"assistant","timestamp":1701234567890}
-{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"The","timestamp":1701234567890}
-{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" weather is sunny","timestamp":1701234567891}
-{"type":"TEXT_MESSAGE_END","messageId":"msg_1","timestamp":1701234567893}
-{"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567894,"finishReason":"stop"}
-```
-
-### 4. Stream Completion
-
-`RUN_FINISHED` is the terminal event of a successful run; the server then closes the connection. No special marker is sent (neither transport uses a `[DONE]` sentinel).
-
----
-
-## Error Handling
-
-### Server-Side Errors
-
-If an error occurs during generation, TanStack AI's HTTP-stream helpers emit a `RUN_ERROR` event, then close the connection:
-
-```json
-{"type":"RUN_ERROR","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}
-```
-
-> **Canonical shape.** The AG-UI-canonical form carries `message` and `code` at the top level of the event. The wire emitter still nests them under `error` (shown above) as a backward-compatibility bridge; new consumers should prefer the top-level fields. See [Chunk Definitions → RUN_ERROR](./chunk-definitions#run_error).
-
-### Connection Errors
-
-Unlike SSE, HTTP streaming does not provide automatic reconnection:
-- Client must detect connection drops
-- Client must implement retry logic
-- Use exponential backoff for retries
-
----
-
-## Implementation
-
-### Server-Side (Node.js/TypeScript)
-
-#### Using TanStack AI
-
-TanStack AI provides built-in NDJSON helpers — `toHttpResponse(stream, init?)` returns a ready-to-return `Response`, and `toHttpStream(stream, abortController?)` returns the raw `ReadableStream` if you need to set your own headers or wrap it. Both are exported from `@tanstack/ai`, emit one AG-UI event per line, close the connection when the stream ends (`RUN_FINISHED` is terminal), and emit a `RUN_ERROR` event on a thrown error.
-
-```typescript
-import { chat, toHttpResponse } from '@tanstack/ai';
-import { openaiText } from '@tanstack/ai-openai';
-
-export async function POST(request: Request) {
-  const { messages } = await request.json();
-
-  const stream = chat({
-    adapter: openaiText('gpt-5.5'),
-    messages,
-  });
-
-  // Emits newline-delimited AG-UI events; sets NDJSON-friendly defaults.
-  return toHttpResponse(stream);
-}
-```
-
-If you need the raw stream (e.g. to add custom headers), use `toHttpStream`:
-
-```typescript
-import { chat, toHttpStream } from '@tanstack/ai';
-import { openaiText } from '@tanstack/ai-openai';
-
-export async function POST(request: Request) {
-  const { messages } = await request.json();
-  const abortController = new AbortController();
-
-  const stream = chat({ adapter: openaiText('gpt-5.5'), messages });
-
-  return new Response(toHttpStream(stream, abortController), {
-    headers: { 'Content-Type': 'application/x-ndjson' },
-  });
-}
-```
-
-#### Using Express.js
-
-```typescript
-import express from 'express';
-import { chat } from '@tanstack/ai';
-import { openaiText } from '@tanstack/ai-openai';
-
-const app = express();
-app.use(express.json());
-
-app.post('/api/chat', async (req, res) => {
-  const { messages } = req.body;
-
-  res.setHeader('Content-Type', 'application/x-ndjson');
-  res.setHeader('Cache-Control', 'no-cache');
-  res.setHeader('Transfer-Encoding', 'chunked');
-
-  try {
-    const stream = chat({
-      adapter: openaiText('gpt-5.5'),
-      messages,
-    });
-
-    for await (const chunk of stream) {
-      res.write(JSON.stringify(chunk) + '\n');
-    }
-  } catch (error: any) {
-    const errorEvent = {
-      type: 'RUN_ERROR',
-      timestamp: Date.now(),
-      error: { message: error.message },
-    };
-    res.write(JSON.stringify(errorEvent) + '\n');
-  } finally {
-    res.end();
-  }
-});
-```
-
-### Client-Side (Browser/Node.js)
-
-TanStack AI provides the `fetchHttpStream()` connection adapter:
-
-```typescript
-import { useChat, fetchHttpStream } from '@tanstack/ai-react';
-
-const { messages, sendMessage } = useChat({
-  connection: fetchHttpStream('/api/chat'),
-});
-```
-
-**What `fetchHttpStream()` does:**
-1. Makes a POST request with the AG-UI `RunAgentInput` body
-2. Reads the response body as a stream
-3. Splits by newlines
-4. Parses each line as JSON
-5. Yields `StreamChunk` (AG-UI event) objects
-
-### Manual Implementation (Advanced)
-
-#### Client
-```typescript
-const response = await fetch('/api/chat', {
-  method: 'POST',
-  headers: { 'Content-Type': 'application/json' },
-  body: JSON.stringify({ messages }),
-});
-
-const reader = response.body!.getReader();
-const decoder = new TextDecoder();
-let buffer = '';
-
-while (true) {
-  const { done, value } = await reader.read();
-  if (done) break;
-  
-  buffer += decoder.decode(value, { stream: true });
-  const lines = buffer.split('\n');
-  
-  // Keep incomplete line in buffer
-  buffer = lines.pop() || '';
-  
-  for (const line of lines) {
-    if (line.trim()) {
-      try {
-        const chunk = JSON.parse(line);
-        // Handle chunk...
-        console.log(chunk);
-      } catch (error) {
-        console.warn('Failed to parse chunk:', line);
-      }
-    }
-  }
-}
-
-// Process any remaining data in buffer
-if (buffer.trim()) {
-  try {
-    const chunk = JSON.parse(buffer);
-    console.log(chunk);
-  } catch (error) {
-    console.warn('Failed to parse final chunk:', buffer);
-  }
-}
-```
-
----
-
-## Comparison: HTTP Stream vs SSE
-
-| Feature | HTTP Stream (NDJSON) | Server-Sent Events (SSE) |
-|---------|---------------------|--------------------------|
-| Format | `{json}\n` | `data: {json}\n\n` |
-| Overhead | Lower (no prefixes) | Higher (`data:` prefix) |
-| Auto-reconnect | ❌ No | ✅ Yes |
-| Browser API | ❌ No (manual) | ✅ Yes (EventSource) |
-| Completion marker | ❌ No (close connection after `RUN_FINISHED`) | ❌ No (close connection after `RUN_FINISHED`) |
-| Debugging | Easy (plain JSON lines) | Easy (plain text) |
-| Use case | Custom protocols, lower overhead | Standard streaming, reconnection needed |
-
-**Recommendation:** Use SSE (`fetchServerSentEvents`) for most applications. Use HTTP streaming when you need lower overhead or have specific protocol requirements.
-
----
-
-## Debugging
-
-### Inspecting HTTP Stream Traffic
-
-**Browser DevTools:**
-1. Open Network tab
-2. Look for POST request to `/api/chat`
-3. View response as it streams in
-
-**cURL:**
-```bash
-curl -N -X POST http://localhost:3000/api/chat \
-  -H "Content-Type: application/json" \
-  -d '{"messages":[{"role":"user","content":"Hello"}]}'
-```
-
-The `-N` flag disables curl's output buffering so that events are printed as they arrive.
-
-**Example Output:**
-```json
-{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}
-{"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" there","timestamp":1701234567891}
-{"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567892,"finishReason":"stop"}
-```
-
-### Validating NDJSON
-
-Each line must be valid JSON. Test with:
-
-```bash
-# Validate each line
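-curl -N -X POST http://localhost:3000/api/chat -H "Content-Type: application/json" -d '{"messages":[{"role":"user","content":"Hello"}]}' | while IFS= read -r line; do
-  printf '%s\n' "$line" | jq . > /dev/null || echo "Invalid JSON: $line"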
-done
-```
-
----
-
-## Advantages of HTTP Streaming
-
-1. **Lower Overhead** - No `data:` prefixes or double newlines
-2. **Simpler Protocol** - Just JSON + newline
-3. **Flexible** - Easy to extend or modify
-4. **Standard Format** - NDJSON is widely used
-
----
-
-## Disadvantages vs SSE
-
-1. **No Auto-Reconnect** - Must implement manually
-2. **No Browser API** - Can't use EventSource
-3. **Less Common** - SSE is more standard for streaming
-
-(Both transports rely on connection close after `RUN_FINISHED`; neither uses a `[DONE]` marker.)
-
----
-
-## Best Practices
-
-1. **Use `\n` consistently** - Don't mix `\r\n` and `\n`
-2. **Set proper Content-Type** - Use `application/x-ndjson` or `application/json`
-3. **Handle partial lines** - Buffer incomplete data
-4. **Validate JSON** - Catch parsing errors gracefully
-5. **Flush regularly** - Don't buffer chunks server-side
-6. **Implement retry logic** - Client should handle connection drops
-
----
-
-## Alternative: JSON Lines (.jsonl)
-
-HTTP streaming in TanStack AI follows the [JSON Lines](http://jsonlines.org/) specification (also called NDJSON):
-
-- One JSON value per line
-- Each line is terminated with `\n`
-- UTF-8 encoding
-- File extension: `.jsonl` or `.ndjson`
-
-This makes streams compatible with standard NDJSON tools and libraries.
-
----
-
-## See Also
-
-- [Chunk Definitions](./chunk-definitions) - StreamChunk type reference
-- [SSE Protocol](./sse-protocol) - Recommended protocol (with auto-reconnect)
-- [Connection Adapters Guide](../chat/connection-adapters) - Client implementation
-- [JSON Lines Specification](http://jsonlines.org/)
-- [NDJSON Specification](http://ndjson.org/)
diff --git a/docs/protocol/sse-protocol.md b/docs/protocol/sse-protocol.md
deleted file mode 100644
index e8f1c83aa..000000000
--- a/docs/protocol/sse-protocol.md
+++ /dev/null
@@ -1,370 +0,0 @@
----
-title: Server-Sent Events (SSE) Protocol
-id: sse-protocol
-description: "TanStack AI's Server-Sent Events protocol spec — the recommended streaming transport for chat and media generations, with auto-reconnection."
-keywords:
-  - tanstack ai
-  - sse
-  - server-sent events
-  - streaming protocol
-  - protocol spec
-  - eventsource
----
-
-Server-Sent Events (SSE) is a standard HTTP-based protocol for server-to-client streaming. It provides:
-
-- ✅ **Automatic reconnection** - Browser handles connection drops
-- ✅ **Event-driven** - Native browser EventSource API
-- ✅ **Simple protocol** - Text-based, easy to debug
-- ✅ **Wide support** - Works in all modern browsers
-- ✅ **Efficient** - Single long-lived HTTP connection
-
-This document describes how TanStack AI transmits [AG-UI events](./chunk-definitions) over Server-Sent Events (SSE), the recommended protocol for most use cases.
-
-## Protocol Specification
-
-### HTTP Request
-
-**Method:** `POST`
-
-**Headers:**
-```http
-Content-Type: application/json
-```
-
-**Body:** The current `@tanstack/ai-client` POSTs an AG-UI `RunAgentInput` object — `threadId`, `runId`, `messages`, `tools`, `forwardedProps`, etc. The legacy `data` field is still emitted alongside `forwardedProps` as a deprecation bridge. See [Migrating to AG-UI Client-to-Server Compliance](../migration/ag-ui-compliance) for the full wire shape and migration tiers.
-
-```json
-{
-  "threadId": "thread-abc",
-  "runId": "run-123",
-  "messages": [
-    {
-      "role": "user",
-      "content": "Hello, how are you?"
-    }
-  ],
-  "tools": [],
-  "forwardedProps": {
-    // Optional client-supplied options
-  }
-}
-```
-
-### HTTP Response
-
-**Status:** `200 OK`
-
-**Headers:**
-```http
-Content-Type: text/event-stream
-Cache-Control: no-cache
-Connection: keep-alive
-```
-
-**Body:** Stream of SSE events — each event is a single [AG-UI event](./chunk-definitions) JSON object.
-
----
-
-## SSE Format
-
-Each [AG-UI event](./chunk-definitions) is transmitted as an SSE event with the following format:
-
-```
-data: {JSON_ENCODED_EVENT}\n\n
-```
-
-### Key Points
-
-1. **Each event starts with `data: `**
-2. **Followed by the JSON-encoded AG-UI event**
-3. **Ends with double newline `\n\n`**
-4. **No event names or IDs** (not required for our use case)
-
-### Examples
-
-#### Text Content
-
-```
-data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}\n\n
-```
-
-#### Tool Call
-
-A tool call streams as a `TOOL_CALL_START` → `TOOL_CALL_ARGS` → `TOOL_CALL_END` sequence, optionally followed by a `TOOL_CALL_RESULT` once the tool runs:
-
-```
-data: {"type":"TOOL_CALL_START","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567891}\n\n
-data: {"type":"TOOL_CALL_ARGS","toolCallId":"call_xyz","delta":"{\"location\":\"SF\"}","timestamp":1701234567892}\n\n
-data: {"type":"TOOL_CALL_END","toolCallId":"call_xyz","toolCallName":"get_weather","timestamp":1701234567893}\n\n
-data: {"type":"TOOL_CALL_RESULT","messageId":"msg_2","toolCallId":"call_xyz","content":"{\"temperature\":72,\"conditions\":\"sunny\"}","timestamp":1701234567894}\n\n
-```
-
-#### Run Completion
-
-`RUN_FINISHED` is the terminal event of a successful run:
-
-```
-data: {"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567895,"finishReason":"stop","usage":{"promptTokens":10,"completionTokens":5,"totalTokens":15}}\n\n
-```
-
----
-
-## Stream Lifecycle
-
-### 1. Client Initiates Connection
-
-```typescript
-// Client code
-const response = await fetch('/api/chat', {
-  method: 'POST',
-  headers: {
-    'Content-Type': 'application/json',
-  },
-  body: JSON.stringify({ messages }),
-});
-```
-
-### 2. Server Sends Response Header
-
-```http
-HTTP/1.1 200 OK
-Content-Type: text/event-stream
-Cache-Control: no-cache
-Connection: keep-alive
-```
-
-### 3. Server Streams Events
-
-The server sends multiple `data:` events as the run progresses:
-
-```
-data: {"type":"RUN_STARTED","runId":"run_123","timestamp":1701234567889}\n\n
-data: {"type":"TEXT_MESSAGE_START","messageId":"msg_1","role":"assistant","timestamp":1701234567890}\n\n
-data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"The","timestamp":1701234567890}\n\n
-data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" weather","timestamp":1701234567891}\n\n
-data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" is sunny","timestamp":1701234567892}\n\n
-data: {"type":"TEXT_MESSAGE_END","messageId":"msg_1","timestamp":1701234567893}\n\n
-data: {"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567894,"finishReason":"stop"}\n\n
-```
-
-### 4. Stream Completion
-
-`RUN_FINISHED` is the terminal event of a successful run. There is **no** `[DONE]` sentinel — after `RUN_FINISHED` the server simply closes the connection, and the client treats connection close as end-of-stream.
-
----
-
-## Error Handling
-
-### Server-Side Errors
-
-If an error occurs during generation, TanStack AI's SSE helpers emit a `RUN_ERROR` event, then close the connection:
-
-```
-data: {"type":"RUN_ERROR","timestamp":1701234567895,"error":{"message":"Rate limit exceeded","code":"rate_limit_exceeded"}}\n\n
-```
-
-> **Canonical shape.** The AG-UI-canonical form carries `message` and `code` at the top level of the event. The wire emitter still nests them under `error` (shown above) as a backward-compatibility bridge; new consumers should prefer the top-level fields. See [Chunk Definitions → RUN_ERROR](./chunk-definitions#run_error).
-
-### Connection Errors
-
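-SSE provides automatic reconnection when the stream is consumed through the browser's native `EventSource` API (which only issues `GET` requests; a `fetch`-based `POST` client must implement its own reconnection):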
-- Browser automatically reconnects on connection drop
-- Server can send `retry:` field to control reconnection delay
-- Client can handle `error` events from EventSource
-
----
-
-## Implementation
-
-### Server-Side (Node.js/TypeScript)
-
-TanStack AI provides the `toServerSentEventsStream()` and `toServerSentEventsResponse()` utilities:
-
-```typescript
-import { chat, toServerSentEventsResponse } from '@tanstack/ai';
-import { openaiText } from '@tanstack/ai-openai';
-
-export async function POST(request: Request) {
-  const { messages } = await request.json();
-
-  const stream = chat({
-    adapter: openaiText('gpt-5.5'),
-    messages,
-  });
-
-  // Automatically converts StreamChunks to SSE format
-  return toServerSentEventsResponse(stream);
-}
-```
-
-**What `toServerSentEventsResponse()` does:**
-1. Creates a `ReadableStream` from the async iterable
-2. Wraps each AG-UI event as `data: {JSON}\n\n`
-3. On the stream ending, closes the connection (no `[DONE]` marker — `RUN_FINISHED` is the terminal event)
-4. Sets proper SSE headers
-5. On a thrown error, emits a `RUN_ERROR` event and closes the connection
-
-### Client-Side (Browser/Node.js)
-
-TanStack AI provides `fetchServerSentEvents()` connection adapter:
-
-```typescript
-import { useChat, fetchServerSentEvents } from '@tanstack/ai-react';
-
-const { messages, sendMessage } = useChat({
-  connection: fetchServerSentEvents('/api/chat'),
-});
-```
-
-**What `fetchServerSentEvents()` does:**
-1. Makes a POST request with the AG-UI `RunAgentInput` body
-2. Reads the response body as a stream
-3. Parses SSE format (`data:` prefix)
-4. Deserializes each line into an AG-UI event
-5. Yields `StreamChunk` (AG-UI event) objects
-6. Ends when the connection closes (after `RUN_FINISHED`)
-
-### Manual Implementation (Advanced)
-
-If you need custom handling:
-
-#### Server
-```typescript
-export async function POST(request: Request) {
-  const { messages } = await request.json();
-  const encoder = new TextEncoder();
-
-  const stream = new ReadableStream({
-    async start(controller) {
-      try {
-        for await (const chunk of chat({ adapter: openaiText('gpt-5.5'), messages })) {
-          const sseData = `data: ${JSON.stringify(chunk)}\n\n`;
-          controller.enqueue(encoder.encode(sseData));
-        }
-        // No [DONE] marker — the stream's RUN_FINISHED event is terminal.
-        controller.close();
-      } catch (error) {
-        const errorEvent = {
-          type: 'RUN_ERROR',
-          timestamp: Date.now(),
-          error: { message: (error as Error).message },
-        };
-        controller.enqueue(encoder.encode(`data: ${JSON.stringify(errorEvent)}\n\n`));
-        controller.close();
-      }
-    }
-  });
-
-  return new Response(stream, {
-    headers: {
-      'Content-Type': 'text/event-stream',
-      'Cache-Control': 'no-cache',
-      'Connection': 'keep-alive',
-    },
-  });
-}
-```
-
-#### Client
-```typescript
-const response = await fetch('/api/chat', {
-  method: 'POST',
-  headers: { 'Content-Type': 'application/json' },
-  body: JSON.stringify({ messages }),
-});
-
-const reader = response.body!.getReader();
-const decoder = new TextDecoder();
-let buffer = '';
-
-while (true) {
-  const { done, value } = await reader.read();
-  if (done) break;
-  
-  buffer += decoder.decode(value, { stream: true });
-  const lines = buffer.split('\n');
-  buffer = lines.pop() || '';
-  
-  for (const line of lines) {
-    if (line.startsWith('data: ')) {
-      const data = line.slice(6);
-
-      const event = JSON.parse(data);
-      // Handle the AG-UI event...
-      // (RUN_FINISHED signals the run is complete; the stream ends on close)
-    }
-  }
-}
-```
-
----
-
-## Debugging
-
-### Inspecting SSE Traffic
-
-**Browser DevTools:**
-1. Open Network tab
-2. Look for requests with `text/event-stream` type
-3. View response as it streams in
-
-**cURL:**
-```bash
-curl -N -X POST http://localhost:3000/api/chat \
-  -H "Content-Type: application/json" \
-  -d '{"messages":[{"role":"user","content":"Hello"}]}'
-```
-
-The `-N` flag disables buffering to see real-time output.
-
-**Example Output:**
-```
-data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":"Hello","timestamp":1701234567890}
-
-data: {"type":"TEXT_MESSAGE_CONTENT","messageId":"msg_1","delta":" there","timestamp":1701234567891}
-
-data: {"type":"RUN_FINISHED","runId":"run_123","timestamp":1701234567892,"finishReason":"stop"}
-```
-
-The connection closes after `RUN_FINISHED` — there is no `[DONE]` line.
-
----
-
-## Advantages of SSE
-
-1. **Built-in Reconnection** - Browser handles connection drops automatically
-2. **Simpler than WebSocket** - No handshake, just HTTP
-3. **Server-to-Client Only** - Matches chat streaming use case perfectly
-4. **Wide Browser Support** - Works everywhere (except IE11)
-5. **Proxy-Friendly** - Works through most HTTP proxies
-6. **Easy to Debug** - Plain text format, visible in DevTools
-
----
-
-## Limitations
-
-1. **One-Way Communication** - Server to client only (fine for streaming responses)
-2. **HTTP/1.1 Connection Limits** - Browsers limit concurrent connections per domain (6-8)
-3. **No Binary Data** - Text-only (not an issue for JSON chunks)
-4. **HTTP/2 Streams** - Can be more efficient but SSE works fine
-
----
-
-## Best Practices
-
-1. **Always set proper headers** - `Content-Type`, `Cache-Control`, `Connection`
-2. **Treat `RUN_FINISHED` as terminal** - There is no `[DONE]` marker; close the connection after it
-3. **Handle errors gracefully** - Emit a `RUN_ERROR` event before closing
-4. **Use compression** - Enable gzip/brotli at the reverse proxy level
-5. **Set timeouts** - Prevent hanging connections
-6. **Monitor connection count** - Watch for connection leaks
-
----
-
-## See Also
-
-- [Chunk Definitions](./chunk-definitions) - StreamChunk type reference
-- [HTTP Stream Protocol](./http-stream-protocol) - Alternative protocol
-- [Connection Adapters Guide](../chat/connection-adapters) - Client implementation
-- [MDN: Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events)
diff --git a/docs/structured-outputs/multi-turn.md b/docs/structured-outputs/multi-turn.md
index f8719be82..8e4ce55ff 100644
--- a/docs/structured-outputs/multi-turn.md
+++ b/docs/structured-outputs/multi-turn.md
@@ -161,13 +161,13 @@ function RecipeCard({ part }: { part: RecipePart }) {
   // `data` is `Recipe` once status === 'complete'. `partial` is
   // DeepPartial<Recipe> while the model is still streaming the JSON.
   // Read whichever is freshest — they converge on complete.
-  const recipe = part.data ?? part.partial ?? ({} as Partial<Recipe>);
+  const recipe = part.data ?? part.partial;
 
   return (
     <article>
-      <h3>{recipe.title ?? "Plating up…"}</h3>
-      {recipe.cuisine && <p>{recipe.cuisine}</p>}
-      {recipe.ingredients?.map((ing, i) => (
+      <h3>{recipe?.title ?? "Plating up…"}</h3>
+      {recipe?.cuisine && <p>{recipe.cuisine}</p>}
+      {recipe?.ingredients?.map((ing, i) => (
         <li key={i}>
           {ing?.amount} {ing?.item}
         </li>
diff --git a/docs/structured-outputs/one-shot.md b/docs/structured-outputs/one-shot.md
index 8c63b7896..cbb4fea3d 100644
--- a/docs/structured-outputs/one-shot.md
+++ b/docs/structured-outputs/one-shot.md
@@ -150,8 +150,8 @@ const result = await chat({
   outputSchema: schema,
 });
 
-// Result is `unknown` — narrow before use.
-const person = result as { name: string; age: number };
+// `result` is `unknown` — a raw JSON Schema gives no compile-time type.
+// Validate it (e.g. with a Standard Schema library) before use.
 ```
 
 Prefer a schema library when you can — type inference is worth it.
@@ -204,7 +204,7 @@ const res = await fetch("/api/extract-person", {
   method: "POST",
   body: JSON.stringify({ text }),
 });
-const person = (await res.json()) as z.infer<typeof PersonSchema>;
+const person = PersonSchema.parse(await res.json()); // validated + typed
 ```
 
 This is the most literal one-shot shape: one request, one object back. You own the fetch and the typing; the hook isn't involved.
diff --git a/docs/tools/server-tools.md b/docs/tools/server-tools.md
index 0349d3299..c7d45059d 100644
--- a/docs/tools/server-tools.md
+++ b/docs/tools/server-tools.md
@@ -359,10 +359,12 @@ const getUserDataDef = toolDefinition({
   outputSchema,
 });
 
-// When using JSON Schema, args is typed as `unknown` — narrow or cast before use
+// With a raw JSON Schema, args is typed as `unknown` — narrow it before use
 const getUserData = getUserDataDef.server(async (args) => {
-  const { userId } = args as { userId: string };
-  const user = await db.users.findUnique({ where: { id: userId } });
+  if (typeof args !== "object" || args === null || !("userId" in args)) {
+    throw new Error("Invalid input: expected a userId");
+  }
+  const user = await db.users.findUnique({ where: { id: String(args.userId) } });
   return { name: user.name, email: user.email };
 });
 ```
diff --git a/docs/tools/tool-architecture.md b/docs/tools/tool-architecture.md
index b4002bea4..399d7aa5a 100644
--- a/docs/tools/tool-architecture.md
+++ b/docs/tools/tool-architecture.md
@@ -424,5 +424,5 @@ All execute simultaneously, then LLM generates comparison.
 - [Server Tools](./server-tools) - Deep dive into server-side tools
 - [Client Tools](./client-tools) - Deep dive into client-side tools
 - [Tool Approval Flow](./tool-approval) - Implementing approval workflows
-- [Stream Chunk Definitions](../protocol/chunk-definitions) - Understanding the streaming protocol
+- [AG-UI protocol](https://docs.ag-ui.com/introduction) - Understanding the streaming protocol
 
diff --git a/docs/tools/tools.md b/docs/tools/tools.md
index f4aa270f5..356a40e95 100644
--- a/docs/tools/tools.md
+++ b/docs/tools/tools.md
@@ -172,11 +172,16 @@ const getWeatherDef = toolDefinition({
   outputSchema,
 });
 
-// Create server implementation (args is typed as `unknown` with JSON Schema)
+// With a raw JSON Schema, `args` is `unknown` — narrow it before use
+// (prefer a Zod schema for automatic typing).
 const getWeatherServer = getWeatherDef.server(async (args) => {
-  const { location, unit } = args as { location: string; unit?: string };
+  if (typeof args !== "object" || args === null || !("location" in args)) {
+    throw new Error("Invalid input: expected a location");
+  }
+  const location = String(args.location);
+  const unit = "unit" in args && typeof args.unit === "string" ? args.unit : "fahrenheit";
   const response = await fetch(
-    `https://api.weather.com/v1/current?location=${location}&unit=${unit || "fahrenheit"}`
+    `https://api.weather.com/v1/current?location=${encodeURIComponent(location)}&unit=${encodeURIComponent(unit)}`
   );
   return await response.json();
 });

From c07bea19818860068fadb40212a5c8a7c904f4bf Mon Sep 17 00:00:00 2001
From: Alem Tuzlak <t.zlak@hotmail.com>
Date: Wed, 3 Jun 2026 14:08:38 +0200
Subject: [PATCH 7/7] docs: latest model in built-in-middleware + concrete
 structured-output transform
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- built-in-middleware.md: gpt-4o -> gpt-5.5 in the examples.
- middleware.md: make the "Transforming structured-output chunks" example
  self-contained — redact SSNs inline in the streaming JSON delta instead of
  calling an undefined `redact()` helper.

(The docs conventions — no casts, latest models, show both sides — already
live in the project CLAUDE.md / AGENTS.md; the earlier global-CLAUDE.md
addition has been reverted.)
---
 docs/advanced/built-in-middleware.md |  6 +++---
 docs/advanced/middleware.md          | 25 +++++++++++++++----------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/docs/advanced/built-in-middleware.md b/docs/advanced/built-in-middleware.md
index 3275059b2..ecc600759 100644
--- a/docs/advanced/built-in-middleware.md
+++ b/docs/advanced/built-in-middleware.md
@@ -32,7 +32,7 @@ import { chat } from "@tanstack/ai";
 import { toolCacheMiddleware } from "@tanstack/ai/middlewares";
 
 const stream = chat({
-  adapter: openaiText("gpt-4o"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   tools: [weatherTool, stockTool],
   middleware: [
@@ -159,7 +159,7 @@ import { chat } from "@tanstack/ai";
 import { contentGuardMiddleware } from "@tanstack/ai/middlewares";
 
 const stream = chat({
-  adapter: openaiText("gpt-4o"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   middleware: [
     contentGuardMiddleware({
@@ -212,7 +212,7 @@ const otel = otelMiddleware({
 });
 
 const result = await chat({
-  adapter: openaiText("gpt-4o"),
+  adapter: openaiText("gpt-5.5"),
   messages,
   middleware: [otel],
 });
diff --git a/docs/advanced/middleware.md b/docs/advanced/middleware.md
index 9977d12a3..6dc399d0f 100644
--- a/docs/advanced/middleware.md
+++ b/docs/advanced/middleware.md
@@ -269,24 +269,29 @@ How you distinguish them depends on which finalization path the adapter takes:
 - **Native-combined adapters** (modern OpenAI Chat Completions / Responses, Claude 4.5+, Gemini 3.x, Grok 4.x — see issue #605): the schema-constrained JSON is produced on the model's natural final turn, so **`ctx.phase` stays `'modelStream'`** — the `'structuredOutput'` phase never fires. Discriminate on the CUSTOM event name (`structured-output.start` / `structured-output.complete`) instead.
 
 ```typescript
-const structuredOutputObserver: ChatMiddleware = {
-  name: "structured-output-observer",
+const redactStructuredOutput: ChatMiddleware = {
+  name: "redact-structured-output",
   onChunk: (ctx, chunk) => {
-    // Separate-finalization path: the raw JSON streams as TEXT_MESSAGE_CONTENT
-    // during the 'structuredOutput' phase. Transform it like any text delta.
+    // Separate-finalization path: the JSON streams as TEXT_MESSAGE_CONTENT
+    // during the 'structuredOutput' phase. Transform the delta like any
+    // other text chunk — here, redact SSN-shaped text in each delta (a value
+    // split across two deltas is not caught) before it reaches the client.
     if (
       ctx.phase === "structuredOutput" &&
       chunk.type === "TEXT_MESSAGE_CONTENT"
     ) {
-      return { ...chunk, delta: redact(chunk.delta) };
+      return {
+        ...chunk,
+        delta: chunk.delta.replace(/\b\d{3}-\d{2}-\d{4}\b/g, "[REDACTED]"),
+      };
     }
 
-    // Both paths: the final parsed object arrives as a CUSTOM event. On the
-    // native-combined path this is your only signal (ctx.phase never flips
-    // to 'structuredOutput'), so key off the event name, not the phase.
+    // Both paths: the validated object arrives as a CUSTOM
+    // `structured-output.complete` event. On the native-combined path this is
+    // your only signal (ctx.phase never flips to 'structuredOutput'), so key
+    // off the event name, not the phase. `chunk.value` carries { object, raw }.
     if (chunk.type === "CUSTOM" && chunk.name === "structured-output.complete") {
-      // chunk.value carries { object, raw } — observe, log, or replace it
-      console.log("structured output:", chunk.value);
+      console.log("final structured output:", chunk.value);
     }
 
     // Return void to pass everything else through unchanged.