diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 44ade6f1..6616b66d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,9 +9,9 @@ "source": { "source": "npm", "package": "@copilotkit/llmock", - "version": "^1.3.1" + "version": "^1.5.0" }, - "description": "Fixture authoring skill for @copilotkit/llmock — match fields, response types, agent loop patterns, gotchas, and debugging" + "description": "Fixture authoring skill for @copilotkit/llmock — match fields, response types, embeddings, structured output, sequential responses, streaming physics, agent loop patterns, gotchas, and debugging" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 7edff576..cd8e5ae4 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "llmock", - "version": "1.3.1", + "version": "1.5.0", "description": "Fixture authoring guidance for @copilotkit/llmock", "author": { "name": "CopilotKit" diff --git a/.claude/commands/write-fixtures.md b/.claude/commands/write-fixtures.md deleted file mode 100644 index 1d1ee8c8..00000000 --- a/.claude/commands/write-fixtures.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -name: write-fixtures -description: Use when writing test fixtures for @copilotkit/llmock — mock LLM responses, tool call sequences, error injection, multi-turn agent loops, or debugging fixture mismatches ---- - -# Writing llmock Test Fixtures - -## What llmock Is - -Zero-dependency mock LLM server. Fixture-driven. Multi-provider (OpenAI, Anthropic, Gemini). Runs a real HTTP server on a real port — works across processes, unlike MSW-style interceptors. WebSocket support for OpenAI Responses/Realtime and Gemini Live APIs. - -## Core Mental Model - -- **Fixtures** = match criteria + response -- **First-match-wins** — order matters -- All providers share one fixture pool (provider adapters normalize to `ChatCompletionRequest`) -- Fixtures are stateless — no built-in multi-turn sequencing -- Fixtures are live — mutations after `start()` take effect immediately - -## Match Field Reference - -| Field | Type | Matches Against | -| ------------- | ----------------------------------------- | ------------------------------------------------------------------------- | -| `userMessage` | `string` | Substring of last `role: "user"` message text | -| `userMessage` | `RegExp` | Pattern test on last `role: "user"` message text | -| `toolName` | `string` | Exact match on any tool in request's `tools[]` array (by `function.name`) | -| `toolCallId` | `string` | Exact match on `tool_call_id` of last `role: "tool"` message | -| `model` | `string` | Exact match on `req.model` | -| `model` | `RegExp` | Pattern test on `req.model` | -| `predicate` | `(req: ChatCompletionRequest) => boolean` | Custom function — full access to request | - -**AND logic**: all specified fields must match. Empty match `{}` = catch-all. - -Multi-part content (e.g., `[{type: "text", text: "hello"}]`) is automatically extracted — `userMessage` matching works regardless of content format. - -## Response Types - -### Text - -```typescript -{ - content: "Hello!"; -} -``` - -### Tool Calls - -```typescript -{ - toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }]; -} -``` - -**`arguments` MUST be a JSON string**, not an object. This is the #1 mistake. 
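A reliable pattern is to build the arguments as an object and serialize with `JSON.stringify` rather than hand-escaping JSON (a minimal sketch using the `onMessage` shorthand shown below):

```typescript
// Correct: `arguments` is a JSON *string*, as the OpenAI wire format requires
mock.onMessage("weather", {
  toolCalls: [{ name: "get_weather", arguments: JSON.stringify({ city: "SF" }) }],
});

// Wrong: a plain object here will break clients that expect a string
// toolCalls: [{ name: "get_weather", arguments: { city: "SF" } }]
```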
- -### Error - -```typescript -{ error: { message: "Rate limited", type: "rate_limit_error" }, status: 429 } -``` - -## Common Patterns - -### Basic text fixture - -```typescript -mock.onMessage("hello", { content: "Hi there!" }); -``` - -### Tool call → tool result → final response (3-step agent loop) - -The most common pattern. Fixture 1 triggers the tool call, fixture 2 handles the tool result. - -```typescript -// Step 1: User asks about weather → LLM calls tool -mock.onMessage("weather", { - toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }], -}); - -// Step 2: Tool result comes back → LLM responds with text -mock.addFixture({ - match: { predicate: (req) => req.messages.at(-1)?.role === "tool" }, - response: { content: "It's 72°F in San Francisco." }, -}); -``` - -**Why predicate, not userMessage?** After a tool call, the client replays the same conversation with the tool result appended. The user message hasn't changed — `userMessage: "weather"` would match the SAME fixture again, creating an infinite loop. - -### Predicate-based routing (same user message, different context) - -Common in supervisor/orchestrator patterns where the system prompt changes: - -```typescript -mock.addFixture({ - match: { - predicate: (req) => { - const sys = req.messages.find((m) => m.role === "system")?.content ?? ""; - return typeof sys === "string" && sys.includes("Flights found: false"); - }, - }, - response: { toolCalls: [{ name: "search_flights", arguments: "{}" }] }, -}); -``` - -### Catch-all (always add one) - -Prevents unmatched requests from returning 404 and crashing the test: - -```typescript -mock.addFixture({ - match: { predicate: () => true }, - response: { content: "I understand. How can I help?" }, -}); -``` - -### Tool result catch-all with prependFixture - -Must go at the front so it matches before substring-based fixtures: - -```typescript -mock.prependFixture({ - match: { predicate: (req) => req.messages.at(-1)?.role === "tool" }, - response: { content: "Done!" }, -}); -``` - -### Stream interruption simulation (v1.3.0+) - -```typescript -mock.onMessage( - "long response", - { content: "This will be cut short..." }, - { - truncateAfterChunks: 3, // Stop after 3 SSE chunks - disconnectAfterMs: 500, // Or disconnect after 500ms - }, -); -``` - -### Error injection (one-shot) - -```typescript -mock.nextRequestError(429, { message: "Rate limited", type: "rate_limit_error" }); -// Next request gets 429, then fixture auto-removes itself -``` - -### JSON fixture files - -```json -{ - "fixtures": [ - { - "match": { "userMessage": "hello" }, - "response": { "content": "Hi!" } - } - ] -} -``` - -JSON files cannot use `RegExp` or `predicate` — those are code-only features. - -Load with `mock.loadFixtureFile("./fixtures/greetings.json")` or `mock.loadFixtureDir("./fixtures/")`. - -## Critical Gotchas - -1. **Order matters** — first match wins. Specific fixtures before general ones. Use `prependFixture()` to force priority. - -2. **`arguments` must be a JSON string** — `"arguments": "{\"key\":\"value\"}"` not `"arguments": {"key":"value"}`. The type system enforces this but JSON fixtures can get it wrong silently. - -3. **Latency is per-chunk, not total** — `latency: 100` means 100ms between each SSE chunk, not 100ms total response time. Similarly, `truncateAfterChunks` and `disconnectAfterMs` are for simulating stream interruptions (added in v1.3.0). - -4. 
**Tool result messages don't change the user message** — after a tool call, the client sends the same conversation + tool result. Matching on `userMessage` will hit the SAME fixture again → infinite loop. Always use `predicate` checking `role === "tool"` for tool results. - -5. **`clearFixtures()` preserves the array reference** — uses `.length = 0`, not reassignment. The running server reads the same array object. - -6. **Journal records everything** — including 404 "no match" responses. Use `mock.getLastRequest()` to debug mismatches. - -7. **All providers share fixtures** — a fixture matching "hello" works whether the request comes via `/v1/chat/completions` (OpenAI), `/v1/messages` (Anthropic), or Gemini endpoints. - -8. **WebSocket uses the same fixture pool** — no special setup needed for WebSocket-based APIs (OpenAI Responses WS, Realtime, Gemini Live). - -## Debugging Fixture Mismatches - -When a fixture doesn't match: - -1. **Inspect what the server received**: `mock.getLastRequest()` → check `body.messages` array -2. **Check fixture order**: `mock.getFixtures()` returns fixtures in registration order -3. **For `userMessage`**: match is against the LAST `role: "user"` message only, substring match (not exact) -4. **Check the journal**: `mock.getRequests()` shows all requests including which fixture matched (or `null` for 404) - -## E2E Test Setup Pattern - -```typescript -import { LLMock } from "@copilotkit/llmock"; - -// Setup — port: 0 picks a random available port -const mock = new LLMock({ port: 0 }); -mock.loadFixtureDir("./fixtures"); -await mock.start(); -process.env.OPENAI_BASE_URL = `${mock.url}/v1`; - -// Per-test cleanup -afterEach(() => mock.reset()); // clears fixtures AND journal - -// Teardown -afterAll(async () => await mock.stop()); -``` - -### Static factory shorthand - -```typescript -const mock = await LLMock.create({ port: 0 }); // creates + starts in one call -``` - -## API Quick Reference - -| Method | Purpose | -| ------------------------------------- | ---------------------------------- | -| `addFixture(f)` | Append fixture (last priority) | -| `addFixtures(f[])` | Append multiple | -| `prependFixture(f)` | Insert at front (highest priority) | -| `clearFixtures()` | Remove all fixtures | -| `getFixtures()` | Read current fixture list | -| `on(match, response, opts?)` | Shorthand for `addFixture` | -| `onMessage(pattern, response, opts?)` | Match by user message | -| `onToolCall(name, response, opts?)` | Match by tool name in `tools[]` | -| `onToolResult(id, response, opts?)` | Match by `tool_call_id` | -| `nextRequestError(status, body?)` | One-shot error, auto-removes | -| `loadFixtureFile(path)` | Load JSON fixture file | -| `loadFixtureDir(path)` | Load all JSON files in directory | -| `start()` | Start server, returns URL | -| `stop()` | Stop server | -| `reset()` | Clear fixtures + journal | -| `getRequests()` | All journal entries | -| `getLastRequest()` | Most recent journal entry | -| `clearRequests()` | Clear journal only | -| `url` / `baseUrl` | Server URL (throws if not started) | -| `port` | Server port number | diff --git a/.claude/commands/write-fixtures.md b/.claude/commands/write-fixtures.md new file mode 120000 index 00000000..3d887c6a --- /dev/null +++ b/.claude/commands/write-fixtures.md @@ -0,0 +1 @@ +../../skills/write-fixtures/SKILL.md \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index c3b5a8db..69708a1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # @copilotkit/llmock +## 1.5.0 + +### Minor 
Changes
+
+- Embeddings API: `POST /v1/embeddings` endpoint, `onEmbedding()` convenience method, `inputText` match field, `EmbeddingResponse` type, deterministic fallback embeddings from input hash, Azure embedding routing
+- Structured output / JSON mode: `responseFormat` match field, `onJsonOutput()` convenience method
+- Sequential responses: `sequenceIndex` match field for stateful multi-turn fixtures, per-fixture-group match counting, `resetMatchCounts()` method
+- Streaming physics: `StreamingProfile` type with `ttft`, `tps`, `jitter` fields for realistic timing simulation
+- AWS Bedrock: `POST /model/{modelId}/invoke` endpoint, Anthropic Messages format translation
+- Azure OpenAI: provider routing for `/openai/deployments/{id}/chat/completions` and `/openai/deployments/{id}/embeddings`
+- Health & models endpoints: `GET /health`, `GET /ready`, `GET /v1/models` (auto-populated from fixtures)
+- Docker & Helm: Dockerfile, Helm chart for Kubernetes deployment
+- Documentation website: full docs site at llmock.com with feature pages and competitive comparison matrix
+- Automated drift remediation: `scripts/drift-report-collector.ts` and `scripts/fix-drift.ts` for CI-driven drift fixes
+- CI automation: competitive matrix update workflow, drift fix workflow
+- `FixtureOpts` and `EmbeddingFixtureOpts` type aliases exported for external consumers
+
+### Patch Changes
+
+- Fix Gemini Live handler crash on malformed `clientContent.turns` and `toolResponse.functionResponses`
+- Add `isClosed` guard before WebSocket finalization events (prevents writes to closed connections)
+- Default to non-streaming for Claude Messages API and Responses API (matching real API defaults)
+- Fix `streamingProfile` missing from convenience method opts types (`on`, `onMessage`, etc.)
+- Fix skills/ symlink direction so npm pack includes the write-fixtures skill
+- Remove `.claude` from the package.json `files` array (dead weight: the symlink doesn't ship)
+- Add `.worktrees/` to eslint ignores
+- Remove dead `@keyframes sseLine` CSS from docs site
+- Fix watcher cleanup on error (clear debounce timer, null guard)
+- Fix empty-reload guard (keep previous fixtures when reload produces 0)
+- README rewritten as concise overview with links to docs site
+- Write-fixtures skill updated for all v1.5.0 features
+- Docs site: Get Started links to docs, comparison above reliability, npm version badge
+
 ## 1.4.0

### Minor Changes
diff --git a/README.md b/README.md
index ebad0ca5..562cde38 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,28 @@
-# @copilotkit/llmock [![Unit Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml)
+# @copilotkit/llmock [![Unit Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [![npm version](https://img.shields.io/npm/v/@copilotkit/llmock)](https://www.npmjs.com/package/@copilotkit/llmock)

-Deterministic multi-provider mock LLM server for testing. Streams SSE responses in real OpenAI, Claude, and Gemini API formats, driven entirely by fixtures. 
Zero runtime dependencies — built on Node.js builtins only. +Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, and Azure API formats, driven entirely by fixtures. Zero runtime dependencies. -Supports both streaming (SSE) and non-streaming JSON responses across OpenAI (Chat Completions + Responses), Anthropic Claude (Messages), and Google Gemini (GenerateContent) APIs. Text completions, tool calls, and error injection. Point any process at it via `OPENAI_BASE_URL`, `ANTHROPIC_BASE_URL`, or Gemini base URL and get reproducible, instant responses. - -## Install +## Quick Start ```bash npm install @copilotkit/llmock ``` +```typescript +import { LLMock } from "@copilotkit/llmock"; + +const mock = new LLMock({ port: 5555 }); + +mock.onMessage("hello", { content: "Hi there!" }); + +const url = await mock.start(); +// Point your OpenAI client at `url` instead of https://api.openai.com + +// ... run your tests ... + +await mock.stop(); +``` + ## When to Use This vs MSW [MSW (Mock Service Worker)](https://mswjs.io/) is a popular API mocking library, but it solves a different problem. @@ -57,534 +70,23 @@ MSW can't intercept any of those calls. llmock can — it's a real server on a r | CLI for standalone use | **Yes** | **No** | | Zero dependencies | **Yes** | **No** (~300KB) | -## Quick Start - -```typescript -import { LLMock } from "@copilotkit/llmock"; - -const mock = new LLMock({ port: 5555 }); - -mock.onMessage("hello", { content: "Hi there!" }); - -const url = await mock.start(); -// Point your OpenAI client at `url` instead of https://api.openai.com - -// ... run your tests ... - -await mock.stop(); -``` - -## E2E Test Patterns - -Real-world patterns from using llmock in Playwright E2E tests with CopilotKit, Mastra, LangGraph, and Agno agent frameworks. - -### Global Setup/Teardown - -Start the mock server once for the entire test suite. All child processes (Next.js, agent workers) inherit the URL via environment variable. 
```typescript
// e2e/llmock-setup.ts
import { LLMock } from "@copilotkit/llmock";
import * as path from "node:path";

let mockServer: LLMock | null = null;

export async function setupLLMock(): Promise<void> {
  mockServer = new LLMock({ port: 5555 });

  // Load JSON fixtures from a directory
  mockServer.loadFixtureDir(path.join(__dirname, "fixtures", "openai"));

  const url = await mockServer.start();

  // Child processes use this to find the mock
  process.env.LLMOCK_URL = `${url}/v1`;
}

export async function teardownLLMock(): Promise<void> {
  if (mockServer) {
    await mockServer.stop();
    mockServer = null;
  }
}
```

The Next.js app (or any other service) just needs:

```env
OPENAI_BASE_URL=http://localhost:5555/v1
OPENAI_API_KEY=mock-key

# Or for Anthropic Claude:
ANTHROPIC_BASE_URL=http://localhost:5555/v1
ANTHROPIC_API_KEY=mock-key

# Or for Google Gemini (set baseUrl in code — see below):
GOOGLE_API_KEY=mock-key
```

For Google Gemini, the SDK doesn't support a base URL env var — pass it in code:

```typescript
// @google/genai (v1.x)
import { GoogleGenAI } from "@google/genai";
const ai = new GoogleGenAI({
  apiKey: process.env.GOOGLE_API_KEY,
  httpOptions: { baseUrl: "http://localhost:5555" },
});

// @google/generative-ai (v0.x)
import { GoogleGenerativeAI } from "@google/generative-ai";
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY!);
const model = genAI.getGenerativeModel(
  { model: "gemini-2.0-flash" },
  { baseUrl: "http://localhost:5555" },
);
```

### JSON Fixture Files

Define fixtures as JSON — one file per feature, loaded with `loadFixtureFile` or `loadFixtureDir`.

**Text responses** — match on a substring of the last user message:

```json
{
  "fixtures": [
    {
      "match": { "userMessage": "stock price of AAPL" },
      "response": { "content": "The current stock price of Apple Inc. (AAPL) is $150.25." }
    },
    {
      "match": { "userMessage": "capital of France" },
      "response": { "content": "The capital of France is Paris." }
    }
  ]
}
```

**Tool call responses** — the agent framework receives these as tool calls and executes them:

```json
{
  "fixtures": [
    {
      "match": { "userMessage": "one step with eggs" },
      "response": {
        "toolCalls": [
          {
            "name": "generate_task_steps",
            "arguments": "{\"steps\":[{\"description\":\"Crack eggs into bowl\",\"status\":\"enabled\"},{\"description\":\"Preheat oven to 350F\",\"status\":\"enabled\"}]}"
          }
        ]
      }
    },
    {
      "match": { "userMessage": "background color to blue" },
      "response": {
        "toolCalls": [
          {
            "name": "change_background",
            "arguments": "{\"background\":\"blue\"}"
          }
        ]
      }
    }
  ]
}
```

### Fixture Load Order Matters

Fixtures are evaluated first-match-wins. 
When two fixtures could match the same message, load the more specific one first:

```typescript
// Load HITL fixtures first — "one step with eggs" is more specific than
// "plan to make brownies" which also appears in the HITL user message
mockServer.loadFixtureFile(path.join(FIXTURES_DIR, "human-in-the-loop.json"));

// Then load everything else — earlier matches take priority
mockServer.loadFixtureDir(FIXTURES_DIR);
```

### Predicate-Based Routing

When substring matching isn't enough — for example, when the last user message is the same across multiple requests but the system prompt differs — use predicates:

```typescript
// Supervisor agent: same user message every time, but system prompt
// contains state flags like "Flights found: false"
mockServer.addFixture({
  match: {
    predicate: (req) => {
      const sysMsg = req.messages.find((m) => m.role === "system");
      return sysMsg?.content?.includes("Flights found: false") ?? false;
    },
  },
  response: {
    toolCalls: [
      {
        name: "supervisor_response",
        arguments: '{"answer":"Let me find flights for you!","next_agent":"flights_agent"}',
      },
    ],
  },
});

mockServer.addFixture({
  match: {
    predicate: (req) => {
      const sys = req.messages.find((m) => m.role === "system")?.content ?? "";
      return sys.includes("Flights found: true") && sys.includes("Hotels found: false");
    },
  },
  response: {
    toolCalls: [
      {
        name: "supervisor_response",
        arguments: '{"answer":"Now let me find hotels.","next_agent":"hotels_agent"}',
      },
    ],
  },
});
```

### Tool Result Catch-All

After a tool executes, the next request contains a `role: "tool"` message with the result. Add a catch-all for these so the conversation can continue:

```typescript
const toolResultFixture = {
  match: {
    predicate: (req) => {
      const last = req.messages[req.messages.length - 1];
      return last?.role === "tool";
    },
  },
  response: { content: "Done! I've completed that for you." },
};

// Prepend so it matches before substring-based fixtures
// (the last user message hasn't changed, so substring fixtures
// would match the same fixture again otherwise)
mockServer.prependFixture(toolResultFixture);
```

### Universal Catch-All

Append a catch-all last to handle any request that doesn't match a specific fixture, preventing 404s from crashing the test:

```typescript
mockServer.addFixture({
  match: { predicate: () => true },
  response: { content: "I understand. How can I help you with that?" },
});
```

## Programmatic API

### `new LLMock(options?)`

Create a new mock server instance.

| Option | Type | Default | Description |
| ----------- | -------- | ------------- | ----------------------------------- |
| `port` | `number` | `0` (random) | Port to listen on |
| `host` | `string` | `"127.0.0.1"` | Host to bind to |
| `latency` | `number` | `0` | Default ms delay between SSE chunks |
| `chunkSize` | `number` | `20` | Default characters per SSE chunk |

### `LLMock.create(options?)`

Static factory — creates an instance and starts it in one call. Returns `Promise<LLMock>`. 
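A minimal usage sketch (the `"ping"` fixture and random port are illustrative):

```typescript
import { LLMock } from "@copilotkit/llmock";

// create() constructs and starts the server in one call, so no separate start()
const mock = await LLMock.create({ port: 0 });
mock.onMessage("ping", { content: "pong" });

// ... point a client at mock.url and run requests ...

await mock.stop();
```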
### Server Lifecycle

| Method | Returns | Description |
| --------- | ----------------- | -------------------------------------- |
| `start()` | `Promise<string>` | Start the server, returns the base URL |
| `stop()` | `Promise<void>` | Stop the server |
| `url` | `string` | Base URL (throws if not started) |
| `baseUrl` | `string` | Alias for `url` |
| `port` | `number` | Listening port (throws if not started) |

### Fixture Registration

All registration methods return `this` for chaining.

#### `on(match, response, opts?)`

Register a fixture with full control over match criteria.

```typescript
mock.on({ userMessage: /weather/i, model: "gpt-4" }, { content: "It's sunny!" }, { latency: 50 });
```

#### `onMessage(pattern, response, opts?)`

Shorthand — matches on the last user message.

```typescript
mock.onMessage("hello", { content: "Hi!" });
mock.onMessage(/greet/i, { content: "Hey there!" });
```

#### `onToolCall(name, response, opts?)`

Shorthand — matches when the request contains a tool with the given name.

```typescript
mock.onToolCall("get_weather", {
  toolCalls: [{ name: "get_weather", arguments: '{"location":"SF"}' }],
});
```

#### `onToolResult(id, response, opts?)`

Shorthand — matches when a tool result message has the given `tool_call_id`.

```typescript
mock.onToolResult("call_abc123", { content: "Temperature is 72F" });
```

#### `addFixture(fixture)` / `addFixtures(fixtures)`

Add raw `Fixture` objects directly (appended to the end of the list).

#### `prependFixture(fixture)`

Insert a fixture at the **front** of the list so it matches before all existing fixtures.
Useful for catch-all predicates that must fire before substring-based fixtures.

```typescript
mock.prependFixture({
  match: { predicate: (req) => req.messages.at(-1)?.role === "tool" },
  response: { content: "Done!" },
});
```

#### `getFixtures()`

Returns a `readonly Fixture[]` view of all registered fixtures. Useful for
debugging and logging fixture statistics without accessing private internals.

```typescript
const fixtures = mock.getFixtures();
console.log(`${fixtures.length} fixtures loaded`);
```

#### `loadFixtureFile(path)` / `loadFixtureDir(path)`

Load fixtures from JSON files on disk. See [Fixture Files](#json-fixture-files) above.

#### `clearFixtures()`

Remove all registered fixtures.

### Error Injection

#### `nextRequestError(status, errorBody?)`

Queue a one-shot error for the very next request. The error fires once, then auto-removes itself.

```typescript
mock.nextRequestError(429, {
  message: "Rate limited",
  type: "rate_limit_error",
});

// Next request → 429 error
// Subsequent requests → normal fixture matching
```

### Request Journal

Every request to all API endpoints (`/v1/chat/completions`, `/v1/responses`, `/v1/messages`, and Gemini endpoints) is recorded in a journal.

#### Programmatic Access

| Method | Returns | Description |
| ------------------ | ---------------------- | ------------------------------------- |
| `getRequests()` | `JournalEntry[]` | All recorded requests |
| `getLastRequest()` | `JournalEntry \| null` | Most recent request |
| `clearRequests()` | `void` | Clear the journal |
| `journal` | `Journal` | Direct access to the journal instance |

```typescript
await fetch(mock.url + "/v1/chat/completions", { ... 
}); - -const last = mock.getLastRequest(); -expect(last?.body.messages).toContainEqual({ - role: "user", - content: "hello", -}); -``` - -#### HTTP Endpoints - -The server also exposes journal data over HTTP (useful in CLI mode): - -- `GET /v1/_requests` — returns all journal entries as JSON. Supports `?limit=N`. -- `DELETE /v1/_requests` — clears the journal. Returns 204. - -### Reset - -#### `reset()` - -Clear all fixtures **and** the journal in one call. Works before or after the server is started. - -```typescript -afterEach(() => { - mock.reset(); -}); -``` - -## Fixture Matching - -Fixtures are evaluated in registration order (first match wins). A fixture matches when **all** specified fields match the incoming request (AND logic). - -| Field | Type | Matches on | -| ------------- | ------------------ | --------------------------------------------- | -| `userMessage` | `string \| RegExp` | Content of the last `role: "user"` message | -| `toolName` | `string` | Name of a tool in the request's `tools` array | -| `toolCallId` | `string` | `tool_call_id` on a `role: "tool"` message | -| `model` | `string \| RegExp` | The `model` field in the request | -| `predicate` | `(req) => boolean` | Arbitrary matching function | - -## Fixture Responses - -### Text - -```typescript -{ - content: "Hello world"; -} -``` - -Streams as SSE chunks, splitting `content` by `chunkSize`. With `stream: false`, returns a standard `chat.completion` JSON object. - -### Tool Calls - -```typescript -{ - toolCalls: [{ name: "get_weather", arguments: '{"location":"SF"}' }]; -} -``` - -### Errors - -```typescript -{ - error: { message: "Rate limited", type: "rate_limit_error" }, - status: 429 -} -``` - -## API Endpoints - -The server handles: - -- **POST `/v1/chat/completions`** — OpenAI Chat Completions API (streaming and non-streaming) -- **POST `/v1/responses`** — OpenAI Responses API (streaming and non-streaming) -- **POST `/v1/messages`** — Anthropic Claude Messages API (streaming and non-streaming) -- **POST `/v1beta/models/{model}:generateContent`** — Google Gemini (non-streaming) -- **POST `/v1beta/models/{model}:streamGenerateContent`** — Google Gemini (streaming) - -WebSocket endpoints: - -- **WS `/v1/responses`** — OpenAI Responses API over WebSocket -- **WS `/v1/realtime`** — OpenAI Realtime API (text + tool calls) -- **WS `/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent`** — Gemini Live ([unverified](#gemini-live-bidigeneratecontent)) - -All endpoints share the same fixture pool — the same fixtures work across all providers. Requests are translated to a common format internally for fixture matching. 
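As a sketch of that cross-provider behavior (the model names here are illustrative), one fixture answers both an OpenAI-style and a Claude-style request:

```typescript
mock.onMessage("hello", { content: "Hi!" });

// OpenAI Chat Completions endpoint
await fetch(`${mock.url}/v1/chat/completions`, {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({ model: "gpt-4o", messages: [{ role: "user", content: "hello" }] }),
});

// Anthropic Messages endpoint resolves against the same fixture
await fetch(`${mock.url}/v1/messages`, {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    model: "claude-sonnet",
    max_tokens: 64,
    messages: [{ role: "user", content: "hello" }],
  }),
});
```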
+## Features -## WebSocket APIs +- **[Multi-provider support](https://llmock.com/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.com/chat-completions.html), [OpenAI Responses](https://llmock.com/responses-api.html), [Anthropic Claude](https://llmock.com/claude-messages.html), [Google Gemini](https://llmock.com/gemini.html), [AWS Bedrock](https://llmock.com/aws-bedrock.html), [Azure OpenAI](https://llmock.com/azure-openai.html) +- **[Embeddings API](https://llmock.com/embeddings.html)** — OpenAI-compatible embedding responses with configurable dimensions +- **[Structured output / JSON mode](https://llmock.com/structured-output.html)** — `response_format`, `json_schema`, and function calling +- **[Sequential responses](https://llmock.com/sequential-responses.html)** — Stateful multi-turn fixtures that return different responses on each call +- **[Streaming physics](https://llmock.com/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` for realistic timing +- **[WebSocket APIs](https://llmock.com/websocket.html)** — OpenAI Responses WS, Realtime API, and Gemini Live +- **[Error injection](https://llmock.com/error-injection.html)** — One-shot errors, rate limiting, and provider-specific error formats +- **[Request journal](https://llmock.com/docs.html)** — Record, inspect, and assert on every request +- **[Fixture validation](https://llmock.com/fixtures.html)** — Schema validation at load time with `--validate-on-load` +- **CLI with hot-reload** — Standalone server with `--watch` for live fixture editing +- **[Docker + Helm](https://llmock.com/docker.html)** — Container image and Helm chart for CI/CD pipelines +- **[Drift detection](https://llmock.com/drift-detection.html)** — Daily CI runs against real APIs to catch response format changes +- **Claude Code integration** — `/write-fixtures` skill teaches your AI assistant how to write fixtures correctly -The same fixtures that drive HTTP responses also work over WebSocket transport. llmock implements RFC 6455 WebSocket framing with zero external dependencies — connect, send events, and receive streaming responses in real provider formats. - -Only text and tool call paths are supported over WebSocket. Audio, video, and binary frames are not implemented. - -### OpenAI Responses API (WebSocket) - -Connect to `ws://localhost:5555/v1/responses` and send a `response.create` event. The server streams back the same events as OpenAI's real WebSocket Responses API: - -```jsonc -// → Client sends: -{ - "type": "response.create", - "model": "gpt-4o", - "instructions": "You are a helpful assistant.", - "input": [ - { "type": "message", "role": "user", "content": [{ "type": "input_text", "text": "Hello" }] }, - ], -} - -// ← Server streams: -// {"type": "response.created", ...} -// {"type": "response.output_item.added", ...} -// {"type": "response.content_part.added", ...} -// {"type": "response.output_item.done", ...} -// {"type": "response.done", ...} -``` - -### OpenAI Realtime API - -Connect to `ws://localhost:5555/v1/realtime`. The Realtime API uses a session-based protocol — configure the session, add conversation items, then request a response: - -```jsonc -// → Configure session: -{ "type": "session.update", "session": { "modalities": ["text"], "model": "gpt-4o-realtime" } } - -// → Add a user message: -{ - "type": "conversation.item.create", - "item": { - "type": "message", - "role": "user", - "content": [{ "type": "input_text", "text": "What is the capital of France?" 
}] - } -} - -// → Request a response: -{ "type": "response.create" } - -// ← Server streams: -// {"type": "response.created", ...} -// {"type": "response.text.delta", "delta": "The"} -// {"type": "response.text.delta", "delta": " capital"} -// ... -// {"type": "response.text.done", ...} -// {"type": "response.done", ...} -``` - -### Gemini Live (BidiGenerateContent) - -Connect to `ws://localhost:5555/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent`. Gemini Live uses a setup/content/response flow. - -> **⚠️ Unverified**: As of March 2026, Google's only `bidiGenerateContent`-capable models are audio-only — no text-capable model exists for this endpoint. llmock implements the text-based protocol as documented in Google's [Live API reference](https://ai.google.dev/api/live), but the response shapes have not been verified against real API output. Code you write against this mock may need adjustment when Google ships a text-capable Live model. See [DRIFT.md](DRIFT.md#gemini-live-unverified) for details and the automated canary that tracks model availability. - -```jsonc -// → Setup message (must be first): -{ "setup": { "model": "models/gemini-2.5-flash", "generationConfig": { "responseModalities": ["TEXT"] } } } - -// → Send user content: -{ "clientContent": { "turns": [{ "role": "user", "parts": [{ "text": "Hello" }] }], "turnComplete": true } } - -// ← Server streams: -// {"setupComplete": {}} -// {"serverContent": {"modelTurn": {"parts": [{"text": "Hello"}]}, "turnComplete": false}} -// {"serverContent": {"modelTurn": {"parts": [{"text": "!"}]}, "turnComplete": true}} -``` - -## CLI - -The package includes a standalone server binary: +## CLI Quick Reference ```bash llmock [options] @@ -613,98 +115,15 @@ llmock -p 8080 -f ./my-fixtures llmock --latency 100 --chunk-size 5 ``` -## Advanced Usage - -### Low-level Server - -If you need the raw HTTP server without the `LLMock` wrapper: - -```typescript -import { createServer } from "@copilotkit/llmock"; - -const fixtures = [{ match: { userMessage: "hi" }, response: { content: "Hello!" } }]; - -const { server, journal, url } = await createServer(fixtures, { port: 0 }); -// ... use it ... -server.close(); -``` - -### Per-Fixture Timing - -```typescript -mock.on({ userMessage: "slow" }, { content: "Finally..." }, { latency: 200, chunkSize: 5 }); -``` - -## Claude Code Integration +## Documentation -llmock ships with a [Claude Code](https://docs.anthropic.com/en/docs/claude-code) skill that teaches your AI assistant how to write fixtures correctly — match fields, response types, agent loop patterns, gotchas, and debugging techniques. Available as the `/write-fixtures` slash command. +Full API reference, fixture format, E2E patterns, and provider-specific guides: -### Option 1: Plugin install (recommended) - -```bash -# Add the marketplace (one time) -/plugin marketplace add CopilotKit/llmock - -# Install the plugin -/plugin install llmock@copilotkit-tools -``` - -The skill appears as `/llmock:write-fixtures`. - -### Option 2: Local plugin from node_modules - -```bash -claude --plugin-dir ./node_modules/@copilotkit/llmock -``` - -Same result, no marketplace needed. Good for trying it out. - -### Option 3: Add directory - -```bash -claude --add-dir ./node_modules/@copilotkit/llmock -``` - -The skill appears as `/write-fixtures` for the session. 
- -### Option 4: Copy to your project - -```bash -mkdir -p .claude/commands -cp node_modules/@copilotkit/llmock/.claude/commands/write-fixtures.md .claude/commands/ -``` - -Permanently available as `/write-fixtures` in your project. Commit to share with your team. - -## Future Direction - -Areas where llmock could grow, and explicit non-goals for the current scope. - -### WebSocket APIs - -- **Audio and multimodal**: OpenAI Realtime API audio buffers, voice activity detection, and audio transcription are not implemented. Gemini Live audio/video input and output are similarly out of scope. Only text and tool call paths are supported over WebSocket. -- **Binary WebSocket frames**: Only text frames are processed; binary frames are silently ignored. -- **WebSocket compression**: `permessage-deflate` is not supported. -- **Session persistence**: Realtime and Gemini Live sessions exist only for the lifetime of a single WebSocket connection. There is no cross-connection session resumption. - -### Fixtures - -- **Request metadata in predicates**: Predicate functions receive only the `ChatCompletionRequest`, not HTTP headers, method, or URL. -- **Multi-turn conversation state**: Fixtures are stateless — there is no built-in way to sequence responses across multiple requests in a conversation. -- **Validation on load**: Schema validation is available via `--validate-on-load` (CLI) and `validateFixtures()` (programmatic API), but it is opt-in and not enabled by default. -- **Inheritance and aliasing**: No `$ref` or `extends` mechanism for fixture reuse across files. - -### Testing - -- **Live API drift detection**: The `drift` test suite runs against real OpenAI, Anthropic, and Gemini APIs to catch response format drift. See [DRIFT.md](DRIFT.md) for details on the three-layer triangulation approach, how to run tests, and how to fix detected drift. Runs daily in CI; requires API keys. -- **Token counts**: Usage fields are always zero across all providers. -- **Vision/image content**: Image content parts are not handled by any provider. +**[llmock.com/docs.html](https://llmock.com/docs.html)** ## Real-World Usage -[CopilotKit](https://github.com/CopilotKit/CopilotKit) uses llmock across its test suite to verify AI agent behavior across multiple LLM providers without hitting real APIs. The tests cover streaming text, tool calls, and multi-turn conversations across both v1 and v2 runtimes. - -See the [CopilotKit test suite](https://github.com/CopilotKit/CopilotKit/search?q=llmock&type=code) for real-world examples of llmock in action. +[CopilotKit](https://github.com/CopilotKit/CopilotKit) uses llmock across its test suite to verify AI agent behavior across multiple LLM providers without hitting real APIs. ## License diff --git a/docs/index.html b/docs/index.html index c8c3dbd0..ca177036 100644 --- a/docs/index.html +++ b/docs/index.html @@ -945,16 +945,6 @@ transform: translateY(0); } } - @keyframes sseLine { - from { - opacity: 0; - transform: translateX(-8px); - } - to { - opacity: 1; - transform: translateX(0); - } - } @keyframes blink { 50% { opacity: 0; @@ -1046,8 +1036,8 @@