From 0029d937ebe28169bf128bd12dfa7082a7c3cd41 Mon Sep 17 00:00:00 2001 From: Tom Aylott Date: Tue, 11 Nov 2025 16:58:43 -0500 Subject: [PATCH 1/7] Add prompt-caching examples for fetch API - Add comprehensive documentation in docs/prompt-caching.md - Add 3 separate example files in typescript/fetch/src/prompt-caching/: - user-message-cache.ts: Cache on user message - multi-message-cache.ts: Cache in multi-turn conversation - no-cache-control.ts: Control scenario (no caching) - Add shared workspace with LARGE_SYSTEM_PROMPT constant - Add Bun monorepo structure with workspaces - Add Makefile for build orchestration - Add biome.jsonc for code quality --- .gitignore | 6 + Makefile | 33 +++ README.md | 106 ++++++++++ biome.jsonc | 99 +++++++++ docs/prompt-caching.md | 193 +++++++++++++++++ typescript/README.md | 51 +++++ typescript/fetch/README.md | 27 +++ typescript/fetch/package.json | 16 ++ typescript/fetch/src/prompt-caching/README.md | 98 +++++++++ .../src/prompt-caching/multi-message-cache.ts | 199 ++++++++++++++++++ .../src/prompt-caching/no-cache-control.ts | 188 +++++++++++++++++ .../src/prompt-caching/user-message-cache.ts | 193 +++++++++++++++++ typescript/fetch/tsconfig.json | 16 ++ typescript/package.json | 21 ++ typescript/shared/package.json | 17 ++ typescript/shared/src/constants.ts | 144 +++++++++++++ typescript/shared/src/types.ts | 74 +++++++ typescript/shared/tsconfig.json | 16 ++ 18 files changed, 1497 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 biome.jsonc create mode 100644 docs/prompt-caching.md create mode 100644 typescript/README.md create mode 100644 typescript/fetch/README.md create mode 100644 typescript/fetch/package.json create mode 100644 typescript/fetch/src/prompt-caching/README.md create mode 100644 typescript/fetch/src/prompt-caching/multi-message-cache.ts create mode 100644 typescript/fetch/src/prompt-caching/no-cache-control.ts create mode 100644 typescript/fetch/src/prompt-caching/user-message-cache.ts create mode 100644 typescript/fetch/tsconfig.json create mode 100644 typescript/package.json create mode 100644 typescript/shared/package.json create mode 100644 typescript/shared/src/constants.ts create mode 100644 typescript/shared/src/types.ts create mode 100644 typescript/shared/tsconfig.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b32d8d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +node_modules/ +typescript/node_modules/ +typescript/*/node_modules/ +typescript/bun.lock +*.log +.DS_Store diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fa05ddf --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +# Makefile - Root orchestration for openrouter-examples + +.PHONY: help examples typescript install clean + +help: + @echo "OpenRouter Examples - Available commands:" + @echo "" + @echo " make examples - Run all examples" + @echo " make typescript - Run TypeScript monorepo examples" + @echo " make install - Install TypeScript dependencies" + @echo " make clean - Clean node_modules and lockfiles" + @echo "" + +# Run all examples examples: typescript + +# Run TypeScript monorepo examples typescript: + @echo "=== Running TypeScript examples ===" + @cd typescript && bun examples + +# Install TypeScript dependencies install: + @echo "=== Installing TypeScript dependencies ===" + @cd typescript && bun install + +# Clean build artifacts clean: + @echo "=== Cleaning TypeScript artifacts ===" 
+ @rm -rf typescript/node_modules + @rm -rf typescript/*/node_modules + @rm -rf typescript/bun.lock + @echo "Clean complete" diff --git a/README.md b/README.md new file mode 100644 index 0000000..8c34391 --- /dev/null +++ b/README.md @@ -0,0 +1,106 @@ +# OpenRouter Examples + +Comprehensive, tested, executable examples demonstrating OpenRouter features across multiple ecosystems. + +## Quick Start + +```bash +# Set your API key +export OPENROUTER_API_KEY="your-key-here" + +# Run all examples +make examples + +# Or run specific ecosystems +make curl # Run curl examples +make typescript # Run TypeScript monorepo examples +``` + +## Repository Structure + +``` +. +├── curl/ - Shell script examples +├── typescript/ - TypeScript monorepo (Bun workspaces) +│ ├── shared/ - Shared constants and types +│ ├── fetch/ - Raw fetch API examples +│ ├── ai-sdk-v5/ - Vercel AI SDK v5 examples +│ ├── effect-ai/ - Effect-TS examples +│ └── openrouter-sdk/ - OpenRouter SDK examples (TODO) +├── docs/ - Feature documentation +└── Makefile - Unified command interface +``` + +## Features + +### Prompt Caching +- **Documentation**: [docs/prompt-caching.md](docs/prompt-caching.md) +- **Examples**: + - [curl/prompt-caching.sh](curl/prompt-caching.sh) + - [typescript/fetch/src/prompt-caching.ts](typescript/fetch/src/prompt-caching.ts) + - [typescript/ai-sdk-v5/src/prompt-caching.ts](typescript/ai-sdk-v5/src/prompt-caching.ts) + - [typescript/effect-ai/src/prompt-caching.ts](typescript/effect-ai/src/prompt-caching.ts) + +## Prerequisites + +- Bun runtime: `curl -fsSL https://bun.sh/install | bash` +- OpenRouter API key: [https://openrouter.ai/keys](https://openrouter.ai/keys) +- For curl examples: `jq` (JSON processor) + +## Installation + +```bash +# Install TypeScript dependencies +make install + +# Or manually +cd typescript && bun install +``` + +## Running Examples + +### All Examples +```bash +make examples +``` + +### By Ecosystem +```bash +make curl # Shell scripts with curl + jq +make typescript # All TypeScript examples (fetch, AI SDK, Effect) +``` + +### Individual Examples +```bash +# curl +bash curl/prompt-caching.sh + +# TypeScript +cd typescript/fetch && bun examples +cd typescript/ai-sdk-v5 && bun examples +cd typescript/effect-ai && bun examples +``` + +## Benefits + +### For Users +1. **Copy-paste ready** - All examples are runnable as-is +2. **Tested and proven** - Every example has been verified to work +3. **Evidence-based** - Examples show expected outputs and verification +4. **Multiple ecosystems** - Choose the one that matches your stack + +### For Developers +1. **Single source of truth** - Constants defined once in `typescript/shared/` +2. **Type safety** - Shared types across all TypeScript examples +3. **Consistent patterns** - Each ecosystem follows its own idioms +4. 
**Easy maintenance** - Bun monorepo for TypeScript workspaces + +## Contributing + +See individual ecosystem READMEs: +- [curl/README.md](curl/README.md) +- [typescript/README.md](typescript/README.md) + +## License + +See [LICENSE.md](LICENSE.md) diff --git a/biome.jsonc b/biome.jsonc new file mode 100644 index 0000000..d45186a --- /dev/null +++ b/biome.jsonc @@ -0,0 +1,99 @@ +{ + "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", + "vcs": { + "enabled": true, + "clientKind": "git", + "useIgnoreFile": true + }, + "files": { + "ignoreUnknown": false, + "ignore": [ + "**/*.json", + "!biome.json" + ] + }, + "formatter": { + "enabled": true, + "indentStyle": "space", + "indentWidth": 2, + "lineWidth": 100, + "attributePosition": "multiline" + }, + "organizeImports": { + "enabled": true + }, + "linter": { + "enabled": true, + "rules": { + "recommended": true, + "complexity": { + "useLiteralKeys": "off", + "noExtraBooleanCast": "off", + "noForEach": "off", + "noBannedTypes": "error", + "noUselessSwitchCase": "off" + }, + "style": { + "noNonNullAssertion": "off", + "useNodejsImportProtocol": "off", + "useTemplate": "off", + "useBlockStatements": "error", + "noParameterAssign": "error", + "useConst": "error", + "useAsConstAssertion": "error", + "useDefaultParameterLast": "error", + "useEnumInitializers": "error", + "useSelfClosingElements": "error", + "useSingleVarDeclarator": "error", + "noUnusedTemplateLiteral": "error", + "useNumberNamespace": "error", + "noInferrableTypes": "error", + "noUselessElse": "error", + "useImportType": "error" + }, + "correctness": { + "noUnusedImports": "error", + "useExhaustiveDependencies": "off", + "noUnknownFunction": "off", + "noChildrenProp": "off", + "noInnerDeclarations": "error" + }, + "suspicious": { + "noExplicitAny": "error", + "noArrayIndexKey": "off", + "noAssignInExpressions": "error", + "noAsyncPromiseExecutor": "off", + "noFallthroughSwitchClause": "error", + "noConsole": "off", + "noDoubleEquals": { + "level": "error", + "options": { + "ignoreNull": false + } + }, + "noExtraNonNullAssertion": "error" + }, + "performance": { + "recommended": true, + "noAccumulatingSpread": "error" + }, + "security": { + "recommended": true + } + } + }, + "javascript": { + "formatter": { + "lineWidth": 100, + "arrowParentheses": "always", + "jsxQuoteStyle": "single", + "attributePosition": "multiline", + "quoteProperties": "asNeeded", + "trailingCommas": "all", + "semicolons": "always", + "bracketSpacing": true, + "bracketSameLine": false, + "quoteStyle": "single" + } + } +} diff --git a/docs/prompt-caching.md b/docs/prompt-caching.md new file mode 100644 index 0000000..e1871f8 --- /dev/null +++ b/docs/prompt-caching.md @@ -0,0 +1,193 @@ +# Prompt Caching + +Reduce AI model costs by caching prompt messages across supported providers. + +## Overview + +Prompt caching allows you to save on inference costs by caching portions of your prompts. Most providers automatically enable caching, but some (like Anthropic and Google Gemini) require explicit `cache_control` breakpoints. + +When using caching, OpenRouter makes a best-effort attempt to route requests to the same provider to leverage the warm cache. If that provider is unavailable, OpenRouter routes to the next-best provider. + +## Inspecting Cache Usage + +You can verify cache savings through: + +1. The [Activity page](/activity) detail view +2. The `/api/v1/generation` API ([docs](/api-reference/overview#querying-cost-and-stats)) +3. 
`usage: {include: true}` in your request to see cache tokens in the response + +The `cache_discount` field shows how much you saved. Some providers (like Anthropic) have negative discounts on cache writes but positive discounts on cache reads. + +## Provider Support + +### OpenAI + +**Automatic caching** - no configuration needed. + +- **Cache writes**: no cost +- **Cache reads**: 0.25x or 0.50x of input token price (model-dependent) +- **Minimum**: 1024 tokens + +[OpenAI pricing](https://platform.openai.com/docs/pricing) | [Documentation](https://platform.openai.com/docs/guides/prompt-caching) + +### Grok + +**Automatic caching** - no configuration needed. + +- **Cache writes**: no cost +- **Cache reads**: 0.10x of input token price + +[Grok pricing](https://docs.x.ai/docs/models#models-and-pricing) + +### Moonshot AI + +**Automatic caching** - no configuration needed. + +- **Cache writes**: no cost +- **Cache reads**: 0.10x of input token price + +[Moonshot documentation](https://platform.moonshot.ai/docs/api/caching) + +### Groq + +**Automatic caching** - no configuration needed (Kimi K2 models only). + +- **Cache writes**: no cost +- **Cache reads**: 0.00x of input token price (free) + +[Groq documentation](https://console.groq.com/docs/prompt-caching) + +### DeepSeek + +**Automatic caching** - no configuration needed. + +- **Cache writes**: same as input token price +- **Cache reads**: 0.10x of input token price + +### Anthropic Claude + +**Manual caching** - requires `cache_control` breakpoints. + +- **Cache writes**: 1.25x of input token price +- **Cache reads**: 0.10x of input token price +- **Limit**: 4 breakpoints per request +- **TTL**: 5 minutes +- **Best for**: Large bodies of text (character cards, CSV data, RAG data, book chapters) + +[Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) + +**System message example:** + +```json +{ + "messages": [ + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are a historian studying the fall of the Roman Empire. You know this book well:" + }, + { + "type": "text", + "text": "HUGE TEXT BODY", + "cache_control": {"type": "ephemeral"} + } + ] + }, + { + "role": "user", + "content": [{"type": "text", "text": "What triggered the collapse?"}] + } + ] +} +``` + +**User message example:** + +```json +{ + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Given the book below:"}, + { + "type": "text", + "text": "HUGE TEXT BODY", + "cache_control": {"type": "ephemeral"} + }, + {"type": "text", "text": "Name all the characters"} + ] + } + ] +} +``` + +### Google Gemini + +**Two caching modes**: Implicit (automatic) and explicit (manual). + +#### Implicit Caching (Gemini 2.5 Pro & Flash) + +**Automatic caching** - no configuration needed. + +- **Cache writes**: no storage cost +- **Cache reads**: 0.25x of input token price +- **TTL**: 3-5 minutes (variable) +- **Minimum**: 2000 tokens (2.5 Flash), 32000 tokens (2.5 Pro) + +**Tip**: Keep the initial portion of message arrays consistent between requests. Push variations (user questions, dynamic context) toward the end. + +[Google announcement](https://developers.googleblog.com/en/gemini-2-5-models-now-support-implicit-caching/) + +#### Explicit Caching (Legacy) + +**Manual caching** - requires `cache_control` breakpoints (similar to Anthropic). 
+ +- **Cache writes**: Input token price + 5 minutes storage +- **Cache reads**: 0.25x of input token price +- **TTL**: 5 minutes (fixed, does not refresh) +- **Minimum**: 4096 tokens (most models), 2000 tokens (2.5 Flash), 32000 tokens (2.5 Pro) + +**Note**: OpenRouter manages cache lifecycle - you don't create/update/delete caches manually. + +**Tip**: You can include multiple `cache_control` breakpoints for Anthropic compatibility. OpenRouter uses only the last breakpoint for Gemini. + +[Google pricing docs](https://ai.google.dev/gemini-api/docs/pricing) + +**System message example:** + +```json +{ + "messages": [ + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are a historian studying the fall of the Roman Empire. Below is an extensive reference book:" + }, + { + "type": "text", + "text": "HUGE TEXT BODY HERE", + "cache_control": {"type": "ephemeral"} + } + ] + }, + { + "role": "user", + "content": [{"type": "text", "text": "What triggered the collapse?"}] + } + ] +} +``` + +## Examples + +See ecosystem-specific examples: + +- **TypeScript + fetch**: [typescript/fetch/src/prompt-caching/](../typescript/fetch/src/prompt-caching/) + - [user-message-cache.ts](../typescript/fetch/src/prompt-caching/user-message-cache.ts) + - [multi-message-cache.ts](../typescript/fetch/src/prompt-caching/multi-message-cache.ts) + - [no-cache-control.ts](../typescript/fetch/src/prompt-caching/no-cache-control.ts) (control) diff --git a/typescript/README.md b/typescript/README.md new file mode 100644 index 0000000..2995dc5 --- /dev/null +++ b/typescript/README.md @@ -0,0 +1,51 @@ +# TypeScript Examples + +A Bun monorepo containing OpenRouter examples across different TypeScript ecosystems. + +## Structure + +- **shared/** - Shared constants and utilities (LARGE_SYSTEM_PROMPT, types, etc.) +- **fetch/** - Raw fetch API examples +- **ai-sdk-v5/** - Vercel AI SDK v5 examples (using ai v4.x package) +- **effect-ai/** - Effect-TS AI examples +- **openrouter-sdk/** - OpenRouter TypeScript SDK examples (TODO) + +## Prerequisites + +- Bun runtime: `curl -fsSL https://bun.sh/install | bash` +- `OPENROUTER_API_KEY` environment variable + +## Installation + +```bash +# From repository root +make install + +# Or from the typescript/ directory +cd typescript +bun install +``` + +## Running Examples + +```bash +# From repository root +export OPENROUTER_API_KEY="your-key-here" +make typescript + +# Or from the typescript/ directory +cd typescript +bun examples + +# Or run individual workspaces +cd fetch && bun examples +cd ai-sdk-v5 && bun examples +cd effect-ai && bun examples +``` + +## Workspace Benefits + +1. **Shared constants** - LARGE_SYSTEM_PROMPT defined once in `shared/` +2. **Consistent dependencies** - Managed at monorepo root with Bun workspaces +3. **Type sharing** - Common types available across workspaces +4. **Easy testing** - Run all examples from one location with `make typescript` or `bun examples` diff --git a/typescript/fetch/README.md b/typescript/fetch/README.md new file mode 100644 index 0000000..0278535 --- /dev/null +++ b/typescript/fetch/README.md @@ -0,0 +1,27 @@ +# TypeScript + fetch Examples + +Raw HTTP examples using TypeScript and the native `fetch` API. 
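+ +A minimal sketch of the request shape these examples build on (the model and prompt here are illustrative; the files in `src/prompt-caching/` are the full, runnable versions): + +```typescript +// Minimal OpenRouter chat completion via fetch (sketch). +// Assumes OPENROUTER_API_KEY is set in the environment. +const response = await fetch('https://openrouter.ai/api/v1/chat/completions', { + method: 'POST', + headers: { + Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: 'anthropic/claude-3.5-sonnet', + messages: [{ role: 'user', content: 'Hello!' }], + }), +}); +const data = await response.json(); +console.log(data.choices[0].message.content); +```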
+ +## Prerequisites + +- Bun runtime: `curl -fsSL https://bun.sh/install | bash` +- `OPENROUTER_API_KEY` environment variable + +## Running Examples + +```bash +# From monorepo root (typescript/) +bun examples + +# Or from this workspace +cd fetch +bun examples +``` + +## Features + +- [prompt-caching/](./src/prompt-caching/) - Anthropic caching with TypeScript types + +## Dependencies + +- `@openrouter-examples/shared` - Shared constants (LARGE_SYSTEM_PROMPT) and types diff --git a/typescript/fetch/package.json b/typescript/fetch/package.json new file mode 100644 index 0000000..b1ac767 --- /dev/null +++ b/typescript/fetch/package.json @@ -0,0 +1,16 @@ +{ + "name": "@openrouter-examples/fetch", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "examples": "bun run src/prompt-caching/user-message-cache.ts && bun run src/prompt-caching/multi-message-cache.ts && bun run src/prompt-caching/no-cache-control.ts", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@openrouter-examples/shared": "workspace:*" + }, + "devDependencies": { + "@types/bun": "latest" + } +} diff --git a/typescript/fetch/src/prompt-caching/README.md b/typescript/fetch/src/prompt-caching/README.md new file mode 100644 index 0000000..7de5a81 --- /dev/null +++ b/typescript/fetch/src/prompt-caching/README.md @@ -0,0 +1,98 @@ +# Anthropic Prompt Caching Examples + +This directory contains examples demonstrating Anthropic's prompt caching feature via OpenRouter using the raw fetch API. + +## What is Prompt Caching? + +Anthropic's prompt caching allows you to cache large portions of your prompts (like system messages or context documents) to: +- **Reduce costs** - Cached tokens cost significantly less than regular tokens +- **Improve latency** - Cached content is processed faster on subsequent requests +- **Enable larger contexts** - Use more context without proportional cost increases + +Cache TTL: 5 minutes for ephemeral caches + +## Examples + +### 1. System Message Cache (`system-message-cache.ts`) +The most common pattern - cache a large system prompt: +```bash +bun run typescript/fetch/src/prompt-caching/system-message-cache.ts +``` + +**Pattern**: System message with content-level `cache_control` + +### 2. User Message Cache (`user-message-cache.ts`) +Cache large context in user messages (e.g., uploading documents): +```bash +bun run typescript/fetch/src/prompt-caching/user-message-cache.ts +``` + +**Pattern**: User message with content-level `cache_control` on context block + +### 3. Multi-Message Cache (`multi-message-cache.ts`) +Cache system prompt across multi-turn conversations: +```bash +bun run typescript/fetch/src/prompt-caching/multi-message-cache.ts +``` + +**Pattern**: System message cache persists through conversation history + +### 4. 
No Cache Control (`no-cache-control.ts`) +Control scenario - no caching should occur: +```bash +bun run typescript/fetch/src/prompt-caching/no-cache-control.ts +``` + +**Pattern**: Same structure but NO `cache_control` markers (validates methodology) + +## How to Use Cache Control + +```typescript +const requestBody = { + model: 'anthropic/claude-3.5-sonnet', + stream_options: { + include_usage: true, // CRITICAL: Required for cache metrics + }, + messages: [ + { + role: 'system', + content: [ + { + type: 'text', + text: 'Your large system prompt here...', + cache_control: { type: 'ephemeral' }, // Cache this block + }, + ], + }, + { + role: 'user', + content: 'Your question here', + }, + ], +}; +``` + +## Important Notes + +### OpenRouter Format Transformation +OpenRouter transforms Anthropic's native response format to OpenAI-compatible format: +- **Anthropic native**: `usage.cache_read_input_tokens`, `usage.cache_creation_input_tokens` +- **OpenRouter returns**: `usage.prompt_tokens_details.cached_tokens` (OpenAI-compatible) + +### Requirements for Caching +1. **stream_options.include_usage = true** - CRITICAL, otherwise no usage details +2. **Minimum 2048+ tokens** - Smaller content may not be cached reliably +3. **cache_control on content blocks** - Not on message level +4. **Exact match** - Cache only hits on identical content + +### Expected Behavior +- **First call**: `cached_tokens = 0` (cache miss, creates cache) +- **Second call**: `cached_tokens > 0` (cache hit, reads from cache) +- **Control**: `cached_tokens = 0` on both calls (no cache_control) + +## Scientific Method +All examples follow scientific method principles: +- **Hypothesis**: cache_control triggers Anthropic caching +- **Experiment**: Make identical calls twice +- **Evidence**: Measure via `usage.prompt_tokens_details.cached_tokens` +- **Analysis**: Compare first call (miss) vs second call (hit) diff --git a/typescript/fetch/src/prompt-caching/multi-message-cache.ts b/typescript/fetch/src/prompt-caching/multi-message-cache.ts new file mode 100644 index 0000000..e59e6a8 --- /dev/null +++ b/typescript/fetch/src/prompt-caching/multi-message-cache.ts @@ -0,0 +1,199 @@ +/** + * Example: Anthropic Prompt Caching - Multi-Message Conversation + * + * This example demonstrates Anthropic prompt caching in a multi-message conversation via OpenRouter. 
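+ * The cached system prefix stays stable while the conversation history grows, so each follow-up turn can read the cache instead of re-processing the large system prompt.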
+ * + * Scientific Method: + * - Hypothesis: cache_control at content-item level triggers Anthropic caching + * - Experiment: Make identical calls twice and measure cache hit via usage metrics + * - Evidence: usage.prompt_tokens_details.cached_tokens (OpenAI-compatible format) + * + * IMPORTANT: OpenRouter transforms Anthropic's native response format to OpenAI-compatible format: + * - Anthropic native: usage.cache_read_input_tokens, usage.cache_creation_input_tokens + * - OpenRouter returns: usage.prompt_tokens_details.cached_tokens (OpenAI-compatible) + * + * Anthropic Cache Requirements: + * - **CRITICAL**: stream_options.include_usage must be set to true (otherwise no usage details) + * - Minimum 2048+ tokens to cache reliably (we use 30k+ char system prompt from shared) + * - cache_control: {type: "ephemeral"} on content items + * - TTL: 5 minutes for ephemeral caches + * + * Pattern: Multi-message conversation with cache_control + * - System message with cache + * - Multiple user/assistant exchanges + * - Cache should persist across the conversation + * + * To run: bun run typescript/fetch/src/prompt-caching/multi-message-cache.ts + */ + +import { LARGE_SYSTEM_PROMPT } from '@openrouter-examples/shared/constants'; +import type { ChatCompletionResponse } from '@openrouter-examples/shared/types'; + +// OpenRouter API endpoint +const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions'; + +/** + * Make a chat completion request to OpenRouter with Anthropic caching + */ +async function makeRequest( + requestBody: unknown, + description: string, +): Promise<ChatCompletionResponse> { + console.log(`\n${description}`); + + if (!process.env.OPENROUTER_API_KEY) { + throw new Error('OPENROUTER_API_KEY environment variable is not set'); + } + + const response = await fetch(OPENROUTER_API_URL, { + method: 'POST', + headers: { + Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`, + 'Content-Type': 'application/json', + 'HTTP-Referer': 'https://github.com/openrouter/examples', + 'X-Title': 'Anthropic Cache - Multi-Message', + }, + body: JSON.stringify(requestBody), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`HTTP error! status: ${response.status}, body: ${errorText}`); + } + + const data = (await response.json()) as ChatCompletionResponse; + + // Show cache-relevant metrics in OpenAI-compatible format + const cachedTokens = data.usage.prompt_tokens_details?.cached_tokens ?? 
0; + const promptTokens = data.usage.prompt_tokens; + const completionTokens = data.usage.completion_tokens; + + const metrics: string[] = [`prompt=${promptTokens}`, `completion=${completionTokens}`]; + + if (cachedTokens > 0) { + metrics.push(`cached=${cachedTokens} ✓ (CACHE HIT)`); + } else { + metrics.push('cached=0 (CACHE MISS)'); + } + + console.log(` ${metrics.join(', ')}`); + + return data; +} + +/** + * Main example + */ +async function main() { + console.log('╔════════════════════════════════════════════════════════════════════════════╗'); + console.log('║ Anthropic Prompt Caching - Multi-Message with cache_control ║'); + console.log('╚════════════════════════════════════════════════════════════════════════════╝'); + console.log(); + console.log('Testing cache_control on system message in a multi-message conversation'); + console.log( + `System prompt size: ${LARGE_SYSTEM_PROMPT.length} characters (~${Math.round(LARGE_SYSTEM_PROMPT.length / 4)} tokens)`, + ); + console.log(); + console.log('Expected behavior:'); + console.log(' 1st call: cached_tokens = 0 (cache miss, creates cache)'); + console.log(' 2nd call: cached_tokens > 0 (cache hit, reads from cache)'); + console.log(); + + try { + const requestBody = { + model: 'anthropic/claude-3.5-sonnet', + stream_options: { + include_usage: true, // CRITICAL: Required for cached_tokens to be populated + }, + messages: [ + { + role: 'system', + content: [ + { + type: 'text', + text: LARGE_SYSTEM_PROMPT, + cache_control: { type: 'ephemeral' }, + }, + ], + }, + { + role: 'user', + content: "Hello, what's your name?", + }, + { + role: 'assistant', + content: "I'm Claude, an AI assistant created by Anthropic.", + }, + { + role: 'user', + content: 'What programming languages do you know?', + }, + ], + }; + + // First call - should create cache + const response1 = await makeRequest( + requestBody, + 'First Call (Cache Miss Expected)', + ); + + // Wait 1 second between calls to ensure they're processed separately + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Second identical call - should hit cache + const response2 = await makeRequest( + requestBody, + 'Second Call (Cache Hit Expected)', + ); + + // Verify cache behavior using OpenAI-compatible format + console.log('\n' + '='.repeat(80)); + console.log('ANALYSIS'); + console.log('='.repeat(80)); + + const cached1 = response1.usage.prompt_tokens_details?.cached_tokens ?? 0; + const cached2 = response2.usage.prompt_tokens_details?.cached_tokens ?? 0; + + console.log(`First call: cached_tokens=${cached1} (expected: 0, cache miss creates cache)`); + console.log(`Second call: cached_tokens=${cached2} (expected: >0, cache hit reads from cache)`); + + if (cached1 === 0) { + console.log('✓ First call cache miss (created cache for future requests)'); + } else { + console.log(`⚠ First call unexpectedly had cached tokens: ${cached1}`); + } + + if (cached2 > 0) { + console.log(`✓ Second call cache hit: ${cached2} tokens read from cache`); + } else { + console.log(`✗ Second call did NOT hit cache (cached_tokens=${cached2})`); + } + + const success = cached1 === 0 && cached2 > 0; + console.log(`\nResult: ${success ? 
'✓ CACHE WORKING' : '✗ CACHE NOT WORKING'}`); + + if (success) { + console.log('\n════════════════════════════════════════════════════════════════════════════'); + console.log('✓ SUCCESS - Multi-message caching is working correctly'); + console.log('════════════════════════════════════════════════════════════════════════════'); + } else { + console.log('\n════════════════════════════════════════════════════════════════════════════'); + console.log('✗ FAILURE - Multi-message caching is not working as expected'); + console.log('════════════════════════════════════════════════════════════════════════════'); + } + } catch (error) { + console.error('\n❌ ERROR during testing:'); + + if (error instanceof Error) { + console.error('Error message:', error.message); + console.error('Stack trace:', error.stack); + } else { + console.error('Unknown error:', error); + } + + process.exit(1); + } +} + +// Run the example +main(); diff --git a/typescript/fetch/src/prompt-caching/no-cache-control.ts b/typescript/fetch/src/prompt-caching/no-cache-control.ts new file mode 100644 index 0000000..25ac0a7 --- /dev/null +++ b/typescript/fetch/src/prompt-caching/no-cache-control.ts @@ -0,0 +1,188 @@ +/** + * Example: Anthropic Prompt Caching - Control (No cache_control) + * + * This example demonstrates the CONTROL scenario: no cache_control markers. + * + * Scientific Method: + * - Hypothesis: Without cache_control, no caching should occur + * - Experiment: Make identical calls twice without cache_control + * - Evidence: usage.prompt_tokens_details.cached_tokens should remain 0 + * + * IMPORTANT: OpenRouter transforms Anthropic's native response format to OpenAI-compatible format: + * - Anthropic native: usage.cache_read_input_tokens, usage.cache_creation_input_tokens + * - OpenRouter returns: usage.prompt_tokens_details.cached_tokens (OpenAI-compatible) + * + * Purpose: This control scenario ensures our test methodology is sound + * - Same large system prompt + * - NO cache_control markers + * - Should NOT see cache metrics + * + * To run: bun run typescript/fetch/src/prompt-caching/no-cache-control.ts + */ + +import { LARGE_SYSTEM_PROMPT } from '@openrouter-examples/shared/constants'; +import type { ChatCompletionResponse } from '@openrouter-examples/shared/types'; + +// OpenRouter API endpoint +const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions'; + +/** + * Make a chat completion request to OpenRouter WITHOUT Anthropic caching + */ +async function makeRequest( + requestBody: unknown, + description: string, +): Promise<ChatCompletionResponse> { + console.log(`\n${description}`); + + if (!process.env.OPENROUTER_API_KEY) { + throw new Error('OPENROUTER_API_KEY environment variable is not set'); + } + + const response = await fetch(OPENROUTER_API_URL, { + method: 'POST', + headers: { + Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`, + 'Content-Type': 'application/json', + 'HTTP-Referer': 'https://github.com/openrouter/examples', + 'X-Title': 'Anthropic Cache - Control (No Cache)', + }, + body: JSON.stringify(requestBody), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`HTTP error! status: ${response.status}, body: ${errorText}`); + } + + const data = (await response.json()) as ChatCompletionResponse; + + // Show cache-relevant metrics in OpenAI-compatible format + const cachedTokens = data.usage.prompt_tokens_details?.cached_tokens ?? 
0; + const promptTokens = data.usage.prompt_tokens; + const completionTokens = data.usage.completion_tokens; + + const metrics: string[] = [`prompt=${promptTokens}`, `completion=${completionTokens}`]; + + if (cachedTokens > 0) { + metrics.push(`cached=${cachedTokens} ⚠ (UNEXPECTED CACHE HIT)`); + } else { + metrics.push('cached=0 (EXPECTED - NO CACHE)'); + } + + console.log(` ${metrics.join(', ')}`); + + return data; +} + +/** + * Main example + */ +async function main() { + console.log('╔════════════════════════════════════════════════════════════════════════════╗'); + console.log('║ Anthropic Prompt Caching - Control (No cache_control) ║'); + console.log('╚════════════════════════════════════════════════════════════════════════════╝'); + console.log(); + console.log('Testing WITHOUT cache_control (control scenario)'); + console.log( + `System prompt size: ${LARGE_SYSTEM_PROMPT.length} characters (~${Math.round(LARGE_SYSTEM_PROMPT.length / 4)} tokens)`, + ); + console.log(); + console.log('Expected behavior:'); + console.log(' 1st call: cached_tokens = 0 (no cache_control)'); + console.log(' 2nd call: cached_tokens = 0 (no cache_control)'); + console.log(); + + try { + const requestBody = { + model: 'anthropic/claude-3.5-sonnet', + stream_options: { + include_usage: true, // CRITICAL: Required for cached_tokens to be populated + }, + messages: [ + { + role: 'system', + content: [ + { + type: 'text', + text: LARGE_SYSTEM_PROMPT, + // NO cache_control - this is the control + }, + ], + }, + { + role: 'user', + content: 'What are the key principles you follow?', + }, + ], + }; + + // First call + const response1 = await makeRequest( + requestBody, + 'First Call (No Cache Expected)', + ); + + // Wait 1 second between calls to ensure they're processed separately + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Second identical call + const response2 = await makeRequest( + requestBody, + 'Second Call (No Cache Expected)', + ); + + // Verify cache behavior using OpenAI-compatible format + console.log('\n' + '='.repeat(80)); + console.log('ANALYSIS (CONTROL)'); + console.log('='.repeat(80)); + + const cached1 = response1.usage.prompt_tokens_details?.cached_tokens ?? 0; + const cached2 = response2.usage.prompt_tokens_details?.cached_tokens ?? 0; + + console.log(`First call: cached_tokens=${cached1} (expected: 0, no cache_control)`); + console.log(`Second call: cached_tokens=${cached2} (expected: 0, no cache_control)`); + + if (cached1 === 0 && cached2 === 0) { + console.log('✓ No cache metrics present (expected for control - no cache_control)'); + } else { + console.log('✗ Unexpected cache metrics in control scenario'); + } + + const success = cached1 === 0 && cached2 === 0; + console.log(`\nResult: ${success ? 
'✓ CONTROL VALID' : '✗ CONTROL INVALID'}`); + + if (success) { + console.log('\n════════════════════════════════════════════════════════════════════════════'); + console.log('✓ SUCCESS - Control scenario confirms no false positives'); + console.log('════════════════════════════════════════════════════════════════════════════'); + console.log(); + console.log('This control validates that:'); + console.log('- Cache metrics are NOT present without cache_control'); + console.log('- Our test methodology is sound'); + console.log('- Positive results in other examples are genuine cache hits'); + } else { + console.log('\n════════════════════════════════════════════════════════════════════════════'); + console.log('✗ FAILURE - Control scenario shows unexpected cache behavior'); + console.log('════════════════════════════════════════════════════════════════════════════'); + console.log(); + console.log('This invalidates our testing methodology:'); + console.log('- Cache metrics appearing without cache_control suggests false positives'); + console.log('- Need to investigate why caching occurs without explicit markers'); + } + } catch (error) { + console.error('\n❌ ERROR during testing:'); + + if (error instanceof Error) { + console.error('Error message:', error.message); + console.error('Stack trace:', error.stack); + } else { + console.error('Unknown error:', error); + } + + process.exit(1); + } +} + +// Run the example +main(); diff --git a/typescript/fetch/src/prompt-caching/user-message-cache.ts b/typescript/fetch/src/prompt-caching/user-message-cache.ts new file mode 100644 index 0000000..48a1084 --- /dev/null +++ b/typescript/fetch/src/prompt-caching/user-message-cache.ts @@ -0,0 +1,193 @@ +/** + * Example: Anthropic Prompt Caching - User Message + * + * This example demonstrates Anthropic prompt caching on a user message via OpenRouter. 
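+ * This pattern is useful when the large content arrives with the request itself (for example, a pasted document) rather than living in the system prompt.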
+ * + * Scientific Method: + * - Hypothesis: cache_control at content-item level triggers Anthropic caching + * - Experiment: Make identical calls twice and measure cache hit via usage metrics + * - Evidence: usage.prompt_tokens_details.cached_tokens (OpenAI-compatible format) + * + * IMPORTANT: OpenRouter transforms Anthropic's native response format to OpenAI-compatible format: + * - Anthropic native: usage.cache_read_input_tokens, usage.cache_creation_input_tokens + * - OpenRouter returns: usage.prompt_tokens_details.cached_tokens (OpenAI-compatible) + * + * Anthropic Cache Requirements: + * - **CRITICAL**: stream_options.include_usage must be set to true (otherwise no usage details) + * - Minimum 2048+ tokens to cache reliably (we use 30k+ char system prompt from shared) + * - cache_control: {type: "ephemeral"} on content items + * - TTL: 5 minutes for ephemeral caches + * + * Pattern: User message with content-level cache_control (less common but valid) + * - User message with content array + * - cache_control on text content block + * + * To run: bun run typescript/fetch/src/prompt-caching/user-message-cache.ts + */ + +import { LARGE_SYSTEM_PROMPT } from '@openrouter-examples/shared/constants'; +import type { ChatCompletionResponse } from '@openrouter-examples/shared/types'; + +// OpenRouter API endpoint +const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions'; + +/** + * Make a chat completion request to OpenRouter with Anthropic caching + */ +async function makeRequest( + requestBody: unknown, + description: string, +): Promise<ChatCompletionResponse> { + console.log(`\n${description}`); + + if (!process.env.OPENROUTER_API_KEY) { + throw new Error('OPENROUTER_API_KEY environment variable is not set'); + } + + const response = await fetch(OPENROUTER_API_URL, { + method: 'POST', + headers: { + Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`, + 'Content-Type': 'application/json', + 'HTTP-Referer': 'https://github.com/openrouter/examples', + 'X-Title': 'Anthropic Cache - User Message', + }, + body: JSON.stringify(requestBody), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`HTTP error! status: ${response.status}, body: ${errorText}`); + } + + const data = (await response.json()) as ChatCompletionResponse; + + // Show cache-relevant metrics in OpenAI-compatible format + const cachedTokens = data.usage.prompt_tokens_details?.cached_tokens ?? 
0; + const promptTokens = data.usage.prompt_tokens; + const completionTokens = data.usage.completion_tokens; + + const metrics: string[] = [`prompt=${promptTokens}`, `completion=${completionTokens}`]; + + if (cachedTokens > 0) { + metrics.push(`cached=${cachedTokens} ✓ (CACHE HIT)`); + } else { + metrics.push('cached=0 (CACHE MISS)'); + } + + console.log(` ${metrics.join(', ')}`); + + return data; +} + +/** + * Main example + */ +async function main() { + console.log('╔════════════════════════════════════════════════════════════════════════════╗'); + console.log('║ Anthropic Prompt Caching - User Message with cache_control ║'); + console.log('╚════════════════════════════════════════════════════════════════════════════╝'); + console.log(); + console.log('Testing cache_control on user message content block'); + console.log( + `Context size: ${LARGE_SYSTEM_PROMPT.length} characters (~${Math.round(LARGE_SYSTEM_PROMPT.length / 4)} tokens)`, + ); + console.log(); + console.log('Expected behavior:'); + console.log(' 1st call: cached_tokens = 0 (cache miss, creates cache)'); + console.log(' 2nd call: cached_tokens > 0 (cache hit, reads from cache)'); + console.log(); + + try { + // Use a large context document in the user message + const largeContext = `Here is a comprehensive TypeScript codebase to analyze:\n\n${LARGE_SYSTEM_PROMPT}`; + + const requestBody = { + model: 'anthropic/claude-3.5-sonnet', + stream_options: { + include_usage: true, // CRITICAL: Required for cached_tokens to be populated + }, + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: largeContext, + cache_control: { type: 'ephemeral' }, // Cache this content block + }, + { + type: 'text', + text: 'Based on this codebase, what are the main patterns used?', + }, + ], + }, + ], + }; + + // First call - should create cache + const response1 = await makeRequest( + requestBody, + 'First Call (Cache Miss Expected)', + ); + + // Wait 1 second between calls to ensure they're processed separately + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Second identical call - should hit cache + const response2 = await makeRequest( + requestBody, + 'Second Call (Cache Hit Expected)', + ); + + // Verify cache behavior using OpenAI-compatible format + console.log('\n' + '='.repeat(80)); + console.log('ANALYSIS'); + console.log('='.repeat(80)); + + const cached1 = response1.usage.prompt_tokens_details?.cached_tokens ?? 0; + const cached2 = response2.usage.prompt_tokens_details?.cached_tokens ?? 0; + + console.log(`First call: cached_tokens=${cached1} (expected: 0, cache miss creates cache)`); + console.log(`Second call: cached_tokens=${cached2} (expected: >0, cache hit reads from cache)`); + + if (cached1 === 0) { + console.log('✓ First call cache miss (created cache for future requests)'); + } else { + console.log(`⚠ First call unexpectedly had cached tokens: ${cached1}`); + } + + if (cached2 > 0) { + console.log(`✓ Second call cache hit: ${cached2} tokens read from cache`); + } else { + console.log(`✗ Second call did NOT hit cache (cached_tokens=${cached2})`); + } + + const success = cached1 === 0 && cached2 > 0; + console.log(`\nResult: ${success ? 
'✓ CACHE WORKING' : '✗ CACHE NOT WORKING'}`); + + if (success) { + console.log('\n════════════════════════════════════════════════════════════════════════════'); + console.log('✓ SUCCESS - User message caching is working correctly'); + console.log('════════════════════════════════════════════════════════════════════════════'); + } else { + console.log('\n════════════════════════════════════════════════════════════════════════════'); + console.log('✗ FAILURE - User message caching is not working as expected'); + console.log('════════════════════════════════════════════════════════════════════════════'); + } + } catch (error) { + console.error('\n❌ ERROR during testing:'); + + if (error instanceof Error) { + console.error('Error message:', error.message); + console.error('Stack trace:', error.stack); + } else { + console.error('Unknown error:', error); + } + + process.exit(1); + } +} + +// Run the example +main(); diff --git a/typescript/fetch/tsconfig.json b/typescript/fetch/tsconfig.json new file mode 100644 index 0000000..822a727 --- /dev/null +++ b/typescript/fetch/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "lib": ["ES2022"], + "moduleResolution": "bundler", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "types": ["bun-types"] + }, + "include": ["src/**/*"], + "exclude": ["node_modules"] +} diff --git a/typescript/package.json b/typescript/package.json new file mode 100644 index 0000000..8ca384c --- /dev/null +++ b/typescript/package.json @@ -0,0 +1,21 @@ +{ + "name": "openrouter-examples-typescript", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "examples": "bun --filter '*' examples", + "typecheck": "bun --filter '*' typecheck", + "stylecheck": "biome check .", + "format": "biome format --write ." + }, + "workspaces": [ + "shared", + "fetch" + ], + "devDependencies": { + "@types/bun": "latest", + "@biomejs/biome": "^1.9.4", + "typescript": "^5.7.2" + } +} diff --git a/typescript/shared/package.json b/typescript/shared/package.json new file mode 100644 index 0000000..3ddb46c --- /dev/null +++ b/typescript/shared/package.json @@ -0,0 +1,17 @@ +{ + "name": "@openrouter-examples/shared", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "typecheck": "tsc --noEmit" + }, + "exports": { + "./constants": "./src/constants.ts", + "./types": "./src/types.ts" + }, + "devDependencies": { + "@types/bun": "latest", + "typescript": "^5.7.2" + } +} diff --git a/typescript/shared/src/constants.ts b/typescript/shared/src/constants.ts new file mode 100644 index 0000000..ccc442b --- /dev/null +++ b/typescript/shared/src/constants.ts @@ -0,0 +1,144 @@ +/** + * Shared constants for OpenRouter examples + * + * This module contains large context strings and other constants + * used across multiple example ecosystems to ensure consistency + * and avoid duplication. + */ + +/** + * Large system prompt (30k+ chars) for testing Anthropic caching + * + * This exceeds Anthropic's 2048 token minimum for reliable caching. + * Used in examples to demonstrate cache creation and cache hits. 
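+ * At roughly 4 characters per token, 30k characters is about 7.5k tokens, comfortably above that minimum.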
+ * + * In real-world usage, this might be: + * - Product documentation + * - Codebase context + * - Character cards + * - RAG (Retrieval Augmented Generation) data + * - Long-form instructions + */ +export const LARGE_SYSTEM_PROMPT = + `You are an expert TypeScript developer and software architect with deep knowledge of: + +TypeScript Language Features: +- Advanced type system including conditional types, mapped types, template literal types +- Generic constraints and variance +- Type inference and type narrowing +- Discriminated unions and exhaustive checking +- Module resolution and declaration files +- Decorator patterns and metadata reflection +- Utility types (Partial, Pick, Omit, Record, etc.) + +Effect-TS Framework: +- Effect data type for modeling success/failure/dependencies +- Layers for dependency injection +- Services and contexts +- Error handling with tagged errors +- Resource management with Scope +- Concurrency primitives (Fiber, Queue, Deferred) +- Testing with TestClock and TestContext +- Stream processing +- Schema validation with @effect/schema + +AI SDK and Provider Patterns: +- Language model abstraction layers +- Streaming vs non-streaming responses +- Tool calling and function execution +- Multi-modal input handling (text, images, files) +- Prompt caching strategies +- Provider-specific capabilities +- Error handling and retries +- Token usage tracking + +Software Engineering Best Practices: +- Scientific method in development (hypothesis, experiment, measure, analyze) +- Test-driven development with reproducible tests +- Type-safe API design +- Functional programming patterns +- Immutable data structures +- Separation of concerns +- Dependency injection +- Error handling strategies +- Performance optimization +- Documentation and code comments + +OpenRouter API: +- Multi-provider routing +- Model selection and fallbacks +- Cost optimization +- Rate limiting +- Provider-specific features +- Header passthrough for provider capabilities +- Usage metrics and analytics +- Error codes and debugging + +Anthropic Claude Models: +- Claude 3 family (Opus, Sonnet, Haiku) +- Claude 3.5 Sonnet +- Extended thinking mode +- Vision capabilities +- Tool use patterns +- Prompt caching (ephemeral and standard) +- System prompts vs user messages +- Message structure requirements +- Content blocks vs string messages +- Cache control placement + +You provide clear, concise, type-safe code examples with detailed explanations. +You prioritize correctness, maintainability, and performance. +You follow the scientific method: state hypotheses, run experiments, measure results, draw evidence-based conclusions. +You write tests that prove your code works rather than assuming it works. +You use Effect-TS patterns for error handling and dependency management when appropriate. +You understand the tradeoffs between different approaches and explain them clearly. + +When writing code you: +1. Start with type definitions to clarify the contract +2. Implement with compile-time safety +3. Add runtime validation where needed +4. Write tests that verify behavior +5. Document assumptions and edge cases +6. Consider error cases and recovery strategies +7. Optimize for readability first, performance second +8. Use descriptive names that reveal intent +9. Keep functions small and focused +10. Avoid premature abstraction + +When debugging you: +1. Reproduce the issue with a minimal test case +2. Form hypotheses about the root cause +3. Add logging/instrumentation to gather evidence +4. 
Test each hypothesis systematically +5. Verify the fix with regression tests +6. Document the issue and solution + +When reviewing code you check for: +- Type safety and correctness +- Error handling completeness +- Test coverage of critical paths +- Clear naming and documentation +- Performance implications +- Security considerations +- Maintainability and extensibility +- Adherence to project conventions + +Remember: Always provide evidence for your conclusions. "It should work" is not evidence. "The test passes with output X" is evidence.`.repeat( + 10, + ); // Repeat 10x to ensure ~30k chars, ~7.5k tokens + +/** + * Model identifier for Anthropic Claude 3.5 Sonnet via OpenRouter + * + * This model supports: + * - Prompt caching with cache_control breakpoints + * - Vision capabilities + * - Tool use + * - Extended context windows + */ +export const ANTHROPIC_MODEL = 'anthropic/claude-3.5-sonnet'; + +/** + * Alternative model with beta features + */ +export const ANTHROPIC_MODEL_BETA = 'anthropic/claude-3-5-sonnet:beta'; diff --git a/typescript/shared/src/types.ts b/typescript/shared/src/types.ts new file mode 100644 index 0000000..2bb0bde --- /dev/null +++ b/typescript/shared/src/types.ts @@ -0,0 +1,74 @@ +/** + * Shared TypeScript types for OpenRouter examples + */ + +/** + * Cache control configuration for Anthropic caching + */ +export interface CacheControl { + type: 'ephemeral'; +} + +/** + * Text content block with optional cache control + */ +export interface TextContent { + type: 'text'; + text: string; + cache_control?: CacheControl; +} + +/** + * Message roles in chat completions + */ +export type MessageRole = 'system' | 'user' | 'assistant'; + +/** + * Chat message with content + */ +export interface Message { + role: MessageRole; + content: string | TextContent[]; +} + +/** + * OpenAI-compatible usage metrics + * (OpenRouter transforms Anthropic's native format to this) + */ +export interface Usage { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + prompt_tokens_details?: { + cached_tokens?: number; // Also called cached_input_tokens + cache_creation_input_tokens?: number; + audio_tokens?: number; + }; + completion_tokens_details?: { + reasoning_tokens?: number; + }; +} + +/** + * Chat completion response (OpenAI-compatible format) + */ +export interface ChatCompletionResponse { + id: string; + model: string; + choices: Array<{ + index: number; + message: { + role: string; + content: string; + }; + finish_reason: string; + }>; + usage: Usage; +} + +/** + * Stream options for usage tracking + */ +export interface StreamOptions { + include_usage: boolean; +} diff --git a/typescript/shared/tsconfig.json b/typescript/shared/tsconfig.json new file mode 100644 index 0000000..822a727 --- /dev/null +++ b/typescript/shared/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "lib": ["ES2022"], + "moduleResolution": "bundler", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "types": ["bun-types"] + }, + "include": ["src/**/*"], + "exclude": ["node_modules"] +} From e6c95f5a976ba6039e339061a0a12990aaf6ee7f Mon Sep 17 00:00:00 2001 From: Tom Aylott Date: Tue, 11 Nov 2025 18:15:41 -0500 Subject: [PATCH 2/7] Run biome format on prompt-caching examples --- .../fetch/src/prompt-caching/multi-message-cache.ts | 10 ++-------- .../fetch/src/prompt-caching/no-cache-control.ts | 10 ++-------- 
.../fetch/src/prompt-caching/user-message-cache.ts | 10 ++-------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/typescript/fetch/src/prompt-caching/multi-message-cache.ts b/typescript/fetch/src/prompt-caching/multi-message-cache.ts index e59e6a8..160072c 100644 --- a/typescript/fetch/src/prompt-caching/multi-message-cache.ts +++ b/typescript/fetch/src/prompt-caching/multi-message-cache.ts @@ -132,19 +132,13 @@ async function main() { }; // First call - should create cache - const response1 = await makeRequest( - requestBody, - 'First Call (Cache Miss Expected)', - ); + const response1 = await makeRequest(requestBody, 'First Call (Cache Miss Expected)'); // Wait 1 second between calls to ensure they're processed separately await new Promise((resolve) => setTimeout(resolve, 1000)); // Second identical call - should hit cache - const response2 = await makeRequest( - requestBody, - 'Second Call (Cache Hit Expected)', - ); + const response2 = await makeRequest(requestBody, 'Second Call (Cache Hit Expected)'); // Verify cache behavior using OpenAI-compatible format console.log('\n' + '='.repeat(80)); diff --git a/typescript/fetch/src/prompt-caching/no-cache-control.ts b/typescript/fetch/src/prompt-caching/no-cache-control.ts index 25ac0a7..e6ff03f 100644 --- a/typescript/fetch/src/prompt-caching/no-cache-control.ts +++ b/typescript/fetch/src/prompt-caching/no-cache-control.ts @@ -118,19 +118,13 @@ async function main() { }; // First call - const response1 = await makeRequest( - requestBody, - 'First Call (No Cache Expected)', - ); + const response1 = await makeRequest(requestBody, 'First Call (No Cache Expected)'); // Wait 1 second between calls to ensure they're processed separately await new Promise((resolve) => setTimeout(resolve, 1000)); // Second identical call - const response2 = await makeRequest( - requestBody, - 'Second Call (No Cache Expected)', - ); + const response2 = await makeRequest(requestBody, 'Second Call (No Cache Expected)'); // Verify cache behavior using OpenAI-compatible format console.log('\n' + '='.repeat(80)); diff --git a/typescript/fetch/src/prompt-caching/user-message-cache.ts b/typescript/fetch/src/prompt-caching/user-message-cache.ts index 48a1084..35d98be 100644 --- a/typescript/fetch/src/prompt-caching/user-message-cache.ts +++ b/typescript/fetch/src/prompt-caching/user-message-cache.ts @@ -126,19 +126,13 @@ async function main() { }; // First call - should create cache - const response1 = await makeRequest( - requestBody, - 'First Call (Cache Miss Expected)', - ); + const response1 = await makeRequest(requestBody, 'First Call (Cache Miss Expected)'); // Wait 1 second between calls to ensure they're processed separately await new Promise((resolve) => setTimeout(resolve, 1000)); // Second identical call - should hit cache - const response2 = await makeRequest( - requestBody, - 'Second Call (Cache Hit Expected)', - ); + const response2 = await makeRequest(requestBody, 'Second Call (Cache Hit Expected)'); // Verify cache behavior using OpenAI-compatible format console.log('\n' + '='.repeat(80)); From e25d9d318072bd0c573d58755acfe25bbdb7a70f Mon Sep 17 00:00:00 2001 From: Tom Aylott Date: Tue, 11 Nov 2025 18:18:36 -0500 Subject: [PATCH 3/7] Simplify examples section to link only to folder --- docs/prompt-caching.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/prompt-caching.md b/docs/prompt-caching.md index e1871f8..02098b6 100644 --- a/docs/prompt-caching.md +++ b/docs/prompt-caching.md @@ -188,6 +188,3 @@ The `cache_discount` field 
shows how much you saved. Some providers (like Anthro See ecosystem-specific examples: - **TypeScript + fetch**: [typescript/fetch/src/prompt-caching/](../typescript/fetch/src/prompt-caching/) - - [user-message-cache.ts](../typescript/fetch/src/prompt-caching/user-message-cache.ts) - - [multi-message-cache.ts](../typescript/fetch/src/prompt-caching/multi-message-cache.ts) - - [no-cache-control.ts](../typescript/fetch/src/prompt-caching/no-cache-control.ts) (control) From dfa3461574d2b2726dee3749f89868766ad908d1 Mon Sep 17 00:00:00 2001 From: Tom Aylott Date: Tue, 11 Nov 2025 18:39:59 -0500 Subject: [PATCH 4/7] Simplify fetch prompt-caching README to link to main docs --- typescript/fetch/src/prompt-caching/README.md | 101 +++--------------- 1 file changed, 15 insertions(+), 86 deletions(-) diff --git a/typescript/fetch/src/prompt-caching/README.md b/typescript/fetch/src/prompt-caching/README.md index 7de5a81..ab049ed 100644 --- a/typescript/fetch/src/prompt-caching/README.md +++ b/typescript/fetch/src/prompt-caching/README.md @@ -1,98 +1,27 @@ -# Anthropic Prompt Caching Examples +# Prompt Caching Examples (TypeScript + fetch) -This directory contains examples demonstrating Anthropic's prompt caching feature via OpenRouter using the raw fetch API. +Examples demonstrating prompt caching with the fetch API. -## What is Prompt Caching? +## Documentation -Anthropic's prompt caching allows you to cache large portions of your prompts (like system messages or context documents) to: -- **Reduce costs** - Cached tokens cost significantly less than regular tokens -- **Improve latency** - Cached content is processed faster on subsequent requests -- **Enable larger contexts** - Use more context without proportional cost increases +For full prompt caching documentation including all providers, pricing, and configuration details, see: +- **[Prompt Caching Guide](../../../../docs/prompt-caching.md)** -Cache TTL: 5 minutes for ephemeral caches +## Examples in This Directory -## Examples +- `user-message-cache.ts` - Cache large context in user messages +- `multi-message-cache.ts` - Cache system prompt across multi-turn conversations +- `no-cache-control.ts` - Control scenario (validates methodology) -### 1. System Message Cache (`system-message-cache.ts`) -The most common pattern - cache a large system prompt: -```bash -bun run typescript/fetch/src/prompt-caching/system-message-cache.ts -``` - -**Pattern**: System message with content-level `cache_control` +## Quick Start -### 2. User Message Cache (`user-message-cache.ts`) -Cache large context in user messages (e.g., uploading documents): ```bash +# Run an example bun run typescript/fetch/src/prompt-caching/user-message-cache.ts ``` -**Pattern**: User message with content-level `cache_control` on context block - -### 3. Multi-Message Cache (`multi-message-cache.ts`) -Cache system prompt across multi-turn conversations: -```bash -bun run typescript/fetch/src/prompt-caching/multi-message-cache.ts -``` - -**Pattern**: System message cache persists through conversation history - -### 4. 
From dfa3461574d2b2726dee3749f89868766ad908d1 Mon Sep 17 00:00:00 2001
From: Tom Aylott
Date: Tue, 11 Nov 2025 18:39:59 -0500
Subject: [PATCH 4/7] Simplify fetch prompt-caching README to link to main docs

---
 typescript/fetch/src/prompt-caching/README.md | 101 +++--------------
 1 file changed, 15 insertions(+), 86 deletions(-)

diff --git a/typescript/fetch/src/prompt-caching/README.md b/typescript/fetch/src/prompt-caching/README.md
index 7de5a81..ab049ed 100644
--- a/typescript/fetch/src/prompt-caching/README.md
+++ b/typescript/fetch/src/prompt-caching/README.md
@@ -1,98 +1,27 @@
-# Anthropic Prompt Caching Examples
+# Prompt Caching Examples (TypeScript + fetch)
 
-This directory contains examples demonstrating Anthropic's prompt caching feature via OpenRouter using the raw fetch API.
+Examples demonstrating prompt caching with the fetch API.
 
-## What is Prompt Caching?
+## Documentation
 
-Anthropic's prompt caching allows you to cache large portions of your prompts (like system messages or context documents) to:
-- **Reduce costs** - Cached tokens cost significantly less than regular tokens
-- **Improve latency** - Cached content is processed faster on subsequent requests
-- **Enable larger contexts** - Use more context without proportional cost increases
+For full prompt caching documentation including all providers, pricing, and configuration details, see:
+- **[Prompt Caching Guide](../../../../docs/prompt-caching.md)**
 
-Cache TTL: 5 minutes for ephemeral caches
+## Examples in This Directory
 
-## Examples
+- `user-message-cache.ts` - Cache large context in user messages
+- `multi-message-cache.ts` - Cache system prompt across multi-turn conversations
+- `no-cache-control.ts` - Control scenario (validates methodology)
 
-### 1. System Message Cache (`system-message-cache.ts`)
-The most common pattern - cache a large system prompt:
-```bash
-bun run typescript/fetch/src/prompt-caching/system-message-cache.ts
-```
-
-**Pattern**: System message with content-level `cache_control`
+## Quick Start
 
-### 2. User Message Cache (`user-message-cache.ts`)
-Cache large context in user messages (e.g., uploading documents):
 ```bash
+# Run an example
 bun run typescript/fetch/src/prompt-caching/user-message-cache.ts
 ```
 
-**Pattern**: User message with content-level `cache_control` on context block
-
-### 3. Multi-Message Cache (`multi-message-cache.ts`)
-Cache system prompt across multi-turn conversations:
-```bash
-bun run typescript/fetch/src/prompt-caching/multi-message-cache.ts
-```
-
-**Pattern**: System message cache persists through conversation history
-
-### 4. No Cache Control (`no-cache-control.ts`)
-Control scenario - no caching should occur:
-```bash
-bun run typescript/fetch/src/prompt-caching/no-cache-control.ts
-```
-
-**Pattern**: Same structure but NO `cache_control` markers (validates methodology)
-
-## How to Use Cache Control
-
-```typescript
-const requestBody = {
-  model: 'anthropic/claude-3.5-sonnet',
-  stream_options: {
-    include_usage: true, // CRITICAL: Required for cache metrics
-  },
-  messages: [
-    {
-      role: 'system',
-      content: [
-        {
-          type: 'text',
-          text: 'Your large system prompt here...',
-          cache_control: { type: 'ephemeral' }, // Cache this block
-        },
-      ],
-    },
-    {
-      role: 'user',
-      content: 'Your question here',
-    },
-  ],
-};
-```
-
-## Important Notes
-
-### OpenRouter Format Transformation
-OpenRouter transforms Anthropic's native response format to OpenAI-compatible format:
-- **Anthropic native**: `usage.cache_read_input_tokens`, `usage.cache_creation_input_tokens`
-- **OpenRouter returns**: `usage.prompt_tokens_details.cached_tokens` (OpenAI-compatible)
-
-### Requirements for Caching
-1. **stream_options.include_usage = true** - CRITICAL, otherwise no usage details
-2. **Minimum 2048+ tokens** - Smaller content may not be cached reliably
-3. **cache_control on content blocks** - Not on message level
-4. **Exact match** - Cache only hits on identical content
-
-### Expected Behavior
-- **First call**: `cached_tokens = 0` (cache miss, creates cache)
-- **Second call**: `cached_tokens > 0` (cache hit, reads from cache)
-- **Control**: `cached_tokens = 0` on both calls (no cache_control)
+## Key Requirements (Anthropic)
 
-## Scientific Method
-All examples follow scientific method principles:
-- **Hypothesis**: cache_control triggers Anthropic caching
-- **Experiment**: Make identical calls twice
-- **Evidence**: Measure via `usage.prompt_tokens_details.cached_tokens`
-- **Analysis**: Compare first call (miss) vs second call (hit)
+- `stream_options.include_usage = true` - Required for cache metrics
+- Minimum 2048+ tokens to cache reliably
+- `cache_control: {type: "ephemeral"}` on content blocks (not message-level)
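The last bullet in the new README is the one most easily gotten wrong. A sketch of the distinction, with illustrative type shapes (these are not the repo's shared types):

```typescript
// Illustrative shapes only; the real types live in @openrouter-examples/shared.
type CacheControl = { type: 'ephemeral' };

type TextBlock = {
  type: 'text';
  text: string;
  cache_control?: CacheControl; // correct placement: on the content block
};

type Message = {
  role: 'system' | 'user' | 'assistant';
  content: string | TextBlock[]; // a plain string leaves nothing to mark for caching
};

// Cached: content array, with cache_control on the large text block.
export const cachedSystem: Message = {
  role: 'system',
  content: [
    {
      type: 'text',
      text: 'Large system prompt, 2048+ tokens...',
      cache_control: { type: 'ephemeral' },
    },
  ],
};

// Not cached: string content has no block to carry cache_control,
// and a message-level cache_control field is not part of the schema.
export const uncachedSystem: Message = {
  role: 'system',
  content: 'Large system prompt, 2048+ tokens...',
};
```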
From f85e1ccbeb50b6b40502ac42e6dee343aeacac25 Mon Sep 17 00:00:00 2001
From: Tom Aylott
Date: Tue, 11 Nov 2025 18:47:53 -0500
Subject: [PATCH 5/7] Rename prompt caching examples with anthropic prefix

---
 .../{multi-message-cache.ts => anthropic-multi-message-cache.ts} | 0
 .../{no-cache-control.ts => anthropic-no-cache-control.ts}       | 0
 .../{user-message-cache.ts => anthropic-user-message-cache.ts}   | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename typescript/fetch/src/prompt-caching/{multi-message-cache.ts => anthropic-multi-message-cache.ts} (100%)
 rename typescript/fetch/src/prompt-caching/{no-cache-control.ts => anthropic-no-cache-control.ts} (100%)
 rename typescript/fetch/src/prompt-caching/{user-message-cache.ts => anthropic-user-message-cache.ts} (100%)

diff --git a/typescript/fetch/src/prompt-caching/multi-message-cache.ts b/typescript/fetch/src/prompt-caching/anthropic-multi-message-cache.ts
similarity index 100%
rename from typescript/fetch/src/prompt-caching/multi-message-cache.ts
rename to typescript/fetch/src/prompt-caching/anthropic-multi-message-cache.ts
diff --git a/typescript/fetch/src/prompt-caching/no-cache-control.ts b/typescript/fetch/src/prompt-caching/anthropic-no-cache-control.ts
similarity index 100%
rename from typescript/fetch/src/prompt-caching/no-cache-control.ts
rename to typescript/fetch/src/prompt-caching/anthropic-no-cache-control.ts
diff --git a/typescript/fetch/src/prompt-caching/user-message-cache.ts b/typescript/fetch/src/prompt-caching/anthropic-user-message-cache.ts
similarity index 100%
rename from typescript/fetch/src/prompt-caching/user-message-cache.ts
rename to typescript/fetch/src/prompt-caching/anthropic-user-message-cache.ts

From 89cb44c5270702eade6e2b2ab6e38f4a6660a7f1 Mon Sep 17 00:00:00 2001
From: Tom Aylott
Date: Wed, 12 Nov 2025 17:21:56 -0500
Subject: [PATCH 6/7] Use glob pattern runner instead of listing filenames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace hardcoded filename list with run-examples.ts that auto-discovers
all .ts files in src/

Benefits:
- Add new example → automatically included in 'bun examples'
- Rename example → no package.json update needed
- Impossible for package.json to reference non-existent files

Also fixes stale filenames (user-message-cache.ts → anthropic-user-message-cache.ts)

---
 typescript/fetch/package.json    |  2 +-
 typescript/fetch/run-examples.ts | 57 ++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100755 typescript/fetch/run-examples.ts

diff --git a/typescript/fetch/package.json b/typescript/fetch/package.json
index b1ac767..812f80e 100644
--- a/typescript/fetch/package.json
+++ b/typescript/fetch/package.json
@@ -4,7 +4,7 @@
   "private": true,
   "type": "module",
   "scripts": {
-    "examples": "bun run src/prompt-caching/user-message-cache.ts && bun run src/prompt-caching/multi-message-cache.ts && bun run src/prompt-caching/no-cache-control.ts",
+    "examples": "bun run run-examples.ts",
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
diff --git a/typescript/fetch/run-examples.ts b/typescript/fetch/run-examples.ts
new file mode 100755
index 0000000..67429a3
--- /dev/null
+++ b/typescript/fetch/run-examples.ts
@@ -0,0 +1,57 @@
+#!/usr/bin/env bun
+/**
+ * Run all example files in the src/ directory
+ * Each example is run in a separate process to handle process.exit() calls
+ */
+
+import { readdirSync, statSync } from 'fs';
+import { join } from 'path';
+import { $ } from 'bun';
+
+const srcDir = join(import.meta.dir, 'src');
+
+// Recursively find all .ts files in src/
+function findExamples(dir: string): string[] {
+  const entries = readdirSync(dir);
+  const files: string[] = [];
+
+  for (const entry of entries) {
+    const fullPath = join(dir, entry);
+    const stat = statSync(fullPath);
+
+    if (stat.isDirectory()) {
+      files.push(...findExamples(fullPath));
+    } else if (entry.endsWith('.ts')) {
+      files.push(fullPath);
+    }
+  }
+
+  return files.sort();
+}
+
+const examples = findExamples(srcDir);
+console.log(`Found ${examples.length} example(s)\n`);
+
+let failed = 0;
+for (const example of examples) {
+  const relativePath = example.replace(import.meta.dir + '/', '');
+  console.log(`\n${'='.repeat(80)}`);
+  console.log(`Running: ${relativePath}`);
+  console.log('='.repeat(80));
+
+  try {
+    await $`bun run ${example}`.quiet();
+    console.log(`✅ ${relativePath} completed successfully`);
+  } catch (error) {
+    console.error(`❌ ${relativePath} failed`);
+    failed++;
+  }
+}
+
+console.log(`\n${'='.repeat(80)}`);
+console.log(`Results: ${examples.length - failed}/${examples.length} passed`);
+console.log('='.repeat(80));
+
+if (failed > 0) {
+  process.exit(1);
+}
From 8be9df3804076bdfe1cc117e55fe79182170e022 Mon Sep 17 00:00:00 2001
From: Tom Aylott
Date: Wed, 12 Nov 2025 18:01:03 -0500
Subject: [PATCH 7/7] Remove filename references from docs to prevent sync issues

---
 README.md                                            | 6 +++---
 typescript/fetch/README.md                           | 2 +-
 typescript/fetch/src/prompt-caching/README.md        | 11 +----------
 .../prompt-caching/anthropic-multi-message-cache.ts  | 2 --
 .../src/prompt-caching/anthropic-no-cache-control.ts | 2 --
 .../prompt-caching/anthropic-user-message-cache.ts   | 2 --
 6 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 8c34391..789f941 100644
--- a/README.md
+++ b/README.md
@@ -37,9 +37,9 @@ make typescript # Run TypeScript monorepo examples
 
 - **Documentation**: [docs/prompt-caching.md](docs/prompt-caching.md)
 - **Examples**:
   - [curl/prompt-caching.sh](curl/prompt-caching.sh)
-  - [typescript/fetch/src/prompt-caching.ts](typescript/fetch/src/prompt-caching.ts)
-  - [typescript/ai-sdk-v5/src/prompt-caching.ts](typescript/ai-sdk-v5/src/prompt-caching.ts)
-  - [typescript/effect-ai/src/prompt-caching.ts](typescript/effect-ai/src/prompt-caching.ts)
+  - [typescript/fetch/src/prompt-caching/](typescript/fetch/src/prompt-caching/)
+  - [typescript/ai-sdk-v5/src/prompt-caching/](typescript/ai-sdk-v5/src/prompt-caching/)
+  - [typescript/effect-ai/src/prompt-caching/](typescript/effect-ai/src/prompt-caching/)
 
 ## Prerequisites
 
diff --git a/typescript/fetch/README.md b/typescript/fetch/README.md
index 0278535..dcb6050 100644
--- a/typescript/fetch/README.md
+++ b/typescript/fetch/README.md
@@ -20,7 +20,7 @@ bun examples
 
 ## Features
 
-- [prompt-caching.ts](./src/prompt-caching.ts) - Anthropic caching with TypeScript types
+- [prompt-caching](./src/prompt-caching/) - Anthropic caching examples
 
 ## Dependencies
 
diff --git a/typescript/fetch/src/prompt-caching/README.md b/typescript/fetch/src/prompt-caching/README.md
index ab049ed..52d19e5 100644
--- a/typescript/fetch/src/prompt-caching/README.md
+++ b/typescript/fetch/src/prompt-caching/README.md
@@ -9,16 +9,7 @@ For full prompt caching documentation including all providers, pricing, and conf
 
 ## Examples in This Directory
 
-- `user-message-cache.ts` - Cache large context in user messages
-- `multi-message-cache.ts` - Cache system prompt across multi-turn conversations
-- `no-cache-control.ts` - Control scenario (validates methodology)
-
-## Quick Start
-
-```bash
-# Run an example
-bun run typescript/fetch/src/prompt-caching/user-message-cache.ts
-```
+See the TypeScript files in this directory for specific examples.
 
 ## Key Requirements (Anthropic)
diff --git a/typescript/fetch/src/prompt-caching/anthropic-multi-message-cache.ts b/typescript/fetch/src/prompt-caching/anthropic-multi-message-cache.ts
index 160072c..9f422f3 100644
--- a/typescript/fetch/src/prompt-caching/anthropic-multi-message-cache.ts
+++ b/typescript/fetch/src/prompt-caching/anthropic-multi-message-cache.ts
@@ -22,8 +22,6 @@
  * - System message with cache
  * - Multiple user/assistant exchanges
  * - Cache should persist across the conversation
- *
- * To run: bun run typescript/fetch/src/multi-message-cache.ts
  */
 
 import { LARGE_SYSTEM_PROMPT } from '@openrouter-examples/shared/constants';
diff --git a/typescript/fetch/src/prompt-caching/anthropic-no-cache-control.ts b/typescript/fetch/src/prompt-caching/anthropic-no-cache-control.ts
index e6ff03f..9e9810a 100644
--- a/typescript/fetch/src/prompt-caching/anthropic-no-cache-control.ts
+++ b/typescript/fetch/src/prompt-caching/anthropic-no-cache-control.ts
@@ -16,8 +16,6 @@
  * - Same large system prompt
  * - NO cache_control markers
  * - Should NOT see cache metrics
- *
- * To run: bun run typescript/fetch/src/no-cache-control.ts
  */
 
 import { LARGE_SYSTEM_PROMPT } from '@openrouter-examples/shared/constants';
diff --git a/typescript/fetch/src/prompt-caching/anthropic-user-message-cache.ts b/typescript/fetch/src/prompt-caching/anthropic-user-message-cache.ts
index 35d98be..89cde1f 100644
--- a/typescript/fetch/src/prompt-caching/anthropic-user-message-cache.ts
+++ b/typescript/fetch/src/prompt-caching/anthropic-user-message-cache.ts
@@ -21,8 +21,6 @@
  * Pattern: User message with content-level cache_control (less common but valid)
  * - User message with content array
  * - cache_control on text content block
- *
- * To run: bun run typescript/fetch/src/user-message-cache.ts
  */
 
 import { LARGE_SYSTEM_PROMPT } from '@openrouter-examples/shared/constants';
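With the run instructions and filename references removed, the one contract the examples still assert is the expected metrics: first call `cached_tokens = 0`, second call `cached_tokens > 0`, and zero on both calls for the control scenario. A sketch of that check (hypothetical helper name; the real examples log the comparison rather than throwing):

```typescript
// Minimal verification sketch for the two-call experiment, using the
// OpenAI-compatible usage shape OpenRouter returns.
type Usage = { prompt_tokens_details?: { cached_tokens?: number } };

function verifyCacheBehavior(first: Usage, second: Usage, expectCache: boolean): void {
  const miss = first.prompt_tokens_details?.cached_tokens ?? 0;
  const hit = second.prompt_tokens_details?.cached_tokens ?? 0;

  if (expectCache) {
    // cache_control present: first call creates the cache, second reads it
    if (miss !== 0 || hit <= 0) {
      throw new Error(`expected miss then hit, got ${miss} / ${hit}`);
    }
  } else {
    // control scenario: no cache_control, so neither call reports cached tokens
    if (miss !== 0 || hit !== 0) {
      throw new Error(`expected no caching, got ${miss} / ${hit}`);
    }
  }
  console.log(`cache behavior verified: first=${miss}, second=${hit}`);
}

// Usage (given two parsed responses from identical requests):
// verifyCacheBehavior(response1.usage, response2.usage, true);
```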