From 04ad052d5afd41f404a78c3cb4fc7b5aa8d916e4 Mon Sep 17 00:00:00 2001
From: Tom Aylott
Date: Wed, 19 Nov 2025 11:16:34 -0500
Subject: [PATCH] Add Gemini 3 reasoning fetch example

- Add fetch example for google/gemini-3-pro-preview reasoning
- Add comprehensive documentation at docs/gemini-thinking.md
- Update shared types to include reasoning and reasoning_details fields
- Example demonstrates:
  - Basic reasoning/thinking mode usage
  - Token budget configuration (max_tokens: 2000)
  - Accessing reasoning text and details
  - Token usage metrics

Model: google/gemini-3-pro-preview
- Max reasoning tokens: 200,000
- Context window: 1M tokens
- Reasoning: MANDATORY (always enabled)
- Created: 2025-11-18

Tested successfully:
- HTTP 200 response
- Reasoning tokens: 1,628
- Full reasoning process captured
- Structured reasoning_details present
---
 docs/gemini-thinking.md                            | 268 ++++++++++++++++++
 .../fetch/src/gemini-thinking/README.md            | 126 ++++++++
 .../src/gemini-thinking/basic-reasoning.ts         | 192 +++++++++++++
 typescript/shared/src/types.ts                     |  13 +
 4 files changed, 599 insertions(+)
 create mode 100644 docs/gemini-thinking.md
 create mode 100644 typescript/fetch/src/gemini-thinking/README.md
 create mode 100644 typescript/fetch/src/gemini-thinking/basic-reasoning.ts

diff --git a/docs/gemini-thinking.md b/docs/gemini-thinking.md
new file mode 100644
index 0000000..1955d22
--- /dev/null
+++ b/docs/gemini-thinking.md
@@ -0,0 +1,268 @@
+# Google Gemini 3 Reasoning/Thinking
+
+Google Gemini 3 models support a "thinking mode" feature that allows the model to engage in internal reasoning before generating responses. This improves answer quality on complex, multi-step problems.
+
+## What is Gemini Reasoning?
+
+When reasoning is enabled, Gemini models:
+
+- Allocate tokens for internal "thinking" before responding
+- Work through problems step-by-step
+- Show their reasoning process (unless excluded)
+- Produce higher-quality answers on complex tasks
+
+## Quick Start
+
+### Fetch API
+
+```typescript
+const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
+  method: 'POST',
+  headers: {
+    'Authorization': `Bearer ${OPENROUTER_API_KEY}`,
+    'Content-Type': 'application/json'
+  },
+  body: JSON.stringify({
+    model: 'google/gemini-3-pro-preview',
+    reasoning: {
+      enabled: true,    // Enable thinking mode
+      max_tokens: 4096, // Token budget for thinking
+      exclude: false    // Show thoughts in response
+    },
+    messages: [
+      {
+        role: 'user',
+        content: 'Solve this complex problem step by step...'
+      }
+    ]
+  })
+});
+
+const data = await response.json();
+console.log('Reasoning:', data.choices[0].message.reasoning);
+console.log('Reasoning tokens:', data.usage.completion_tokens_details.reasoning_tokens);
+console.log('Answer:', data.choices[0].message.content);
+```
+
+### AI SDK v5
+
+```typescript
+import { createOpenRouter } from '@openrouter/ai-sdk-provider';
+import { generateText } from 'ai';
+
+const openrouter = createOpenRouter({
+  apiKey: process.env.OPENROUTER_API_KEY,
+});
+
+const result = await generateText({
+  model: openrouter('google/gemini-3-pro-preview'),
+  providerOptions: {
+    openrouter: {
+      reasoning: {
+        enabled: true,
+        maxTokens: 4096,
+        exclude: false
+      }
+    }
+  },
+  messages: [
+    {
+      role: 'user',
+      content: 'Solve this complex problem step by step...'
+    }
+  ]
+});
+
+console.log('Answer:', result.text);
+console.log('Reasoning tokens:', result.providerMetadata?.openrouter?.usage?.completionTokensDetails?.reasoningTokens);
+```
+
+## Examples
+
+### Fetch Examples
+
+- [basic-reasoning.ts](../typescript/fetch/src/gemini-thinking/basic-reasoning.ts) - Basic reasoning with a multi-step problem
+
+### AI SDK v5 Examples
+
+- [basic-reasoning.ts](../typescript/ai-sdk-v5/src/gemini-thinking/basic-reasoning.ts) - Basic reasoning using Vercel AI SDK v5
+
+## API Reference
+
+### Request Parameters
+
+```typescript
+{
+  model: 'google/gemini-3-pro-preview' | 'google/gemini-2.5-pro' | 'google/gemini-2.5-flash',
+  reasoning: {
+    // Option 1: Simple enable (uses default budget)
+    enabled: true,
+
+    // Option 2: Control token budget
+    max_tokens: 4096, // -1 for dynamic, 0 to disable
+
+    // Option 3: Use effort level (maps to budget automatically)
+    effort: 'low' | 'medium' | 'high',
+
+    // Option 4: Hide thoughts from response
+    exclude: true // false = show thoughts, true = hide thoughts
+  },
+  messages: [...]
+}
+```
+
+### Response Format
+
+```typescript
+{
+  choices: [{
+    message: {
+      content: "The final answer",
+      reasoning: "The model's thinking process...", // Only if exclude: false
+      reasoning_details: [ // Structured reasoning metadata
+        {
+          type: "reasoning.text",
+          text: "Internal reasoning text",
+          format: "gemini",
+          index: 0
+        },
+        {
+          type: "reasoning.encrypted", // Google's thoughtSignature
+          data: "encrypted_signature_string",
+          format: "gemini",
+          index: 0
+        }
+      ]
+    }
+  }],
+  usage: {
+    prompt_tokens: 123,
+    completion_tokens: 456,
+    total_tokens: 579,
+    completion_tokens_details: {
+      reasoning_tokens: 234 // Tokens used for thinking
+    }
+  }
+}
```
+
+## Supported Models
+
+| Model | Reasoning Support | Max Thinking Tokens | Context Window |
+|-------|-------------------|---------------------|----------------|
+| `google/gemini-3-pro-preview` | MANDATORY (always enabled) | 200,000 | 1,048,576 (1M) |
+| `google/gemini-2.5-pro` | MANDATORY (always enabled) | 32,768 | 1,048,576 (1M) |
+| `google/gemini-2.5-flash` | OPTIONAL | 24,576 | 1,048,576 (1M) |
+
+**Note:** For Gemini 3 Pro and Gemini 2.5 Pro, reasoning is mandatory and always enabled. The model will use thinking tokens even if you do not explicitly request them.
+
+## Key Concepts
+
+### Thinking Budget
+
+The `max_tokens` parameter controls how many tokens the model can use for internal reasoning:
+
+- **-1 (dynamic)**: Model determines budget automatically
+- **0**: Disable reasoning (only for models where reasoning is optional)
+- **Positive number**: Specific token budget (clamped to model limits)
+
+### Effort Levels
+
+Instead of specifying exact token counts, you can use effort levels:
+
+- **low**: Minimal thinking (faster, lower cost)
+- **medium**: Balanced thinking (default)
+- **high**: Maximum thinking (slower, higher quality)
+
+OpenRouter automatically maps these to appropriate token budgets for each model.
+
+### Excluding Thoughts
+
+Set `exclude: true` to hide the thinking process and receive only the final answer:
+
+```typescript
+reasoning: {
+  enabled: true,
+  exclude: true // Thoughts used internally but not returned
+}
+```
+
+This reduces response size while the answer still benefits from reasoning.
+
+## Important Notes
+
+### Preserving Reasoning Details
+
+**CRITICAL:** When continuing a conversation, you must include `reasoning_details` from previous responses in follow-up requests. Google requires the `thoughtSignature` to be preserved.
+
+```typescript
+// First request
+const response1 = await fetch(...);
+const data1 = await response1.json();
+
+// Follow-up request - must include reasoning_details
+const response2 = await fetch(..., {
+  body: JSON.stringify({
+    messages: [
+      {
+        role: 'assistant',
+        content: data1.choices[0].message.content,
+        reasoning_details: data1.choices[0].message.reasoning_details // REQUIRED
+      },
+      {
+        role: 'user',
+        content: 'Follow-up question'
+      }
+    ]
+  })
+});
+```
+
+### Cost Considerations
+
+Reasoning tokens affect billing:
+
+- Thinking tokens are metered separately in `usage.completion_tokens_details.reasoning_tokens` and count toward completion-token cost
+- More thinking = higher cost but better quality
+- Use effort levels to balance cost vs. quality
+
+### Latency Trade-offs
+
+More thinking tokens = longer response times:
+
+- **Low effort**: Fast responses, good for simple tasks
+- **High effort**: Slower responses, better for complex reasoning
+
+### OpenRouter Transformation
+
+OpenRouter automatically transforms Google's native API to the OpenAI-compatible format:
+
+| Google Native | OpenRouter Format |
+|---------------|-------------------|
+| `generationConfig.thinkingConfig` | `reasoning` parameter |
+| `usageMetadata.thoughtsTokenCount` | `usage.completion_tokens_details.reasoning_tokens` |
+| `parts[].thought: true` | `message.reasoning` |
+| `thoughtSignature` | `reasoning_details[].data` |
+
+This allows you to use the standard OpenAI format while accessing Google's thinking features.
+
+## Resources
+
+- [OpenRouter Docs - Reasoning Tokens](https://openrouter.ai/docs/use-cases/reasoning-tokens)
+- [Google Gemini API Documentation](https://ai.google.dev/docs)
+- [Full Examples Repository](https://github.com/openrouter/openrouter-examples)
+
+## Troubleshooting
+
+### No reasoning tokens in response
+
+**Check:**
+
+1. Is the model supported? (Gemini 2.5+)
+2. Is `reasoning.enabled` set to `true`?
+3. Is the token budget > 0?
+
+### "thought_signature" error in follow-up requests
+
+**Solution:** Include `reasoning_details` from previous responses when continuing conversations.
+
+### High costs
+
+**Solution:** Use lower effort levels or reduce the `max_tokens` thinking budget.
+
+### Slow responses
+
+**Solution:** Use lower effort levels or smaller thinking budgets to reduce latency.
diff --git a/typescript/fetch/src/gemini-thinking/README.md b/typescript/fetch/src/gemini-thinking/README.md
new file mode 100644
index 0000000..bf2e29a
--- /dev/null
+++ b/typescript/fetch/src/gemini-thinking/README.md
@@ -0,0 +1,126 @@
+# Google Gemini 3 Reasoning/Thinking Examples
+
+This directory contains examples of using Google Gemini 3's reasoning/thinking feature via OpenRouter.
+
+## What is Gemini Reasoning?
+
+Gemini 3 models can engage in internal reasoning before generating responses. This "thinking mode" allows the model to:
+
+- Work through complex problems step-by-step
+- Show its reasoning process
+- Improve answer quality on difficult tasks
+
+## How It Works
+
+1. **Request**: Set `reasoning.enabled: true` (or `reasoning.max_tokens`, or `reasoning.effort`)
+2. **Processing**: The model uses "thinking tokens" for internal reasoning
+3. **Response**: You receive both the reasoning process and the final answer
+
+## Examples
+
+### `basic-reasoning.ts`
+
+Demonstrates basic usage of Gemini reasoning with a multi-step problem, as sketched below.
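+
+At its core, the example builds a request body shaped like this (a minimal sketch; the full file adds error handling, usage metrics, and response analysis):
+
+```typescript
+const requestBody = {
+  model: 'google/gemini-3-pro-preview',
+  reasoning: {
+    enabled: true,    // enable thinking mode
+    max_tokens: 2000, // thinking budget used by the example
+    exclude: false,   // return thoughts alongside the answer
+  },
+  messages: [{ role: 'user', content: 'Solve this problem step by step: ...' }],
+};
+```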
+
+**Run:**
+
+```bash
+bun run src/gemini-thinking/basic-reasoning.ts
+```
+
+**Key Features:**
+
+- Enables reasoning mode
+- Shows thinking token usage
+- Displays reasoning process
+- Returns final answer
+
+## API Parameters
+
+### Request Format
+
+```typescript
+{
+  model: 'google/gemini-3-pro-preview',
+  reasoning: {
+    enabled: true,    // Enable thinking mode
+    max_tokens: 4096, // Token budget for thinking
+    exclude: false    // true = hide thoughts, false = show thoughts
+  },
+  messages: [...]
+}
```
+
+### Alternative: Effort Levels
+
+```typescript
+{
+  model: 'google/gemini-3-pro-preview',
+  reasoning: {
+    effort: 'medium' // 'low', 'medium', 'high'
+  },
+  messages: [...]
+}
+```
+
+## Response Format
+
+```typescript
+{
+  choices: [{
+    message: {
+      content: "The final answer",
+      reasoning: "The model's thinking process...",
+      reasoning_details: [
+        {
+          type: "reasoning.text",
+          text: "Internal reasoning...",
+          format: "gemini"
+        },
+        {
+          type: "reasoning.encrypted",
+          data: "encrypted_signature",
+          format: "gemini"
+        }
+      ]
+    }
+  }],
+  usage: {
+    prompt_tokens: 123,
+    completion_tokens: 456,
+    completion_tokens_details: {
+      reasoning_tokens: 234 // Tokens used for thinking
+    }
+  }
+}
+```
+
+## Key Points
+
+### Model Support
+
+- ✅ `google/gemini-3-pro-preview` - Reasoning MANDATORY (always enabled)
+- ✅ `google/gemini-2.5-pro` - Reasoning MANDATORY (always enabled)
+- ✅ `google/gemini-2.5-flash` - Reasoning OPTIONAL
+
+### Token Budgets
+
+- **Gemini 3 Pro**: Max 200,000 thinking tokens, 1M context window
+- **Gemini 2.5 Pro**: Max 32,768 thinking tokens
+- **Gemini 2.5 Flash**: Max 24,576 thinking tokens
+
+### Important Notes
+
+- **Preserve reasoning_details**: Include `reasoning_details` from previous messages in follow-up requests
+- **Cost**: Thinking tokens are metered separately as `reasoning_tokens` and count toward completion cost
+- **Latency**: More thinking tokens = longer response time
+- **Quality**: Higher thinking budgets improve answer quality on complex tasks
+
+## OpenRouter Transformation
+
+OpenRouter automatically transforms Google's native API to the OpenAI-compatible format:
+
+| Google Native | OpenRouter (OpenAI-compatible) |
+|---------------|--------------------------------|
+| `usageMetadata.thoughtsTokenCount` | `usage.completion_tokens_details.reasoning_tokens` |
+| `parts[].thought: true` | `message.reasoning` |
+| `thoughtSignature` | `reasoning_details[].data` |
+
+## Resources
+
+- [OpenRouter Docs - Reasoning Tokens](https://openrouter.ai/docs/use-cases/reasoning-tokens)
+- [Google Gemini API Docs](https://ai.google.dev/docs)
diff --git a/typescript/fetch/src/gemini-thinking/basic-reasoning.ts b/typescript/fetch/src/gemini-thinking/basic-reasoning.ts
new file mode 100644
index 0000000..8f328bb
--- /dev/null
+++ b/typescript/fetch/src/gemini-thinking/basic-reasoning.ts
@@ -0,0 +1,192 @@
+/**
+ * Example: Google Gemini 3 Reasoning/Thinking Details
+ *
+ * This example demonstrates requesting reasoning details from Gemini 3 models via OpenRouter.
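+ *
+ * A successful test run returned HTTP 200 with reasoning_tokens = 1628,
+ * the full reasoning text, and structured reasoning_details present
+ * (exact values will vary between runs).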
+ *
+ * Scientific Method:
+ * - Hypothesis: reasoning.enabled triggers Google's thinkingConfig
+ * - Experiment: Make a request with reasoning enabled and measure reasoning_tokens in usage
+ * - Evidence: usage.completion_tokens_details.reasoning_tokens (OpenAI-compatible format)
+ *
+ * IMPORTANT: OpenRouter transforms Google's native response format to the OpenAI-compatible format:
+ * - Google native: usageMetadata.thoughtsTokenCount, parts[].thought
+ * - OpenRouter returns: usage.completion_tokens_details.reasoning_tokens, message.reasoning
+ *
+ * Gemini Reasoning Requirements:
+ * - Model: google/gemini-3-pro-preview (Gemini 3)
+ * - reasoning.enabled: true (or reasoning.max_tokens, or reasoning.effort)
+ * - Gemini 3 Pro: reasoning is MANDATORY (always enabled)
+ * - Max reasoning tokens: 200,000
+ * - Context window: 1,048,576 tokens (1M)
+ *
+ * Pattern: Single request with reasoning enabled
+ * - Request with reasoning.enabled: true
+ * - Response includes reasoning text and token count
+ * - reasoning_details must be preserved in follow-up requests
+ */
+
+import type { ChatCompletionResponse } from '@openrouter-examples/shared/types';
+
+// OpenRouter API endpoint
+const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions';
+
+/**
+ * Make a chat completion request to OpenRouter with Gemini reasoning
+ */
+async function makeRequest(
+  requestBody: unknown,
+  description: string,
+): Promise<ChatCompletionResponse> {
+  console.log(`\n${description}`);
+
+  if (!process.env.OPENROUTER_API_KEY) {
+    throw new Error('OPENROUTER_API_KEY environment variable is not set');
+  }
+
+  const response = await fetch(OPENROUTER_API_URL, {
+    method: 'POST',
+    headers: {
+      Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
+      'Content-Type': 'application/json',
+      'HTTP-Referer': 'https://github.com/openrouter/examples',
+      'X-Title': 'Gemini Reasoning Example',
+    },
+    body: JSON.stringify(requestBody),
+  });
+
+  if (!response.ok) {
+    const errorText = await response.text();
+    throw new Error(`HTTP error! status: ${response.status}, body: ${errorText}`);
+  }
+
+  const data = (await response.json()) as ChatCompletionResponse;
+
+  // Show reasoning-relevant metrics in OpenAI-compatible format
+  const reasoningTokens = data.usage.completion_tokens_details?.reasoning_tokens ?? 0;
+  const promptTokens = data.usage.prompt_tokens;
+  const completionTokens = data.usage.completion_tokens;
+
+  const metrics: string[] = [`prompt=${promptTokens}`, `completion=${completionTokens}`];
+
+  if (reasoningTokens > 0) {
+    metrics.push(`reasoning=${reasoningTokens} ✓ (THINKING ENABLED)`);
+  } else {
+    metrics.push('reasoning=0 (NO THINKING)');
+  }
+
+  console.log(`  ${metrics.join(', ')}`);
+
+  // Show reasoning text if present
+  if (data.choices[0]?.message?.reasoning) {
+    const reasoning = data.choices[0].message.reasoning;
+    console.log(`  Reasoning preview: ${reasoning.substring(0, 100)}...`);
+  }
+
+  return data;
+}
+
+/**
+ * Main example
+ */
+async function main() {
+  console.log('╔════════════════════════════════════════════════════════════════════════════╗');
+  console.log('║                 Google Gemini 3 - Reasoning/Thinking Details                ║');
+  console.log('╚════════════════════════════════════════════════════════════════════════════╝');
+  console.log();
+  console.log('Testing Gemini reasoning feature with a multi-step problem');
+  console.log();
+  console.log('Expected behavior:');
+  console.log('  1. reasoning_tokens > 0 (model used thinking tokens)');
+  console.log('  2. message.reasoning contains thinking process');
+  console.log('  3. message.reasoning_details[] contains structured details');
+  console.log();
+
+  try {
+    const requestBody = {
+      model: 'google/gemini-3-pro-preview',
+      reasoning: {
+        enabled: true,    // Enable thinking mode
+        max_tokens: 2000, // Allocate thinking budget (smaller for faster response)
+        exclude: false,   // Show thoughts in response
+      },
+      messages: [
+        {
+          role: 'user',
+          content:
+            'Solve this problem step by step: If a train leaves station A at 2pm traveling 60mph, and another train leaves station B (120 miles away) at 2:30pm traveling 80mph toward station A, when and where do they meet?',
+        },
+      ],
+    };
+
+    // Make request with reasoning enabled
+    const response = await makeRequest(requestBody, 'Request with Reasoning Enabled');
+
+    // Analyze response
+    console.log('\n' + '='.repeat(80));
+    console.log('ANALYSIS');
+    console.log('='.repeat(80));
+
+    const reasoningTokens = response.usage.completion_tokens_details?.reasoning_tokens ?? 0;
+    const reasoning = response.choices[0]?.message?.reasoning;
+    const reasoningDetails = response.choices[0]?.message?.reasoning_details;
+    const answer = response.choices[0]?.message?.content;
+
+    console.log(`Reasoning tokens: ${reasoningTokens}`);
+    console.log(`Has reasoning text: ${reasoning ? 'YES' : 'NO'}`);
+    console.log(`Has reasoning_details: ${reasoningDetails ? 'YES' : 'NO'}`);
+    console.log(
+      `Reasoning details count: ${Array.isArray(reasoningDetails) ? reasoningDetails.length : 0}`,
+    );
+
+    if (reasoningTokens > 0) {
+      console.log(`✓ Reasoning enabled: ${reasoningTokens} tokens used for thinking`);
+    } else {
+      console.log('✗ No reasoning tokens detected');
+    }
+
+    if (reasoning) {
+      console.log('\n--- Reasoning Process ---');
+      console.log(reasoning);
+    }
+
+    if (reasoningDetails && Array.isArray(reasoningDetails)) {
+      console.log('\n--- Reasoning Details (structured) ---');
+      for (const detail of reasoningDetails) {
+        console.log(`Type: ${detail.type}, Format: ${detail.format}`);
+        if (detail.type === 'reasoning.text') {
+          console.log(`Text preview: ${detail.text?.substring(0, 100)}...`);
+        }
+      }
+    }
+
+    console.log('\n--- Final Answer ---');
+    console.log(answer);
+
+    const success = reasoningTokens > 0 && reasoning;
+    console.log(`\nResult: ${success ? '✓ REASONING WORKING' : '✗ REASONING NOT WORKING'}`);
+
+    if (success) {
+      console.log('\n════════════════════════════════════════════════════════════════════════════');
+      console.log('✓ SUCCESS - Gemini reasoning is working correctly');
+      console.log('════════════════════════════════════════════════════════════════════════════');
+    } else {
+      console.log('\n════════════════════════════════════════════════════════════════════════════');
+      console.log('✗ FAILURE - Gemini reasoning is not working as expected');
+      console.log('════════════════════════════════════════════════════════════════════════════');
+    }
+  } catch (error) {
+    console.error('\n❌ ERROR during testing:');
+
+    if (error instanceof Error) {
+      console.error('Error message:', error.message);
+      console.error('Stack trace:', error.stack);
+    } else {
+      console.error('Unknown error:', error);
+    }
+
+    process.exit(1);
+  }
+}
+
+// Run the example
+main();
diff --git a/typescript/shared/src/types.ts b/typescript/shared/src/types.ts
index 2bb0bde..0dbd68f 100644
--- a/typescript/shared/src/types.ts
+++ b/typescript/shared/src/types.ts
@@ -49,6 +49,17 @@ export interface Usage {
   };
 }
 
+/**
+ * Reasoning detail object
+ */
+export interface ReasoningDetail {
+  type: 'reasoning.text' | 'reasoning.encrypted';
+  text?: string;
+  data?: string;
+  format: string;
+  index?: number;
+}
+
 /**
  * Chat completion response (OpenAI-compatible format)
  */
@@ -60,6 +71,8 @@ export interface ChatCompletionResponse {
     message: {
       role: string;
       content: string;
+      reasoning?: string;
+      reasoning_details?: ReasoningDetail[];
     };
     finish_reason: string;
   }>;