From 04ad052d5afd41f404a78c3cb4fc7b5aa8d916e4 Mon Sep 17 00:00:00 2001
From: Tom Aylott
Date: Wed, 19 Nov 2025 11:16:34 -0500
Subject: [PATCH] Add Gemini 3 reasoning fetch example

- Add fetch example for google/gemini-3-pro-preview reasoning
- Add comprehensive documentation at docs/gemini-thinking.md
- Update shared types to include reasoning and reasoning_details fields
- Example demonstrates:
  - Basic reasoning/thinking mode usage
  - Token budget configuration (max_tokens: 2000)
  - Accessing reasoning text and details
  - Token usage metrics

Model: google/gemini-3-pro-preview
- Max reasoning tokens: 200,000
- Context window: 1M tokens
- Reasoning: MANDATORY (always enabled)
- Created: 2025-11-18

Tested successfully:
- HTTP 200 response
- Reasoning tokens: 1,628
- Full reasoning process captured
- Structured reasoning_details present
---
 docs/gemini-thinking.md                            | 268 ++++++++++++++++++
 .../fetch/src/gemini-thinking/README.md            | 126 ++++++++
 .../src/gemini-thinking/basic-reasoning.ts         | 192 +++++++++++++
 typescript/shared/src/types.ts                     |  13 +
 4 files changed, 599 insertions(+)
 create mode 100644 docs/gemini-thinking.md
 create mode 100644 typescript/fetch/src/gemini-thinking/README.md
 create mode 100644 typescript/fetch/src/gemini-thinking/basic-reasoning.ts

diff --git a/docs/gemini-thinking.md b/docs/gemini-thinking.md
new file mode 100644
index 0000000..1955d22
--- /dev/null
+++ b/docs/gemini-thinking.md
@@ -0,0 +1,268 @@
+# Google Gemini 3 Reasoning/Thinking
+
+Google Gemini 3 models support a "thinking mode" feature that allows the model to engage in internal reasoning before generating responses. This improves answer quality on complex, multi-step problems.
+
+## What is Gemini Reasoning?
+
+When reasoning is enabled, Gemini models:
+
+- Allocate tokens for internal "thinking" before responding
+- Work through problems step-by-step
+- Show their reasoning process (unless excluded)
+- Produce higher-quality answers on complex tasks
+
+## Quick Start
+
+### Fetch API
+
+```typescript
+const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
+  method: 'POST',
+  headers: {
+    'Authorization': `Bearer ${OPENROUTER_API_KEY}`,
+    'Content-Type': 'application/json'
+  },
+  body: JSON.stringify({
+    model: 'google/gemini-3-pro-preview',
+    reasoning: {
+      enabled: true,    // Enable thinking mode
+      max_tokens: 4096, // Token budget for thinking
+      exclude: false    // Show thoughts in response
+    },
+    messages: [
+      {
+        role: 'user',
+        content: 'Solve this complex problem step by step...'
+      }
+    ]
+  })
+});
+
+const data = await response.json();
+console.log('Reasoning:', data.choices[0].message.reasoning);
+console.log('Reasoning tokens:', data.usage.completion_tokens_details.reasoning_tokens);
+console.log('Answer:', data.choices[0].message.content);
+```
+
+### AI SDK v5
+
+```typescript
+import { createOpenRouter } from '@openrouter/ai-sdk-provider';
+import { generateText } from 'ai';
+
+const openrouter = createOpenRouter({
+  apiKey: process.env.OPENROUTER_API_KEY,
+});
+
+const result = await generateText({
+  model: openrouter('google/gemini-3-pro-preview'),
+  providerOptions: {
+    openrouter: {
+      reasoning: {
+        enabled: true,
+        maxTokens: 4096,
+        exclude: false
+      }
+    }
+  },
+  messages: [
+    {
+      role: 'user',
+      content: 'Solve this complex problem step by step...'
+    }
+  ]
+});
+
+console.log('Answer:', result.text);
+console.log('Reasoning tokens:', result.providerMetadata?.openrouter?.usage?.completionTokensDetails?.reasoningTokens);
+```
+
+## Examples
+
+### Fetch Examples
+
+- [basic-reasoning.ts](../typescript/fetch/src/gemini-thinking/basic-reasoning.ts) - Basic reasoning with a multi-step problem
+
+### AI SDK v5 Examples
+
+- [basic-reasoning.ts](../typescript/ai-sdk-v5/src/gemini-thinking/basic-reasoning.ts) - Basic reasoning using Vercel AI SDK v5
+
+## API Reference
+
+### Request Parameters
+
+```typescript
+{
+  model: 'google/gemini-3-pro-preview' | 'google/gemini-2.5-pro' | 'google/gemini-2.5-flash',
+  reasoning: {
+    // Option 1: Simple enable (uses default budget)
+    enabled: true,
+
+    // Option 2: Control token budget
+    max_tokens: 4096, // -1 for dynamic, 0 to disable
+
+    // Option 3: Use effort level (maps to budget automatically)
+    effort: 'low' | 'medium' | 'high',
+
+    // Option 4: Hide thoughts from response
+    exclude: true // false = show thoughts, true = hide thoughts
+  },
+  messages: [...]
+}
+```
+
+### Response Format
+
+```typescript
+{
+  choices: [{
+    message: {
+      content: "The final answer",
+      reasoning: "The model's thinking process...", // Only if exclude: false
+      reasoning_details: [ // Structured reasoning metadata
+        {
+          type: "reasoning.text",
+          text: "Internal reasoning text",
+          format: "gemini",
+          index: 0
+        },
+        {
+          type: "reasoning.encrypted", // Google's thoughtSignature
+          data: "encrypted_signature_string",
+          format: "gemini",
+          index: 0
+        }
+      ]
+    }
+  }],
+  usage: {
+    prompt_tokens: 123,
+    completion_tokens: 456,
+    total_tokens: 579,
+    completion_tokens_details: {
+      reasoning_tokens: 234 // Tokens used for thinking
+    }
+  }
+}
```
+
+## Supported Models
+
+| Model | Reasoning Support | Max Thinking Tokens | Context Window |
+|-------|-------------------|---------------------|----------------|
+| `google/gemini-3-pro-preview` | MANDATORY (always enabled) | 200,000 | 1,048,576 (1M) |
+| `google/gemini-2.5-pro` | MANDATORY (always enabled) | 32,768 | 1,048,576 (1M) |
+| `google/gemini-2.5-flash` | OPTIONAL | 24,576 | 1,048,576 (1M) |
+
+**Note:** For Gemini 3 Pro and Gemini 2.5 Pro, reasoning is mandatory and always enabled. The model will use thinking tokens even if you do not explicitly request them.
+
+## Key Concepts
+
+### Thinking Budget
+
+The `max_tokens` parameter controls how many tokens the model can use for internal reasoning:
+
+- **-1 (dynamic)**: Model determines budget automatically
+- **0**: Disable reasoning (only for models where reasoning is optional)
+- **Positive number**: Specific token budget (clamped to model limits)
+
+### Effort Levels
+
+Instead of specifying exact token counts, you can use effort levels:
+
+- **low**: Minimal thinking (faster, lower cost)
+- **medium**: Balanced thinking (default)
+- **high**: Maximum thinking (slower, higher quality)
+
+OpenRouter automatically maps these to appropriate token budgets for each model.
+
+### Excluding Thoughts
+
+Set `exclude: true` to hide the thinking process and receive only the final answer:
+
+```typescript
+reasoning: {
+  enabled: true,
+  exclude: true // Thoughts used internally but not returned
+}
+```
+
+This reduces response size while the answer still benefits from reasoning.
+
+## Important Notes
+
+### Preserving Reasoning Details
+
+**CRITICAL:** When continuing a conversation, you must include `reasoning_details` from previous responses in follow-up requests. Google requires the `thoughtSignature` to be preserved.
+
+```typescript
+// First request
+const response1 = await fetch(...);
+const data1 = await response1.json();
+
+// Follow-up request - must include reasoning_details
+const response2 = await fetch(..., {
+  body: JSON.stringify({
+    messages: [
+      {
+        role: 'assistant',
+        content: data1.choices[0].message.content,
+        reasoning_details: data1.choices[0].message.reasoning_details // REQUIRED
+      },
+      {
+        role: 'user',
+        content: 'Follow-up question'
+      }
+    ]
+  })
+});
+```
+
+### Cost Considerations
+
+Reasoning tokens affect billing:
+
+- Thinking tokens are metered separately in `usage.completion_tokens_details.reasoning_tokens` and count toward completion-token cost
+- More thinking = higher cost but better quality
+- Use effort levels to balance cost vs. quality
+
+### Latency Trade-offs
+
+More thinking tokens = longer response times:
+
+- **Low effort**: Fast responses, good for simple tasks
+- **High effort**: Slower responses, better for complex reasoning
+
+### OpenRouter Transformation
+
+OpenRouter automatically transforms Google's native API to the OpenAI-compatible format:
+
+| Google Native | OpenRouter Format |
+|---------------|-------------------|
+| `generationConfig.thinkingConfig` | `reasoning` parameter |
+| `usageMetadata.thoughtsTokenCount` | `usage.completion_tokens_details.reasoning_tokens` |
+| `parts[].thought: true` | `message.reasoning` |
+| `thoughtSignature` | `reasoning_details[].data` |
+
+This allows you to use the standard OpenAI format while accessing Google's thinking features.
+
+## Resources
+
+- [OpenRouter Docs - Reasoning Tokens](https://openrouter.ai/docs/use-cases/reasoning-tokens)
+- [Google Gemini API Documentation](https://ai.google.dev/docs)
+- [Full Examples Repository](https://github.com/openrouter/openrouter-examples)
+
+## Troubleshooting
+
+### No reasoning tokens in response
+
+**Check:**
+
+1. Is the model supported? (Gemini 2.5+)
+2. Is `reasoning.enabled` set to `true`?
+3. Is the token budget > 0?
+
+### "thought_signature" error in follow-up requests
+
+**Solution:** Include `reasoning_details` from previous responses when continuing conversations.
+
+### High costs
+
+**Solution:** Use lower effort levels or reduce the `max_tokens` thinking budget.
+
+### Slow responses
+
+**Solution:** Use lower effort levels or smaller thinking budgets to reduce latency.
diff --git a/typescript/fetch/src/gemini-thinking/README.md b/typescript/fetch/src/gemini-thinking/README.md
new file mode 100644
index 0000000..bf2e29a
--- /dev/null
+++ b/typescript/fetch/src/gemini-thinking/README.md
@@ -0,0 +1,126 @@
+# Google Gemini 3 Reasoning/Thinking Examples
+
+This directory contains examples of using Google Gemini 3's reasoning/thinking feature via OpenRouter.
+
+## What is Gemini Reasoning?
+
+Gemini 3 models can engage in internal reasoning before generating responses. This "thinking mode" allows the model to:
+
+- Work through complex problems step-by-step
+- Show its reasoning process
+- Improve answer quality on difficult tasks
+
+## How It Works
+
+1. **Request**: Set `reasoning.enabled: true` (or `reasoning.max_tokens`, or `reasoning.effort`)
+2. **Processing**: The model uses "thinking tokens" for internal reasoning
+3. **Response**: You receive both the reasoning process and the final answer
+
+## Examples
+
+### `basic-reasoning.ts`
+
+Demonstrates basic usage of Gemini reasoning with a multi-step problem, as sketched below.
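+
+At its core, the example builds a request body shaped like this (a minimal sketch; the full file adds error handling, usage metrics, and response analysis):
+
+```typescript
+const requestBody = {
+  model: 'google/gemini-3-pro-preview',
+  reasoning: {
+    enabled: true,    // enable thinking mode
+    max_tokens: 2000, // thinking budget used by the example
+    exclude: false,   // return thoughts alongside the answer
+  },
+  messages: [{ role: 'user', content: 'Solve this problem step by step: ...' }],
+};
+```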
+
+**Run:**
+
+```bash
+bun run src/gemini-thinking/basic-reasoning.ts
+```
+
+**Key Features:**
+
+- Enables reasoning mode
+- Shows thinking token usage
+- Displays reasoning process
+- Returns final answer
+
+## API Parameters
+
+### Request Format
+
+```typescript
+{
+  model: 'google/gemini-3-pro-preview',
+  reasoning: {
+    enabled: true,    // Enable thinking mode
+    max_tokens: 4096, // Token budget for thinking
+    exclude: false    // true = hide thoughts, false = show thoughts
+  },
+  messages: [...]
+}
```
+
+### Alternative: Effort Levels
+
+```typescript
+{
+  model: 'google/gemini-3-pro-preview',
+  reasoning: {
+    effort: 'medium' // 'low', 'medium', 'high'
+  },
+  messages: [...]
+}
+```
+
+## Response Format
+
+```typescript
+{
+  choices: [{
+    message: {
+      content: "The final answer",
+      reasoning: "The model's thinking process...",
+      reasoning_details: [
+        {
+          type: "reasoning.text",
+          text: "Internal reasoning...",
+          format: "gemini"
+        },
+        {
+          type: "reasoning.encrypted",
+          data: "encrypted_signature",
+          format: "gemini"
+        }
+      ]
+    }
+  }],
+  usage: {
+    prompt_tokens: 123,
+    completion_tokens: 456,
+    completion_tokens_details: {
+      reasoning_tokens: 234 // Tokens used for thinking
+    }
+  }
+}
+```
+
+## Key Points
+
+### Model Support
+
+- ✅ `google/gemini-3-pro-preview` - Reasoning MANDATORY (always enabled)
+- ✅ `google/gemini-2.5-pro` - Reasoning MANDATORY (always enabled)
+- ✅ `google/gemini-2.5-flash` - Reasoning OPTIONAL
+
+### Token Budgets
+
+- **Gemini 3 Pro**: Max 200,000 thinking tokens, 1M context window
+- **Gemini 2.5 Pro**: Max 32,768 thinking tokens
+- **Gemini 2.5 Flash**: Max 24,576 thinking tokens
+
+### Important Notes
+
+- **Preserve reasoning_details**: Include `reasoning_details` from previous messages in follow-up requests
+- **Cost**: Thinking tokens are metered separately as `reasoning_tokens` and count toward completion cost
+- **Latency**: More thinking tokens = longer response time
+- **Quality**: Higher thinking budgets improve answer quality on complex tasks
+
+## OpenRouter Transformation
+
+OpenRouter automatically transforms Google's native API to the OpenAI-compatible format:
+
+| Google Native | OpenRouter (OpenAI-compatible) |
+|---------------|--------------------------------|
+| `usageMetadata.thoughtsTokenCount` | `usage.completion_tokens_details.reasoning_tokens` |
+| `parts[].thought: true` | `message.reasoning` |
+| `thoughtSignature` | `reasoning_details[].data` |
+
+## Resources
+
+- [OpenRouter Docs - Reasoning Tokens](https://openrouter.ai/docs/use-cases/reasoning-tokens)
+- [Google Gemini API Docs](https://ai.google.dev/docs)
diff --git a/typescript/fetch/src/gemini-thinking/basic-reasoning.ts b/typescript/fetch/src/gemini-thinking/basic-reasoning.ts
new file mode 100644
index 0000000..8f328bb
--- /dev/null
+++ b/typescript/fetch/src/gemini-thinking/basic-reasoning.ts
@@ -0,0 +1,192 @@
+/**
+ * Example: Google Gemini 3 Reasoning/Thinking Details
+ *
+ * This example demonstrates requesting reasoning details from Gemini 3 models via OpenRouter.
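+ *
+ * A successful test run returned HTTP 200 with reasoning_tokens = 1628,
+ * the full reasoning text, and structured reasoning_details present
+ * (exact values will vary between runs).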
+ *
+ * Scientific Method:
+ * - Hypothesis: reasoning.enabled triggers Google's thinkingConfig
+ * - Experiment: Make a request with reasoning enabled and measure reasoning_tokens in usage
+ * - Evidence: usage.completion_tokens_details.reasoning_tokens (OpenAI-compatible format)
+ *
+ * IMPORTANT: OpenRouter transforms Google's native response format to the OpenAI-compatible format:
+ * - Google native: usageMetadata.thoughtsTokenCount, parts[].thought
+ * - OpenRouter returns: usage.completion_tokens_details.reasoning_tokens, message.reasoning
+ *
+ * Gemini Reasoning Requirements:
+ * - Model: google/gemini-3-pro-preview (Gemini 3)
+ * - reasoning.enabled: true (or reasoning.max_tokens, or reasoning.effort)
+ * - Gemini 3 Pro: reasoning is MANDATORY (always enabled)
+ * - Max reasoning tokens: 200,000
+ * - Context window: 1,048,576 tokens (1M)
+ *
+ * Pattern: Single request with reasoning enabled
+ * - Request with reasoning.enabled: true
+ * - Response includes reasoning text and token count
+ * - reasoning_details must be preserved in follow-up requests
+ */
+
+import type { ChatCompletionResponse } from '@openrouter-examples/shared/types';
+
+// OpenRouter API endpoint
+const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions';
+
+/**
+ * Make a chat completion request to OpenRouter with Gemini reasoning
+ */
+async function makeRequest(
+  requestBody: unknown,
+  description: string,
+): Promise<ChatCompletionResponse> {
+  console.log(`\n${description}`);
+
+  if (!process.env.OPENROUTER_API_KEY) {
+    throw new Error('OPENROUTER_API_KEY environment variable is not set');
+  }
+
+  const response = await fetch(OPENROUTER_API_URL, {
+    method: 'POST',
+    headers: {
+      Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
+      'Content-Type': 'application/json',
+      'HTTP-Referer': 'https://github.com/openrouter/examples',
+      'X-Title': 'Gemini Reasoning Example',
+    },
+    body: JSON.stringify(requestBody),
+  });
+
+  if (!response.ok) {
+    const errorText = await response.text();
+    throw new Error(`HTTP error! status: ${response.status}, body: ${errorText}`);
+  }
+
+  const data = (await response.json()) as ChatCompletionResponse;
+
+  // Show reasoning-relevant metrics in OpenAI-compatible format
+  const reasoningTokens = data.usage.completion_tokens_details?.reasoning_tokens ?? 0;
+  const promptTokens = data.usage.prompt_tokens;
+  const completionTokens = data.usage.completion_tokens;
+
+  const metrics: string[] = [`prompt=${promptTokens}`, `completion=${completionTokens}`];
+
+  if (reasoningTokens > 0) {
+    metrics.push(`reasoning=${reasoningTokens} ✓ (THINKING ENABLED)`);
+  } else {
+    metrics.push('reasoning=0 (NO THINKING)');
+  }
+
+  console.log(`  ${metrics.join(', ')}`);
+
+  // Show reasoning text if present
+  if (data.choices[0]?.message?.reasoning) {
+    const reasoning = data.choices[0].message.reasoning;
+    console.log(`  Reasoning preview: ${reasoning.substring(0, 100)}...`);
+  }
+
+  return data;
+}
+
+/**
+ * Main example
+ */
+async function main() {
+  console.log('╔════════════════════════════════════════════════════════════════════════════╗');
+  console.log('║                 Google Gemini 3 - Reasoning/Thinking Details                ║');
+  console.log('╚════════════════════════════════════════════════════════════════════════════╝');
+  console.log();
+  console.log('Testing Gemini reasoning feature with a multi-step problem');
+  console.log();
+  console.log('Expected behavior:');
+  console.log('  1. reasoning_tokens > 0 (model used thinking tokens)');
+  console.log('  2. message.reasoning contains thinking process');
+  console.log('  3. message.reasoning_details[] contains structured details');
+  console.log();
+
+  try {
+    const requestBody = {
+      model: 'google/gemini-3-pro-preview',
+      reasoning: {
+        enabled: true,    // Enable thinking mode
+        max_tokens: 2000, // Allocate thinking budget (smaller for faster response)
+        exclude: false,   // Show thoughts in response
+      },
+      messages: [
+        {
+          role: 'user',
+          content:
+            'Solve this problem step by step: If a train leaves station A at 2pm traveling 60mph, and another train leaves station B (120 miles away) at 2:30pm traveling 80mph toward station A, when and where do they meet?',
+        },
+      ],
+    };
+
+    // Make request with reasoning enabled
+    const response = await makeRequest(requestBody, 'Request with Reasoning Enabled');
+
+    // Analyze response
+    console.log('\n' + '='.repeat(80));
+    console.log('ANALYSIS');
+    console.log('='.repeat(80));
+
+    const reasoningTokens = response.usage.completion_tokens_details?.reasoning_tokens ?? 0;
+    const reasoning = response.choices[0]?.message?.reasoning;
+    const reasoningDetails = response.choices[0]?.message?.reasoning_details;
+    const answer = response.choices[0]?.message?.content;
+
+    console.log(`Reasoning tokens: ${reasoningTokens}`);
+    console.log(`Has reasoning text: ${reasoning ? 'YES' : 'NO'}`);
+    console.log(`Has reasoning_details: ${reasoningDetails ? 'YES' : 'NO'}`);
+    console.log(
+      `Reasoning details count: ${Array.isArray(reasoningDetails) ? reasoningDetails.length : 0}`,
+    );
+
+    if (reasoningTokens > 0) {
+      console.log(`✓ Reasoning enabled: ${reasoningTokens} tokens used for thinking`);
+    } else {
+      console.log('✗ No reasoning tokens detected');
+    }
+
+    if (reasoning) {
+      console.log('\n--- Reasoning Process ---');
+      console.log(reasoning);
+    }
+
+    if (reasoningDetails && Array.isArray(reasoningDetails)) {
+      console.log('\n--- Reasoning Details (structured) ---');
+      for (const detail of reasoningDetails) {
+        console.log(`Type: ${detail.type}, Format: ${detail.format}`);
+        if (detail.type === 'reasoning.text') {
+          console.log(`Text preview: ${detail.text?.substring(0, 100)}...`);
+        }
+      }
+    }
+
+    console.log('\n--- Final Answer ---');
+    console.log(answer);
+
+    const success = reasoningTokens > 0 && reasoning;
+    console.log(`\nResult: ${success ? '✓ REASONING WORKING' : '✗ REASONING NOT WORKING'}`);
+
+    if (success) {
+      console.log('\n════════════════════════════════════════════════════════════════════════════');
+      console.log('✓ SUCCESS - Gemini reasoning is working correctly');
+      console.log('════════════════════════════════════════════════════════════════════════════');
+    } else {
+      console.log('\n════════════════════════════════════════════════════════════════════════════');
+      console.log('✗ FAILURE - Gemini reasoning is not working as expected');
+      console.log('════════════════════════════════════════════════════════════════════════════');
+    }
+  } catch (error) {
+    console.error('\n❌ ERROR during testing:');
+
+    if (error instanceof Error) {
+      console.error('Error message:', error.message);
+      console.error('Stack trace:', error.stack);
+    } else {
+      console.error('Unknown error:', error);
+    }
+
+    process.exit(1);
+  }
+}
+
+// Run the example
+main();
diff --git a/typescript/shared/src/types.ts b/typescript/shared/src/types.ts
index 2bb0bde..0dbd68f 100644
--- a/typescript/shared/src/types.ts
+++ b/typescript/shared/src/types.ts
@@ -49,6 +49,17 @@ export interface Usage {
   };
 }
 
+/**
+ * Reasoning detail object
+ */
+export interface ReasoningDetail {
+  type: 'reasoning.text' | 'reasoning.encrypted';
+  text?: string;
+  data?: string;
+  format: string;
+  index?: number;
+}
+
 /**
  * Chat completion response (OpenAI-compatible format)
  */
@@ -60,6 +71,8 @@ export interface ChatCompletionResponse {
     message: {
       role: string;
       content: string;
+      reasoning?: string;
+      reasoning_details?: ReasoningDetail[];
     };
     finish_reason: string;
   }>;