From 77e0bd38d7a7bf2dad041971b9c9cfb019aec6f8 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Fri, 1 May 2026 14:32:07 -0700
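Subject: [PATCH] Restore Gemini thinker for Kimi freebuff

Gemini 3.1 Pro is removed from the selectable freebuff models. It is
offered instead through the thinker-with-files-gemini subagent, which
base2-free may spawn when Kimi is the selected model.

- Add shared constants for the thinker agent id and its prompt guidance.
- base2: make the thinker spawnable and add the guidance for Kimi only.
- CLI: drop the thinker and its guidance lines from base2-free for other
  models, and key the local-agent cache on the selected freebuff model.
- Allowlist Gemini Pro for the thinker agent in free mode.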
---
 agents/base2/base2.ts                         |  18 ++
 .../integration/local-agents.test.ts          |  87 +++++++++-
 .../components/freebuff-model-selector.tsx    |  22 +--
 cli/src/hooks/use-send-message.ts             |  34 ++--
 cli/src/utils/local-agent-registry.ts         | 140 +++++++++++++---
 common/src/__tests__/free-agents.test.ts      |  38 +++++
 common/src/__tests__/freebuff-models.test.ts  |  20 ---
 common/src/constants/free-agents.ts           |  16 +-
 .../src/constants/freebuff-gemini-thinker.ts  |  16 ++
 common/src/constants/freebuff-models.ts       |   6 -
 .../completions/__tests__/completions.test.ts |  68 +++++++-
 web/src/app/api/v1/chat/completions/_post.ts  |  12 +-
 web/src/llm-api/types.ts                      |   4 +-
 .../free-session/__tests__/public-api.test.ts | 158 ++++++++++--------
 web/src/server/free-session/config.ts         |   2 -
 web/src/server/free-session/public-api.ts     |  26 ++-
 16 files changed, 480 insertions(+), 187 deletions(-)
 create mode 100644 common/src/__tests__/free-agents.test.ts
 create mode 100644 common/src/constants/freebuff-gemini-thinker.ts

diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index d398b2a920..75bdb4967b 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -1,4 +1,11 @@
 import { buildArray } from '@codebuff/common/util/array'
+import { FREEBUFF_KIMI_MODEL_ID } from '@codebuff/common/constants/freebuff-models'
+import {
+  FREEBUFF_GEMINI_THINKER_AGENT_ID,
+  FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT,
+  FREEBUFF_GEMINI_THINKER_STEP_PROMPT,
+  FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION,
+} from '@codebuff/common/constants/freebuff-gemini-thinker'
 
 import { publisher } from '../constants'
 import {
@@ -32,6 +39,7 @@ export function createBase2(
   const model =
     modelOverride ??
     (isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7')
+  const hasFreeGeminiThinker = isFree && model === FREEBUFF_KIMI_MODEL_ID
   const defaultProviderOptions = isFree
     ? {
         data_collection: 'deny' as const,
@@ -97,6 +105,7 @@ export function createBase2(
       isFree && 'code-reviewer-lite',
       isDefault && 'code-reviewer',
       isMax && 'code-reviewer-multi-prompt',
+      hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_AGENT_ID,
       'thinker-gpt',
       'context-pruner',
     ),
@@ -154,6 +163,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u
     '- Spawn context-gathering agents (file pickers, code searchers, and web/docs researchers) before making edits. Use the list_directory and glob tools directly for searching and exploring the codebase.',
     isFree &&
       'Do not spawn the thinker-gpt agent, unless the user asks. Not everyone has connected their ChatGPT subscription to Codebuff to allow for it.',
+    hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION,
     isDefault &&
       '- Spawn the editor agent to implement the changes after you have gathered all the context you need.',
     (isDefault || isMax) &&
@@ -280,6 +290,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
           isDefault,
           isMax,
           isFree,
+          hasFreeGeminiThinker,
           hasNoValidation,
           noAskUser,
         }),
@@ -292,6 +303,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
           hasNoValidation,
           isSonnet,
           isFree,
+          hasFreeGeminiThinker,
           noAskUser,
         }),
 
@@ -340,6 +352,7 @@ function buildImplementationInstructionsPrompt({
   isDefault,
   isMax,
   isFree,
+  hasFreeGeminiThinker,
   hasNoValidation,
   noAskUser,
 }: {
@@ -348,6 +361,7 @@ function buildImplementationInstructionsPrompt({
   isDefault: boolean
   isMax: boolean
   isFree: boolean
+  hasFreeGeminiThinker: boolean
   hasNoValidation: boolean
   noAskUser: boolean
 }) {
@@ -365,6 +379,7 @@ ${buildArray(
     'After getting context on the user request from the codebase or from research, use the ask_user tool to ask the user for important clarifications on their request or alternate implementation strategies. You should skip this step if the choice is obvious -- only ask the user if you need their help making the best choice.',
   (isDefault || isMax || isFree) &&
     `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`,
+  hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT,
   (isDefault || isMax) &&
     `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the <think> tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`,
   isDefault &&
@@ -395,6 +410,7 @@ function buildImplementationStepPrompt({
   hasNoValidation,
   isSonnet,
   isFree,
+  hasFreeGeminiThinker,
   noAskUser,
 }: {
   isDefault: boolean
@@ -403,12 +419,14 @@ function buildImplementationStepPrompt({
   hasNoValidation: boolean
   isSonnet: boolean
   isFree: boolean
+  hasFreeGeminiThinker: boolean
   noAskUser: boolean
 }) {
   return buildArray(
     isMax &&
       `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`,
     'Consider loading relevant skills with the skill tool if they might help with the current task. Do not reload skills that were already loaded earlier in this conversation.',
+    hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_STEP_PROMPT,
     isMax &&
       `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using the str_replace or write_file tools, since it will generate the best code changes.`,
     (isDefault || isMax) &&
diff --git a/cli/src/__tests__/integration/local-agents.test.ts b/cli/src/__tests__/integration/local-agents.test.ts
index 5085e77843..2f72db75db 100644
--- a/cli/src/__tests__/integration/local-agents.test.ts
+++ b/cli/src/__tests__/integration/local-agents.test.ts
@@ -4,13 +4,16 @@ import path from 'path'
 
 import { validateAgents } from '@codebuff/sdk'
 import {
-  describe,
-  test,
-  expect,
-  beforeEach,
-  afterEach,
-  mock,
-} from 'bun:test'
+  FREEBUFF_GEMINI_THINKER_AGENT_ID,
+  FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT,
+  FREEBUFF_GEMINI_THINKER_STEP_PROMPT,
+  FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION,
+} from '@codebuff/common/constants/freebuff-gemini-thinker'
+import {
+  FREEBUFF_KIMI_MODEL_ID,
+  FREEBUFF_MINIMAX_MODEL_ID,
+} from '@codebuff/common/constants/freebuff-models'
+import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test'
 
 // Mock the logger to prevent analytics initialization errors in tests
 mock.module('../../utils/logger', () => ({
@@ -27,6 +30,7 @@ import { setProjectRoot, getProjectRoot } from '../../project-files'
 import {
   loadAgentDefinitions,
   loadLocalAgents,
+  configureFreebuffBaseAgentForModel,
   initializeAgentRegistry,
   findAgentsDirectory,
   getLoadedAgentsData,
@@ -37,6 +41,67 @@ import {
 
 const MODEL_NAME = 'anthropic/claude-sonnet-4'
 
+describe('configureFreebuffBaseAgentForModel', () => {
+  const makeBase2Free = () => ({
+    id: 'base2-free',
+    spawnableAgents: ['file-picker', FREEBUFF_GEMINI_THINKER_AGENT_ID],
+    systemPrompt: [
+      'before',
+      FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION,
+      'after',
+    ].join('\n'),
+    instructionsPrompt: [
+      'before',
+      FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT,
+      'after',
+    ].join('\n'),
+    stepPrompt: ['before', FREEBUFF_GEMINI_THINKER_STEP_PROMPT, 'after'].join(
+      '\n',
+    ),
+  })
+
+  test('keeps the Gemini thinker and prompt guidance for Kimi', () => {
+    const definition = makeBase2Free()
+
+    configureFreebuffBaseAgentForModel(definition, FREEBUFF_KIMI_MODEL_ID)
+
+    expect(definition.spawnableAgents).toContain(
+      FREEBUFF_GEMINI_THINKER_AGENT_ID,
+    )
+    expect(definition.systemPrompt).toContain(
+      FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION,
+    )
+    expect(definition.instructionsPrompt).toContain(
+      FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT,
+    )
+    expect(definition.stepPrompt).toContain(FREEBUFF_GEMINI_THINKER_STEP_PROMPT)
+  })
+
+  test('removes only exact Gemini thinker prompt guidance for MiniMax', () => {
+    const definition = makeBase2Free()
+    definition.systemPrompt +=
+      '\nUser text mentioning thinker-with-files-gemini should stay.'
+
+    configureFreebuffBaseAgentForModel(definition, FREEBUFF_MINIMAX_MODEL_ID)
+
+    expect(definition.spawnableAgents).not.toContain(
+      FREEBUFF_GEMINI_THINKER_AGENT_ID,
+    )
+    expect(definition.systemPrompt).not.toContain(
+      FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION,
+    )
+    expect(definition.instructionsPrompt).not.toContain(
+      FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT,
+    )
+    expect(definition.stepPrompt).not.toContain(
+      FREEBUFF_GEMINI_THINKER_STEP_PROMPT,
+    )
+    expect(definition.systemPrompt).toContain(
+      'User text mentioning thinker-with-files-gemini should stay.',
+    )
+  })
+})
+
 const writeAgentFile = (
   agentsDir: string,
   fileName: string,
@@ -408,7 +473,9 @@ describe('Local Agent Integration', () => {
     expect(uiAgent!.id).toBe('test-ui-agent')
     // File path should be populated for "Open file" UI links
     // Use realpathSync to normalize paths (on macOS, /var is a symlink to /private/var)
-    expect(realpathSync(uiAgent!.filePath!)).toBe(realpathSync(path.join(agentsDir, 'ui-agent.ts')))
+    expect(realpathSync(uiAgent!.filePath!)).toBe(
+      realpathSync(path.join(agentsDir, 'ui-agent.ts')),
+    )
   })
 
   test('loadLocalAgents sorts agents alphabetically by displayName', async () => {
@@ -735,7 +802,9 @@ describe('Local Agent Integration', () => {
     const data = getLoadedAgentsData()
     expect(data).not.toBeNull()
     expect(data!.agents.some((a) => a.id === 'test-announce-agent')).toBe(true)
-    expect(data!.agents.some((a) => a.displayName === 'Announce Test Agent')).toBe(true)
+    expect(
+      data!.agents.some((a) => a.displayName === 'Announce Test Agent'),
+    ).toBe(true)
   })
 
   // ============================================================================
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
index 0001a4da9a..307c7557ba 100644
--- a/cli/src/components/freebuff-model-selector.tsx
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -5,7 +5,6 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
 import { Button } from './button'
 import {
   FALLBACK_FREEBUFF_MODEL_ID,
-  FREEBUFF_GEMINI_PRO_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MODELS,
   getFreebuffDeploymentAvailabilityLabel,
@@ -23,15 +22,8 @@ import { nextFreebuffModelId } from '../utils/freebuff-model-navigation'
 import type { KeyEvent } from '@opentui/core'
 
 const FREEBUFF_MODEL_SELECTOR_MODELS = [
-  ...FREEBUFF_MODELS.filter(
-    (model) => model.id === FREEBUFF_GEMINI_PRO_MODEL_ID,
-  ),
   ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID),
-  ...FREEBUFF_MODELS.filter(
-    (model) =>
-      model.id !== FREEBUFF_GEMINI_PRO_MODEL_ID &&
-      model.id !== FREEBUFF_KIMI_MODEL_ID,
-  ),
+  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID),
 ]
 
 /**
@@ -121,13 +113,7 @@ export const FreebuffModelSelector: React.FC = () => {
   // when the user's selection moves between queues. The tagline is shown
   // inline with the name now, so it's no longer part of this slot.
   const hintWidth = useMemo(
-    () =>
-      Math.max(
-        'No wait'.length,
-        '999 ahead'.length,
-        'Used today'.length,
-        'Limit used'.length,
-      ),
+    () => Math.max('No wait'.length, '999 ahead'.length, 'Limit used'.length),
     [],
   )
 
@@ -267,9 +253,7 @@ export const FreebuffModelSelector: React.FC = () => {
           const hint = !isAvailable
             ? 'Closed'
             : isQuotaExhausted
-              ? model.id === FREEBUFF_GEMINI_PRO_MODEL_ID
-                ? 'Used today'
-                : 'Limit used'
+              ? 'Limit used'
               : ahead === undefined
                 ? ''
                 : ahead === 0
diff --git a/cli/src/hooks/use-send-message.ts b/cli/src/hooks/use-send-message.ts
index 03fc065c05..cdb67f2555 100644
--- a/cli/src/hooks/use-send-message.ts
+++ b/cli/src/hooks/use-send-message.ts
@@ -5,7 +5,11 @@ import { createStreamController } from './stream-state'
 import { useChatStore } from '../state/chat-store'
 import { getFreebuffInstanceId } from './use-freebuff-session'
 import { getCodebuffClient } from '../utils/codebuff-client'
-import { AGENT_MODE_TO_ID, AGENT_MODE_TO_COST_MODE, IS_FREEBUFF } from '../utils/constants'
+import {
+  AGENT_MODE_TO_ID,
+  AGENT_MODE_TO_COST_MODE,
+  IS_FREEBUFF,
+} from '../utils/constants'
 import { createEventHandlerState } from '../utils/create-event-handler-state'
 import { createRunConfig } from '../utils/create-run-config'
 import { loadAgentDefinitions } from '../utils/local-agent-registry'
@@ -108,7 +112,7 @@ export const useSendMessage = ({
   onBeforeMessageSend,
   mainAgentTimer,
   scrollToLatest,
-  onTimerEvent = () => { },
+  onTimerEvent = () => {},
   isQueuePausedRef,
   isProcessingQueueRef,
   resumeQueue,
@@ -295,13 +299,13 @@ export const useSendMessage = ({
           const errorsToAttach =
             validationResult.errors.length === 0
               ? [
-                // Hide this for now, as validate endpoint may be flaky and we don't want to bother users.
-                // {
-                //   id: NETWORK_ERROR_ID,
-                //   message:
-                //     'Agent validation failed. This may be due to a network issue or temporary server problem. Please try again.',
-                // },
-              ]
+                  // Hide this for now, as validate endpoint may be flaky and we don't want to bother users.
+                  // {
+                  //   id: NETWORK_ERROR_ID,
+                  //   message:
+                  //     'Agent validation failed. This may be due to a network issue or temporary server problem. Please try again.',
+                  // },
+                ]
               : validationResult.errors
 
           setMessages((prev) =>
@@ -457,12 +461,16 @@ export const useSendMessage = ({
           eventHandlerState,
           signal: abortController.signal,
           costMode: AGENT_MODE_TO_COST_MODE[agentMode],
-          extraCodebuffMetadata: freebuffInstanceId
-            ? { freebuff_instance_id: freebuffInstanceId }
-            : undefined,
+          extraCodebuffMetadata:
+            IS_FREEBUFF && freebuffInstanceId
+              ? { freebuff_instance_id: freebuffInstanceId }
+              : undefined,
         })
 
-        logger.info({ runConfig }, '[send-message] Sending message with sdk run config')
+        logger.info(
+          { runConfig },
+          '[send-message] Sending message with sdk run config',
+        )
         const runState = await client.run(runConfig)
 
         // Finalize: persist state and mark complete
diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts
index 6106b3928e..59b042e147 100644
--- a/cli/src/utils/local-agent-registry.ts
+++ b/cli/src/utils/local-agent-registry.ts
@@ -3,12 +3,22 @@ import os from 'os'
 import path from 'path'
 
 import { pluralize } from '@codebuff/common/util/string'
-import { loadLocalAgents as sdkLoadLocalAgents, loadMCPConfigSync } from '@codebuff/sdk'
+import {
+  loadLocalAgents as sdkLoadLocalAgents,
+  loadMCPConfigSync,
+} from '@codebuff/sdk'
 
 import type { MCPConfig } from '@codebuff/common/types/mcp'
 
 import { FREE_MODE_AGENT_MODELS } from '@codebuff/common/constants/free-agents'
-import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+import {
+  FREEBUFF_GEMINI_THINKER_AGENT_ID,
+  FREEBUFF_GEMINI_THINKER_PROMPT_LINES,
+} from '@codebuff/common/constants/freebuff-gemini-thinker'
+import {
+  FREEBUFF_KIMI_MODEL_ID,
+  FREEBUFF_MODELS,
+} from '@codebuff/common/constants/freebuff-models'
 
 import { getSelectedFreebuffModel } from '../state/freebuff-model-store'
 import { getProjectRoot } from '../project-files'
@@ -16,6 +26,8 @@ import { AGENT_MODE_TO_ID, IS_FREEBUFF, type AgentMode } from './constants'
 import { logger } from './logger'
 import * as bundledAgentsModule from '../agents/bundled-agents.generated'
 
+import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition'
+
 /** Agents whose hardcoded model gets swapped out for the user's currently
  *  selected freebuff model. Derived from the server's
  *  `FREE_MODE_AGENT_MODELS` — any agent whose allowlist contains every
@@ -26,8 +38,55 @@ const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS: ReadonlySet<string> = new Set(
     .filter(([, allowed]) => FREEBUFF_MODELS.every((m) => allowed.has(m.id)))
     .map(([agentId]) => agentId),
 )
+const FREEBUFF_GEMINI_THINKER_PROMPT_LINE_SET = new Set<string>(
+  FREEBUFF_GEMINI_THINKER_PROMPT_LINES,
+)
 
-import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition'
+type ConfigurableFreebuffBaseAgent = {
+  id: string
+  spawnableAgents?: string[]
+  systemPrompt?: string
+  instructionsPrompt?: string
+  stepPrompt?: string
+}
+
+/** Removes each line whose trimmed text is a Gemini thinker guidance line. */
+function stripFreebuffGeminiThinkerPrompt(prompt: string): string {
+  const isKept = (line: string) =>
+    !FREEBUFF_GEMINI_THINKER_PROMPT_LINE_SET.has(line.trim())
+  return prompt.split('\n').filter(isKept).join('\n')
+}
+
+/**
+ * Tailors `base2-free` (mutated in place) to the selected freebuff model;
+ * any other agent id is left untouched. Kimi keeps its prompts and is
+ * guaranteed the Gemini thinker as a spawnable agent. Any other model loses
+ * the thinker from `spawnableAgents` and its guidance lines from the prompts.
+ */
+export function configureFreebuffBaseAgentForModel(
+  def: ConfigurableFreebuffBaseAgent,
+  selectedModel: string,
+): void {
+  if (def.id !== 'base2-free') return
+
+  const currentAgents = def.spawnableAgents ?? []
+  if (selectedModel === FREEBUFF_KIMI_MODEL_ID) {
+    def.spawnableAgents = Array.from(
+      new Set([...currentAgents, FREEBUFF_GEMINI_THINKER_AGENT_ID]),
+    )
+    return
+  }
+
+  def.spawnableAgents = currentAgents.filter(
+    (id) => id !== FREEBUFF_GEMINI_THINKER_AGENT_ID,
+  )
+  const keys = ['systemPrompt', 'instructionsPrompt', 'stepPrompt'] as const
+  for (const key of keys) {
+    const text = def[key]
+    if (typeof text !== 'string') continue
+    def[key] = stripFreebuffGeminiThinkerPrompt(text)
+  }
+}
 
 // ============================================================================
 // Constants and types
@@ -56,12 +115,12 @@ let mcpServersCache: Record<string, MCPConfig> = {}
 /**
  * Initialize the agent registry by loading user agents via the SDK.
  * This must be called at CLI startup before any sync agent loading functions.
- * 
+ *
  * Agents are loaded from:
  * - {cwd}/.agents (project)
  * - {cwd}/../.agents (parent, e.g. monorepo root)
  * - ~/.agents (global, user's home directory)
- * 
+ *
  * Later directories take precedence, so project agents override global ones.
  */
 export async function initializeAgentRegistry(): Promise<void> {
@@ -72,7 +131,10 @@ export async function initializeAgentRegistry(): Promise<void> {
     userAgentFilePaths = buildAgentFilePathMap(getDefaultAgentDirs())
   } catch (error) {
     // Fall back to empty cache if SDK loading fails, but log a warning
-    logger.warn({ error }, 'Failed to load user agents from .agents directories')
+    logger.warn(
+      { error },
+      'Failed to load user agents from .agents directories',
+    )
     userAgentsCache = {}
     userAgentFilePaths = new Map()
   }
@@ -83,7 +145,10 @@ export async function initializeAgentRegistry(): Promise<void> {
     mcpServersCache = mcpConfig.mcpServers
     if (Object.keys(mcpServersCache).length > 0) {
       logger.debug(
-        { mcpServers: Object.keys(mcpServersCache), source: mcpConfig._sourceFilePath },
+        {
+          mcpServers: Object.keys(mcpServersCache),
+          source: mcpConfig._sourceFilePath,
+        },
         '[agents] Loaded MCP servers from mcp.json',
       )
     }
@@ -112,7 +177,7 @@ const getDefaultAgentDirs = (): string[] => {
 const buildAgentFilePathMap = (agentsDirs: string[]): Map<string, string> => {
   const idToPath = new Map<string, string>()
   const idRegex = /id\s*:\s*['"`]([^'"`]+)['"`]/i
-  
+
   const scanDirectory = (dir: string): void => {
     try {
       const entries = fs.readdirSync(dir, { withFileTypes: true })
@@ -122,7 +187,12 @@ const buildAgentFilePathMap = (agentsDirs: string[]): Map<string, string> => {
           scanDirectory(fullPath)
           continue
         }
-        if (!entry.isFile() || !entry.name.endsWith('.ts') || entry.name.endsWith('.d.ts') || entry.name.endsWith('.test.ts')) {
+        if (
+          !entry.isFile() ||
+          !entry.name.endsWith('.ts') ||
+          entry.name.endsWith('.d.ts') ||
+          entry.name.endsWith('.test.ts')
+        ) {
           continue
         }
         try {
@@ -139,7 +209,7 @@ const buildAgentFilePathMap = (agentsDirs: string[]): Map<string, string> => {
       // Skip directories that can't be read
     }
   }
-  
+
   // Scan all directories - later directories override earlier ones
   for (const agentsDir of agentsDirs) {
     scanDirectory(agentsDir)
@@ -235,13 +305,18 @@ const cachedAgentsByMode: Map<string, LocalAgentInfo[]> = new Map()
 
 /**
  * Load local agents for display in the '@' menu.
- * 
+ *
  * @param currentAgentMode - If provided, filters bundled agents to only include
  *   subagents of the current mode's agent (e.g., base2's spawnableAgents for DEFAULT mode).
  *   User's local agents from .agents/ are always included regardless of mode.
  */
-export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[] => {
-  const cacheKey = currentAgentMode ?? 'all'
+export const loadLocalAgents = (
+  currentAgentMode?: AgentMode,
+): LocalAgentInfo[] => {
+  const selectedFreebuffModel = IS_FREEBUFF ? getSelectedFreebuffModel() : null
+  const cacheKey = selectedFreebuffModel
+    ? `${currentAgentMode ?? 'all'}:${selectedFreebuffModel}`
+    : (currentAgentMode ?? 'all')
   const cached = cachedAgentsByMode.get(cacheKey)
   if (cached) {
     return cached
@@ -251,35 +326,45 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[]
   // compiled into the CLI binary at build time
   const bundledAgentsInfo = getBundledAgentsAsLocalInfo()
   const bundledAgents = getBundledAgents()
-  
+
   // Filter bundled agents to only include subagents of the current mode's agent
   let filteredBundledAgents: LocalAgentInfo[]
   if (currentAgentMode) {
     const currentAgentId = AGENT_MODE_TO_ID[currentAgentMode]
     const currentAgentDef = bundledAgents[currentAgentId]
+      ? {
+          ...bundledAgents[currentAgentId],
+          spawnableAgents: [
+            ...(bundledAgents[currentAgentId].spawnableAgents ?? []),
+          ],
+        }
+      : undefined
+    if (selectedFreebuffModel && currentAgentDef) {
+      configureFreebuffBaseAgentForModel(currentAgentDef, selectedFreebuffModel)
+    }
     const spawnableAgentIds = new Set(currentAgentDef?.spawnableAgents ?? [])
-    
+
     // Only include bundled agents that are in the spawnableAgents list
-    filteredBundledAgents = bundledAgentsInfo.filter(agent => 
-      spawnableAgentIds.has(agent.id)
+    filteredBundledAgents = bundledAgentsInfo.filter((agent) =>
+      spawnableAgentIds.has(agent.id),
     )
   } else {
     filteredBundledAgents = bundledAgentsInfo
   }
-  
+
   const results: LocalAgentInfo[] = [...filteredBundledAgents]
-  const includedIds = new Set(filteredBundledAgents.map(a => a.id))
+  const includedIds = new Set(filteredBundledAgents.map((a) => a.id))
 
   // Get user agents from the SDK-loaded cache
   // User agents are always included (not filtered by mode) and can override bundled agents
   const userAgents = getUserAgentsAsLocalInfo()
-  
+
   // Merge user agents - they override bundled agents with same ID
   // and are always included regardless of mode filtering
   for (const userAgent of userAgents) {
     if (includedIds.has(userAgent.id)) {
       // Replace bundled agent with user's version
-      const idx = results.findIndex(a => a.id === userAgent.id)
+      const idx = results.findIndex((a) => a.id === userAgent.id)
       if (idx !== -1) {
         results[idx] = userAgent
       }
@@ -292,7 +377,7 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[]
   const sorted = results.sort((a, b) =>
     a.displayName.localeCompare(b.displayName, 'en'),
   )
-  
+
   cachedAgentsByMode.set(cacheKey, sorted)
   return sorted
 }
@@ -306,7 +391,7 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[]
  * Bundled agents are compiled into the CLI binary at build time.
  * User agents from .agents/ are loaded via SDK at startup and cached.
  * User agents can override bundled agents with the same ID.
- * 
+ *
  * Additionally, all user agent IDs are automatically added to the spawnableAgents
  * of any base agent (agents with IDs starting with 'base'), so users can spawn
  * their custom agents without needing to modify the base agent definition.
@@ -314,17 +399,19 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[]
 export const loadAgentDefinitions = (): AgentDefinition[] => {
   // Start with bundled agents - these are the default Codebuff agents
   const bundledAgents = getBundledAgents()
-  const definitions: AgentDefinition[] = Object.values(bundledAgents).map(def => ({ ...def }))
+  const definitions: AgentDefinition[] = Object.values(bundledAgents).map(
+    (def) => ({ ...def }),
+  )
   const bundledIds = new Set(Object.keys(bundledAgents))
 
   // Get user agents from the SDK-loaded cache
   const userAgentDefs = getUserAgentDefinitions()
-  const userAgentIds = userAgentDefs.map(def => def.id)
+  const userAgentIds = userAgentDefs.map((def) => def.id)
 
   for (const agentDef of userAgentDefs) {
     // User agents override bundled agents with the same ID
     if (bundledIds.has(agentDef.id)) {
-      const idx = definitions.findIndex(d => d.id === agentDef.id)
+      const idx = definitions.findIndex((d) => d.id === agentDef.id)
       if (idx !== -1) {
         definitions[idx] = { ...agentDef }
       }
@@ -380,6 +467,7 @@ export const loadAgentDefinitions = (): AgentDefinition[] => {
       if (FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS.has(def.id)) {
         def.model = selectedModel
       }
+      configureFreebuffBaseAgentForModel(def, selectedModel)
     }
   }
 
diff --git a/common/src/__tests__/free-agents.test.ts b/common/src/__tests__/free-agents.test.ts
new file mode 100644
index 0000000000..e6370c9cc3
--- /dev/null
+++ b/common/src/__tests__/free-agents.test.ts
@@ -0,0 +1,38 @@
+import { describe, expect, test } from 'bun:test'
+
+import { FREEBUFF_GEMINI_PRO_MODEL_ID } from '../constants/freebuff-models'
+import { FREEBUFF_GEMINI_THINKER_AGENT_ID } from '../constants/freebuff-gemini-thinker'
+import {
+  isFreebuffGeminiThinkerAgent,
+  isFreeModeAllowedAgentModel,
+} from '../constants/free-agents'
+
+describe('free mode agent model allowlist', () => {
+  test('allows Gemini Pro for the thinker subagent but not the freebuff root', () => {
+    expect(
+      isFreeModeAllowedAgentModel('base2-free', FREEBUFF_GEMINI_PRO_MODEL_ID),
+    ).toBe(false)
+    expect(
+      isFreeModeAllowedAgentModel(
+        FREEBUFF_GEMINI_THINKER_AGENT_ID,
+        FREEBUFF_GEMINI_PRO_MODEL_ID,
+      ),
+    ).toBe(true)
+  })
+
+  test('recognizes the Gemini thinker agent in free mode', () => {
+    expect(isFreebuffGeminiThinkerAgent(FREEBUFF_GEMINI_THINKER_AGENT_ID)).toBe(
+      true,
+    )
+    expect(
+      isFreebuffGeminiThinkerAgent(
+        `codebuff/${FREEBUFF_GEMINI_THINKER_AGENT_ID}@0.0.1`,
+      ),
+    ).toBe(true)
+    expect(
+      isFreebuffGeminiThinkerAgent(
+        `other/${FREEBUFF_GEMINI_THINKER_AGENT_ID}@0.0.1`,
+      ),
+    ).toBe(false)
+  })
+})
diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts
index 10709e2360..fcf1d04db4 100644
--- a/common/src/__tests__/freebuff-models.test.ts
+++ b/common/src/__tests__/freebuff-models.test.ts
@@ -2,7 +2,6 @@ import { describe, expect, test } from 'bun:test'
 
 import {
   DEFAULT_FREEBUFF_MODEL_ID,
-  FREEBUFF_GEMINI_PRO_MODEL_ID,
   FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MODELS,
@@ -10,29 +9,10 @@ import {
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffDeploymentHours,
   isFreebuffModelId,
-  isFreebuffModelAvailable,
   isSupportedFreebuffModelId,
 } from '../constants/freebuff-models'
 
 describe('freebuff model availability', () => {
-  test('includes Gemini 3.1 Pro as an always-available option', () => {
-    expect(FREEBUFF_MODELS.map((model) => model.id)).toContain(
-      FREEBUFF_GEMINI_PRO_MODEL_ID,
-    )
-    expect(
-      isFreebuffModelAvailable(
-        FREEBUFF_GEMINI_PRO_MODEL_ID,
-        new Date('2026-01-05T18:00:00Z'),
-      ),
-    ).toBe(true)
-    expect(
-      isFreebuffModelAvailable(
-        FREEBUFF_GEMINI_PRO_MODEL_ID,
-        new Date('2026-01-05T12:00:00Z'),
-      ),
-    ).toBe(true)
-  })
-
   test('defaults to Kimi K2.6', () => {
     expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID)
   })
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index 6d22152c5a..6bc97992d4 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -1,6 +1,10 @@
 import { parseAgentId } from '../util/agent-id-parsing'
 
-import { SUPPORTED_FREEBUFF_MODELS } from './freebuff-models'
+import { FREEBUFF_GEMINI_THINKER_AGENT_ID } from './freebuff-gemini-thinker'
+import {
+  FREEBUFF_GEMINI_PRO_MODEL_ID,
+  SUPPORTED_FREEBUFF_MODELS,
+} from './freebuff-models'
 
 import type { CostMode } from './model-config'
 
@@ -53,6 +57,9 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
 
   // Code reviewer for free mode
   'code-reviewer-lite': new Set(FREEBUFF_ALLOWED_MODEL_IDS),
+
+  // Kimi freebuff root may spawn Gemini Pro for deeper thinking.
+  [FREEBUFF_GEMINI_THINKER_AGENT_ID]: new Set([FREEBUFF_GEMINI_PRO_MODEL_ID]),
 }
 
 /**
@@ -93,6 +100,13 @@ export function isFreebuffRootAgent(fullAgentId: string): boolean {
   return FREEBUFF_ROOT_AGENT_ID_SET.has(agentId)
 }
 
+/** True for the Gemini thinker id, bare or published under `codebuff`. */
+export function isFreebuffGeminiThinkerAgent(fullAgentId: string): boolean {
+  const { publisherId, agentId } = parseAgentId(fullAgentId)
+  const isOwnPublisher = !publisherId || publisherId === 'codebuff'
+  return isOwnPublisher && agentId === FREEBUFF_GEMINI_THINKER_AGENT_ID
+}
+
 /**
  * Check if a specific agent is allowed to use a specific model in FREE mode.
  * This is the strictest check - validates both the agent AND model combination.
diff --git a/common/src/constants/freebuff-gemini-thinker.ts b/common/src/constants/freebuff-gemini-thinker.ts
new file mode 100644
index 0000000000..007ac18f00
--- /dev/null
+++ b/common/src/constants/freebuff-gemini-thinker.ts
@@ -0,0 +1,16 @@
+export const FREEBUFF_GEMINI_THINKER_AGENT_ID = 'thinker-with-files-gemini'
+
+export const FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION =
+  "Spawn the thinker-with-files-gemini agent for complex problems -- it's very smart. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths since it has no conversation history."
+
+export const FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT =
+  '- For complex problems, spawn the thinker-with-files-gemini agent after gathering context. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths.'
+
+export const FREEBUFF_GEMINI_THINKER_STEP_PROMPT =
+  'Spawn the thinker-with-files-gemini agent for complex problems, not routine edits. Pass the relevant filePaths.'
+
+export const FREEBUFF_GEMINI_THINKER_PROMPT_LINES = [
+  FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION,
+  FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT,
+  FREEBUFF_GEMINI_THINKER_STEP_PROMPT,
+] as const
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index 246731a3f6..884cb6cfe9 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -42,12 +42,6 @@ interface LocalTimeFormatOptions {
 }
 
 export const FREEBUFF_MODELS = [
-  {
-    id: FREEBUFF_GEMINI_PRO_MODEL_ID,
-    displayName: 'Gemini 3.1 Pro',
-    tagline: 'Deepest, 1/day',
-    availability: 'always',
-  },
   {
     id: FREEBUFF_MINIMAX_MODEL_ID,
     displayName: 'MiniMax M2.7',
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index cf846131cf..70599bf6d1 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -161,6 +161,13 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           status: 'running',
         }
       }
+      if (runId === 'run-gemini-thinker-child') {
+        return {
+          agent_id: 'thinker-with-files-gemini',
+          ancestor_run_ids: ['run-free'],
+          status: 'running',
+        }
+      }
       if (runId === 'run-completed') {
         return {
           agent_id: 'agent-123',
@@ -823,7 +830,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       FETCH_PATH_TEST_TIMEOUT_MS,
     )
 
-    it('lets freebuff use Gemini 3.1 Pro through the free-mode allowlist', async () => {
+    it('rejects Gemini 3.1 Pro as a root freebuff model', async () => {
       const req = new NextRequest(
         'http://localhost:3000/api/v1/chat/completions',
         {
@@ -854,7 +861,9 @@ describe('/api/v1/chat/completions POST endpoint', () => {
         checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
       })
 
-      expect(response.status).toBe(200)
+      expect(response.status).toBe(403)
+      const body = await response.json()
+      expect(body.error).toBe('free_mode_invalid_agent_model')
     })
 
     it('rejects standalone free-mode reviewer runs even when the model is allowlisted', async () => {
@@ -864,7 +873,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'),
           body: JSON.stringify({
-            model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+            model: 'minimax/minimax-m2.7',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-reviewer-direct',
@@ -893,7 +902,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       expect(body.error).toBe('free_mode_invalid_agent_hierarchy')
     })
 
-    it('counts child reviewer Gemini requests toward the free-mode request limit', async () => {
+    it('rejects the Gemini thinker subagent when the session gate rejects it', async () => {
       const response = await postChatCompletions({
         req: new NextRequest('http://localhost:3000/api/v1/chat/completions', {
           method: 'POST',
@@ -902,9 +911,10 @@ describe('/api/v1/chat/completions POST endpoint', () => {
             model: FREEBUFF_GEMINI_PRO_MODEL_ID,
             stream: false,
             codebuff_metadata: {
-              run_id: 'run-reviewer-child',
+              run_id: 'run-gemini-thinker-child',
               client_id: 'test-client-id-123',
               cost_mode: 'free',
+              freebuff_instance_id: 'inst-123',
             },
           }),
         }),
@@ -916,7 +926,53 @@ describe('/api/v1/chat/completions POST endpoint', () => {
         fetch: mockFetch,
         insertMessageBigquery: mockInsertMessageBigquery,
         loggerWithContext: mockLoggerWithContext,
-        checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+        checkSessionAdmissible: async (params) => {
+          expect(params.requireActiveSession).toBe(true)
+          expect(params.requestedModel).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID)
+          expect(params.claimedInstanceId).toBe('inst-123')
+          return {
+            ok: false,
+            code: 'session_model_mismatch',
+            message: 'This session is bound to minimax/minimax-m2.7.',
+          }
+        },
+      })
+
+      expect(response.status).toBe(409)
+      const body = await response.json()
+      expect(body.error).toBe('session_model_mismatch')
+    })
+
+    it('requires an active session check for the Gemini thinker subagent', async () => {
+      const response = await postChatCompletions({
+        req: new NextRequest('http://localhost:3000/api/v1/chat/completions', {
+          method: 'POST',
+          headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'),
+          body: JSON.stringify({
+            model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+            stream: false,
+            codebuff_metadata: {
+              run_id: 'run-gemini-thinker-child',
+              client_id: 'test-client-id-123',
+              cost_mode: 'free',
+              freebuff_instance_id: 'inst-123',
+            },
+          }),
+        }),
+        getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+        logger: mockLogger,
+        trackEvent: mockTrackEvent,
+        getUserUsageData: mockGetUserUsageData,
+        getAgentRunFromId: mockGetAgentRunFromId,
+        fetch: mockFetch,
+        insertMessageBigquery: mockInsertMessageBigquery,
+        loggerWithContext: mockLoggerWithContext,
+        checkSessionAdmissible: async (params) => {
+          expect(params.requireActiveSession).toBe(true)
+          expect(params.requestedModel).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID)
+          expect(params.claimedInstanceId).toBe('inst-123')
+          return { ok: true, reason: 'active', remainingMs: 60_000 }
+        },
       })
 
       expect(response.status).toBe(200)
diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index 0a7771d46d..6d2cf868ad 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -1,6 +1,7 @@
 import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
 import { BYOK_OPENROUTER_HEADER } from '@codebuff/common/constants/byok'
 import {
+  isFreebuffGeminiThinkerAgent,
   isFreebuffRootAgent,
   isFreeMode,
   isFreeModeAllowedAgentModel,
@@ -433,11 +434,11 @@ export async function postChatCompletions(params: {
       }
     }
 
-    // Freebuff waiting-room gate. Only enforced for free-mode requests, and
-    // only when FREEBUFF_WAITING_ROOM_ENABLED=true — otherwise this is a
-    // no-op that returns { ok: true, reason: 'disabled' } without a DB hit.
-    // Runs before the rate limiter so rejected requests don't burn a queued
-    // user's free-mode counters.
+    // Freebuff waiting-room gate for free-mode requests. Normally enforced
+    // only when FREEBUFF_WAITING_ROOM_ENABLED=true, but Gemini thinker children
+    // always force a DB-backed active-session check so their Kimi-only
+    // allowance comes from trusted server state. Runs before the rate limiter
+    // so rejected requests don't burn a queued user's free-mode counters.
     if (isFreeModeRequest) {
       const claimedInstanceId =
         typedBody.codebuff_metadata?.freebuff_instance_id
@@ -446,6 +447,7 @@ export async function postChatCompletions(params: {
         userEmail: userInfo.email,
         claimedInstanceId,
         requestedModel: typedBody.model,
+        requireActiveSession: isFreebuffGeminiThinkerAgent(agentId),
       })
       if (!gate.ok) {
         trackEvent({
diff --git a/web/src/llm-api/types.ts b/web/src/llm-api/types.ts
index dd3b89a4d7..66a3425a52 100644
--- a/web/src/llm-api/types.ts
+++ b/web/src/llm-api/types.ts
@@ -83,9 +83,7 @@ export function isChatCompletionRequestBody(
 /**
  * Type guard to check if a value is CodebuffMetadata
  */
-export function isCodebuffMetadata(
-  value: unknown,
-): value is CodebuffMetadata {
+export function isCodebuffMetadata(value: unknown): value is CodebuffMetadata {
   if (typeof value !== 'object' || value === null) {
     return false
   }
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index 265c2872b1..70303ee11e 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -402,56 +402,6 @@ describe('requestSession', () => {
   const KIMI_LIMIT = 5
   const KIMI_WINDOW_HOURS = 12
   const KIMI_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
-  const GEMINI_LIMIT = 1
-  const GEMINI_WINDOW_HOURS = 24
-
-  test('rate_limited: Gemini 3.1 Pro allows one admit per 24h', async () => {
-    deps._tick(KIMI_OPEN_TIME)
-    const now = deps._now()
-    deps.admits.push({
-      user_id: 'u1',
-      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-      admitted_at: new Date(now.getTime() - 23 * 60 * 60 * 1000),
-    })
-
-    const state = await requestSession({
-      userId: 'u1',
-      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-      deps,
-    })
-    expect(state.status).toBe('rate_limited')
-    if (state.status !== 'rate_limited') throw new Error('unreachable')
-    expect(state.model).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID)
-    expect(state.limit).toBe(GEMINI_LIMIT)
-    expect(state.windowHours).toBe(GEMINI_WINDOW_HOURS)
-    expect(state.recentCount).toBe(GEMINI_LIMIT)
-    expect(state.retryAfterMs).toBe(60 * 60 * 1000)
-    expect(deps.rows.has('u1')).toBe(false)
-  })
-
-  test('rate_limited: Gemini 3.1 Pro admit outside 24h window does not count', async () => {
-    deps._tick(KIMI_OPEN_TIME)
-    const now = deps._now()
-    deps.admits.push({
-      user_id: 'u1',
-      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-      admitted_at: new Date(now.getTime() - 25 * 60 * 60 * 1000),
-    })
-
-    const state = await requestSession({
-      userId: 'u1',
-      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-      deps,
-    })
-    expect(state.status).toBe('queued')
-    if (state.status !== 'queued') throw new Error('unreachable')
-    expect(state.rateLimit).toEqual({
-      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-      limit: GEMINI_LIMIT,
-      windowHours: GEMINI_WINDOW_HOURS,
-      recentCount: 0,
-    })
-  })
 
   test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => {
     deps._tick(KIMI_OPEN_TIME)
@@ -745,25 +695,6 @@ describe('getSessionState', () => {
     expect(state).toEqual({ status: 'none', queueDepthByModel: {} })
   })
 
-  test('no row surfaces exhausted Gemini quota before joining', async () => {
-    const now = deps._now()
-    deps.admits.push({
-      user_id: 'u1',
-      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-      admitted_at: new Date(now.getTime() - 23 * 60 * 60 * 1000),
-    })
-
-    const state = await getSessionState({ userId: 'u1', deps })
-    expect(state.status).toBe('none')
-    if (state.status !== 'none') throw new Error('unreachable')
-    expect(state.rateLimitsByModel?.[FREEBUFF_GEMINI_PRO_MODEL_ID]).toEqual({
-      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-      limit: 1,
-      windowHours: 24,
-      recentCount: 1,
-    })
-  })
-
   test('active session with matching instance id returns active', async () => {
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
@@ -916,6 +847,20 @@ describe('checkSessionAdmissible', () => {
     expect(result.ok).toBe(true)
   })
 
+  test('requireActiveSession ignores disabled shortcut and requires a row', async () => {
+    const offDeps = makeDeps({ isWaitingRoomEnabled: () => false })
+    const result = await checkSessionAdmissible({
+      userId: 'u1',
+      claimedInstanceId: 'inst-1',
+      requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      requireActiveSession: true,
+      deps: offDeps,
+    })
+    expect(result.ok).toBe(false)
+    if (result.ok) throw new Error('unreachable')
+    expect(result.code).toBe('waiting_room_required')
+  })
+
   test('no session → waiting_room_required', async () => {
     const result = await checkSessionAdmissible({
       userId: 'u1',
@@ -940,6 +885,20 @@ describe('checkSessionAdmissible', () => {
     expect(deps.rows.size).toBe(0)
   })
 
+  test('requireActiveSession ignores bypassed emails', async () => {
+    const result = await checkSessionAdmissible({
+      userId: 'u1',
+      userEmail: 'team@codebuff.com',
+      claimedInstanceId: 'inst-1',
+      requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      requireActiveSession: true,
+      deps,
+    })
+    expect(result.ok).toBe(false)
+    if (result.ok) throw new Error('unreachable')
+    expect(result.code).toBe('waiting_room_required')
+  })
+
   test('bypassed email is case-insensitive', async () => {
     const result = await checkSessionAdmissible({
       userId: 'u1',
@@ -950,6 +909,31 @@ describe('checkSessionAdmissible', () => {
     expect(result.ok).toBe(true)
   })
 
+  test('requireActiveSession still admits Gemini thinker for Kimi rows when disabled', async () => {
+    const offDeps = makeDeps({ isWaitingRoomEnabled: () => false })
+    const now = offDeps._now()
+    offDeps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'active',
+      active_instance_id: 'inst-1',
+      model: FREEBUFF_KIMI_MODEL_ID,
+      queued_at: now,
+      admitted_at: now,
+      expires_at: new Date(now.getTime() + SESSION_LEN),
+      created_at: now,
+      updated_at: now,
+    })
+
+    const result = await checkSessionAdmissible({
+      userId: 'u1',
+      claimedInstanceId: 'inst-1',
+      requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      requireActiveSession: true,
+      deps: offDeps,
+    })
+    expect(result.ok).toBe(true)
+  })
+
   test('queued session → waiting_room_queued', async () => {
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const result = await checkSessionAdmissible({
@@ -978,6 +962,42 @@ describe('checkSessionAdmissible', () => {
     expect(result.remainingMs).toBe(SESSION_LEN)
   })
 
+  test('active Kimi session admits Gemini thinker requests', async () => {
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
+    const row = deps.rows.get('u1')!
+    row.model = FREEBUFF_KIMI_MODEL_ID
+    row.status = 'active'
+    row.admitted_at = deps._now()
+    row.expires_at = new Date(deps._now().getTime() + SESSION_LEN)
+
+    const result = await checkSessionAdmissible({
+      userId: 'u1',
+      claimedInstanceId: row.active_instance_id,
+      requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      requireActiveSession: true,
+      deps,
+    })
+    expect(result.ok).toBe(true)
+  })
+
+  test('active MiniMax session rejects Gemini thinker requests', async () => {
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
+    const row = deps.rows.get('u1')!
+    row.status = 'active'
+    row.admitted_at = deps._now()
+    row.expires_at = new Date(deps._now().getTime() + SESSION_LEN)
+
+    const result = await checkSessionAdmissible({
+      userId: 'u1',
+      claimedInstanceId: row.active_instance_id,
+      requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      requireActiveSession: true,
+      deps,
+    })
+    if (result.ok) throw new Error('unreachable')
+    expect(result.code).toBe('session_model_mismatch')
+  })
+
   test('active + wrong instance id → session_superseded', async () => {
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
index cbde91678d..d3e0d9c556 100644
--- a/web/src/server/free-session/config.ts
+++ b/web/src/server/free-session/config.ts
@@ -1,5 +1,4 @@
 import {
-  FREEBUFF_GEMINI_PRO_MODEL_ID,
   FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MINIMAX_MODEL_ID,
@@ -54,7 +53,6 @@ export function getSessionGraceMs(): number {
  * queue).
  */
 const INSTANT_ADMIT_CAPACITY: Record<string, number> = {
-  [FREEBUFF_GEMINI_PRO_MODEL_ID]: 50,
   [FREEBUFF_GLM_MODEL_ID]: 50,
   [FREEBUFF_KIMI_MODEL_ID]: 50,
   [FREEBUFF_MINIMAX_MODEL_ID]: 1000,
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index a311ff9411..c5d48bc417 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -48,7 +48,6 @@ import type {
  * queued/active responses — changing them is a deliberate, typed edit.
  */
 const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
-  [FREEBUFF_GEMINI_PRO_MODEL_ID]: { limit: 1, windowHours: 24 },
   [FREEBUFF_GLM_MODEL_ID]: { limit: 5, windowHours: 12 },
   [FREEBUFF_KIMI_MODEL_ID]: { limit: 5, windowHours: 12 },
 }
@@ -530,6 +529,10 @@ export async function checkSessionAdmissible(params: {
   userId: string
   userEmail?: string | null | undefined
   claimedInstanceId: string | null | undefined
+  /** Forces a real active session row check even when the waiting room is
+   *  globally disabled or the user email normally bypasses it. Use for
+   *  subagent/model combinations that must be bound to trusted session state. */
+  requireActiveSession?: boolean
   /** Model the chat-completions request is for. When provided, the gate
    *  rejects requests whose model doesn't match the active session's model
    *  so a stale CLI tab can't slip a request through under the wrong model. */
@@ -538,8 +541,9 @@ export async function checkSessionAdmissible(params: {
 }): Promise<SessionGateResult> {
   const deps = params.deps ?? defaultDeps
   if (
-    !deps.isWaitingRoomEnabled() ||
-    isWaitingRoomBypassedForEmail(params.userEmail)
+    !params.requireActiveSession &&
+    (!deps.isWaitingRoomEnabled() ||
+      isWaitingRoomBypassedForEmail(params.userEmail))
   ) {
     return { ok: true, reason: 'disabled' }
   }
@@ -602,15 +606,21 @@ export async function checkSessionAdmissible(params: {
     }
   }
 
+  const isKimiSessionGeminiThinker =
+    params.requireActiveSession === true &&
+    params.requestedModel === FREEBUFF_GEMINI_PRO_MODEL_ID &&
+    row.model === FREEBUFF_KIMI_MODEL_ID
+
   // Reject requests for a model the session isn't bound to. Sub-agents may
   // legitimately use other models (Gemini Flash etc.) so we only enforce this
-  // when the caller provides a requestedModel — and only against the set of
-  // supported freebuff models. This includes legacy ids so in-flight sessions
-  // created by older clients stay bound to the model they actually requested.
+  // when the caller provides a requestedModel that is a supported freebuff root
+  // model or Gemini Pro; a Kimi session's Gemini thinker request is exempt.
   if (
     params.requestedModel &&
-    isSupportedFreebuffModelId(params.requestedModel) &&
-    params.requestedModel !== row.model
+    (isSupportedFreebuffModelId(params.requestedModel) ||
+      params.requestedModel === FREEBUFF_GEMINI_PRO_MODEL_ID) &&
+    params.requestedModel !== row.model &&
+    !isKimiSessionGeminiThinker
   ) {
     return {
       ok: false,