From 77e0bd38d7a7bf2dad041971b9c9cfb019aec6f8 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 1 May 2026 14:32:07 -0700 Subject: [PATCH] Restore Gemini thinker for Kimi freebuff --- agents/base2/base2.ts | 18 ++ .../integration/local-agents.test.ts | 87 +++++++++- .../components/freebuff-model-selector.tsx | 22 +-- cli/src/hooks/use-send-message.ts | 34 ++-- cli/src/utils/local-agent-registry.ts | 140 +++++++++++++--- common/src/__tests__/free-agents.test.ts | 38 +++++ common/src/__tests__/freebuff-models.test.ts | 20 --- common/src/constants/free-agents.ts | 16 +- .../src/constants/freebuff-gemini-thinker.ts | 16 ++ common/src/constants/freebuff-models.ts | 6 - .../completions/__tests__/completions.test.ts | 68 +++++++- web/src/app/api/v1/chat/completions/_post.ts | 12 +- web/src/llm-api/types.ts | 4 +- .../free-session/__tests__/public-api.test.ts | 158 ++++++++++-------- web/src/server/free-session/config.ts | 2 - web/src/server/free-session/public-api.ts | 26 ++- 16 files changed, 480 insertions(+), 187 deletions(-) create mode 100644 common/src/__tests__/free-agents.test.ts create mode 100644 common/src/constants/freebuff-gemini-thinker.ts diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index d398b2a920..75bdb4967b 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -1,4 +1,11 @@ import { buildArray } from '@codebuff/common/util/array' +import { FREEBUFF_KIMI_MODEL_ID } from '@codebuff/common/constants/freebuff-models' +import { + FREEBUFF_GEMINI_THINKER_AGENT_ID, + FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT, + FREEBUFF_GEMINI_THINKER_STEP_PROMPT, + FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION, +} from '@codebuff/common/constants/freebuff-gemini-thinker' import { publisher } from '../constants' import { @@ -32,6 +39,7 @@ export function createBase2( const model = modelOverride ?? (isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7') + const hasFreeGeminiThinker = isFree && model === FREEBUFF_KIMI_MODEL_ID const defaultProviderOptions = isFree ? { data_collection: 'deny' as const, @@ -97,6 +105,7 @@ export function createBase2( isFree && 'code-reviewer-lite', isDefault && 'code-reviewer', isMax && 'code-reviewer-multi-prompt', + hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_AGENT_ID, 'thinker-gpt', 'context-pruner', ), @@ -154,6 +163,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u '- Spawn context-gathering agents (file pickers, code searchers, and web/docs researchers) before making edits. Use the list_directory and glob tools directly for searching and exploring the codebase.', isFree && 'Do not spawn the thinker-gpt agent, unless the user asks. Not everyone has connected their ChatGPT subscription to Codebuff to allow for it.', + hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION, isDefault && '- Spawn the editor agent to implement the changes after you have gathered all the context you need.', (isDefault || isMax) && @@ -280,6 +290,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} isDefault, isMax, isFree, + hasFreeGeminiThinker, hasNoValidation, noAskUser, }), @@ -292,6 +303,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} hasNoValidation, isSonnet, isFree, + hasFreeGeminiThinker, noAskUser, }), @@ -340,6 +352,7 @@ function buildImplementationInstructionsPrompt({ isDefault, isMax, isFree, + hasFreeGeminiThinker, hasNoValidation, noAskUser, }: { @@ -348,6 +361,7 @@ function buildImplementationInstructionsPrompt({ isDefault: boolean isMax: boolean isFree: boolean + hasFreeGeminiThinker: boolean hasNoValidation: boolean noAskUser: boolean }) { @@ -365,6 +379,7 @@ ${buildArray( 'After getting context on the user request from the codebase or from research, use the ask_user tool to ask the user for important clarifications on their request or alternate implementation strategies. You should skip this step if the choice is obvious -- only ask the user if you need their help making the best choice.', (isDefault || isMax || isFree) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, + hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT, (isDefault || isMax) && `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`, isDefault && @@ -395,6 +410,7 @@ function buildImplementationStepPrompt({ hasNoValidation, isSonnet, isFree, + hasFreeGeminiThinker, noAskUser, }: { isDefault: boolean @@ -403,12 +419,14 @@ function buildImplementationStepPrompt({ hasNoValidation: boolean isSonnet: boolean isFree: boolean + hasFreeGeminiThinker: boolean noAskUser: boolean }) { return buildArray( isMax && `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`, 'Consider loading relevant skills with the skill tool if they might help with the current task. Do not reload skills that were already loaded earlier in this conversation.', + hasFreeGeminiThinker && FREEBUFF_GEMINI_THINKER_STEP_PROMPT, isMax && `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using the str_replace or write_file tools, since it will generate the best code changes.`, (isDefault || isMax) && diff --git a/cli/src/__tests__/integration/local-agents.test.ts b/cli/src/__tests__/integration/local-agents.test.ts index 5085e77843..2f72db75db 100644 --- a/cli/src/__tests__/integration/local-agents.test.ts +++ b/cli/src/__tests__/integration/local-agents.test.ts @@ -4,13 +4,16 @@ import path from 'path' import { validateAgents } from '@codebuff/sdk' import { - describe, - test, - expect, - beforeEach, - afterEach, - mock, -} from 'bun:test' + FREEBUFF_GEMINI_THINKER_AGENT_ID, + FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT, + FREEBUFF_GEMINI_THINKER_STEP_PROMPT, + FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION, +} from '@codebuff/common/constants/freebuff-gemini-thinker' +import { + FREEBUFF_KIMI_MODEL_ID, + FREEBUFF_MINIMAX_MODEL_ID, +} from '@codebuff/common/constants/freebuff-models' +import { describe, test, expect, beforeEach, afterEach, mock } from 'bun:test' // Mock the logger to prevent analytics initialization errors in tests mock.module('../../utils/logger', () => ({ @@ -27,6 +30,7 @@ import { setProjectRoot, getProjectRoot } from '../../project-files' import { loadAgentDefinitions, loadLocalAgents, + configureFreebuffBaseAgentForModel, initializeAgentRegistry, findAgentsDirectory, getLoadedAgentsData, @@ -37,6 +41,67 @@ import { const MODEL_NAME = 'anthropic/claude-sonnet-4' +describe('configureFreebuffBaseAgentForModel', () => { + const makeBase2Free = () => ({ + id: 'base2-free', + spawnableAgents: ['file-picker', FREEBUFF_GEMINI_THINKER_AGENT_ID], + systemPrompt: [ + 'before', + FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION, + 'after', + ].join('\n'), + instructionsPrompt: [ + 'before', + FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT, + 'after', + ].join('\n'), + stepPrompt: ['before', FREEBUFF_GEMINI_THINKER_STEP_PROMPT, 'after'].join( + '\n', + ), + }) + + test('keeps the Gemini thinker and prompt guidance for Kimi', () => { + const definition = makeBase2Free() + + configureFreebuffBaseAgentForModel(definition, FREEBUFF_KIMI_MODEL_ID) + + expect(definition.spawnableAgents).toContain( + FREEBUFF_GEMINI_THINKER_AGENT_ID, + ) + expect(definition.systemPrompt).toContain( + FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION, + ) + expect(definition.instructionsPrompt).toContain( + FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT, + ) + expect(definition.stepPrompt).toContain(FREEBUFF_GEMINI_THINKER_STEP_PROMPT) + }) + + test('removes only exact Gemini thinker prompt guidance for MiniMax', () => { + const definition = makeBase2Free() + definition.systemPrompt += + '\nUser text mentioning thinker-with-files-gemini should stay.' + + configureFreebuffBaseAgentForModel(definition, FREEBUFF_MINIMAX_MODEL_ID) + + expect(definition.spawnableAgents).not.toContain( + FREEBUFF_GEMINI_THINKER_AGENT_ID, + ) + expect(definition.systemPrompt).not.toContain( + FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION, + ) + expect(definition.instructionsPrompt).not.toContain( + FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT, + ) + expect(definition.stepPrompt).not.toContain( + FREEBUFF_GEMINI_THINKER_STEP_PROMPT, + ) + expect(definition.systemPrompt).toContain( + 'User text mentioning thinker-with-files-gemini should stay.', + ) + }) +}) + const writeAgentFile = ( agentsDir: string, fileName: string, @@ -408,7 +473,9 @@ describe('Local Agent Integration', () => { expect(uiAgent!.id).toBe('test-ui-agent') // File path should be populated for "Open file" UI links // Use realpathSync to normalize paths (on macOS, /var is a symlink to /private/var) - expect(realpathSync(uiAgent!.filePath!)).toBe(realpathSync(path.join(agentsDir, 'ui-agent.ts'))) + expect(realpathSync(uiAgent!.filePath!)).toBe( + realpathSync(path.join(agentsDir, 'ui-agent.ts')), + ) }) test('loadLocalAgents sorts agents alphabetically by displayName', async () => { @@ -735,7 +802,9 @@ describe('Local Agent Integration', () => { const data = getLoadedAgentsData() expect(data).not.toBeNull() expect(data!.agents.some((a) => a.id === 'test-announce-agent')).toBe(true) - expect(data!.agents.some((a) => a.displayName === 'Announce Test Agent')).toBe(true) + expect( + data!.agents.some((a) => a.displayName === 'Announce Test Agent'), + ).toBe(true) }) // ============================================================================ diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index 0001a4da9a..307c7557ba 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -5,7 +5,6 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react' import { Button } from './button' import { FALLBACK_FREEBUFF_MODEL_ID, - FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MODELS, getFreebuffDeploymentAvailabilityLabel, @@ -23,15 +22,8 @@ import { nextFreebuffModelId } from '../utils/freebuff-model-navigation' import type { KeyEvent } from '@opentui/core' const FREEBUFF_MODEL_SELECTOR_MODELS = [ - ...FREEBUFF_MODELS.filter( - (model) => model.id === FREEBUFF_GEMINI_PRO_MODEL_ID, - ), ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID), - ...FREEBUFF_MODELS.filter( - (model) => - model.id !== FREEBUFF_GEMINI_PRO_MODEL_ID && - model.id !== FREEBUFF_KIMI_MODEL_ID, - ), + ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID), ] /** @@ -121,13 +113,7 @@ export const FreebuffModelSelector: React.FC = () => { // when the user's selection moves between queues. The tagline is shown // inline with the name now, so it's no longer part of this slot. const hintWidth = useMemo( - () => - Math.max( - 'No wait'.length, - '999 ahead'.length, - 'Used today'.length, - 'Limit used'.length, - ), + () => Math.max('No wait'.length, '999 ahead'.length, 'Limit used'.length), [], ) @@ -267,9 +253,7 @@ export const FreebuffModelSelector: React.FC = () => { const hint = !isAvailable ? 'Closed' : isQuotaExhausted - ? model.id === FREEBUFF_GEMINI_PRO_MODEL_ID - ? 'Used today' - : 'Limit used' + ? 'Limit used' : ahead === undefined ? '' : ahead === 0 diff --git a/cli/src/hooks/use-send-message.ts b/cli/src/hooks/use-send-message.ts index 03fc065c05..cdb67f2555 100644 --- a/cli/src/hooks/use-send-message.ts +++ b/cli/src/hooks/use-send-message.ts @@ -5,7 +5,11 @@ import { createStreamController } from './stream-state' import { useChatStore } from '../state/chat-store' import { getFreebuffInstanceId } from './use-freebuff-session' import { getCodebuffClient } from '../utils/codebuff-client' -import { AGENT_MODE_TO_ID, AGENT_MODE_TO_COST_MODE, IS_FREEBUFF } from '../utils/constants' +import { + AGENT_MODE_TO_ID, + AGENT_MODE_TO_COST_MODE, + IS_FREEBUFF, +} from '../utils/constants' import { createEventHandlerState } from '../utils/create-event-handler-state' import { createRunConfig } from '../utils/create-run-config' import { loadAgentDefinitions } from '../utils/local-agent-registry' @@ -108,7 +112,7 @@ export const useSendMessage = ({ onBeforeMessageSend, mainAgentTimer, scrollToLatest, - onTimerEvent = () => { }, + onTimerEvent = () => {}, isQueuePausedRef, isProcessingQueueRef, resumeQueue, @@ -295,13 +299,13 @@ export const useSendMessage = ({ const errorsToAttach = validationResult.errors.length === 0 ? [ - // Hide this for now, as validate endpoint may be flaky and we don't want to bother users. - // { - // id: NETWORK_ERROR_ID, - // message: - // 'Agent validation failed. This may be due to a network issue or temporary server problem. Please try again.', - // }, - ] + // Hide this for now, as validate endpoint may be flaky and we don't want to bother users. + // { + // id: NETWORK_ERROR_ID, + // message: + // 'Agent validation failed. This may be due to a network issue or temporary server problem. Please try again.', + // }, + ] : validationResult.errors setMessages((prev) => @@ -457,12 +461,16 @@ export const useSendMessage = ({ eventHandlerState, signal: abortController.signal, costMode: AGENT_MODE_TO_COST_MODE[agentMode], - extraCodebuffMetadata: freebuffInstanceId - ? { freebuff_instance_id: freebuffInstanceId } - : undefined, + extraCodebuffMetadata: + IS_FREEBUFF && freebuffInstanceId + ? { freebuff_instance_id: freebuffInstanceId } + : undefined, }) - logger.info({ runConfig }, '[send-message] Sending message with sdk run config') + logger.info( + { runConfig }, + '[send-message] Sending message with sdk run config', + ) const runState = await client.run(runConfig) // Finalize: persist state and mark complete diff --git a/cli/src/utils/local-agent-registry.ts b/cli/src/utils/local-agent-registry.ts index 6106b3928e..59b042e147 100644 --- a/cli/src/utils/local-agent-registry.ts +++ b/cli/src/utils/local-agent-registry.ts @@ -3,12 +3,22 @@ import os from 'os' import path from 'path' import { pluralize } from '@codebuff/common/util/string' -import { loadLocalAgents as sdkLoadLocalAgents, loadMCPConfigSync } from '@codebuff/sdk' +import { + loadLocalAgents as sdkLoadLocalAgents, + loadMCPConfigSync, +} from '@codebuff/sdk' import type { MCPConfig } from '@codebuff/common/types/mcp' import { FREE_MODE_AGENT_MODELS } from '@codebuff/common/constants/free-agents' -import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models' +import { + FREEBUFF_GEMINI_THINKER_AGENT_ID, + FREEBUFF_GEMINI_THINKER_PROMPT_LINES, +} from '@codebuff/common/constants/freebuff-gemini-thinker' +import { + FREEBUFF_KIMI_MODEL_ID, + FREEBUFF_MODELS, +} from '@codebuff/common/constants/freebuff-models' import { getSelectedFreebuffModel } from '../state/freebuff-model-store' import { getProjectRoot } from '../project-files' @@ -16,6 +26,8 @@ import { AGENT_MODE_TO_ID, IS_FREEBUFF, type AgentMode } from './constants' import { logger } from './logger' import * as bundledAgentsModule from '../agents/bundled-agents.generated' +import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition' + /** Agents whose hardcoded model gets swapped out for the user's currently * selected freebuff model. Derived from the server's * `FREE_MODE_AGENT_MODELS` — any agent whose allowlist contains every @@ -26,8 +38,55 @@ const FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS: ReadonlySet = new Set( .filter(([, allowed]) => FREEBUFF_MODELS.every((m) => allowed.has(m.id))) .map(([agentId]) => agentId), ) +const FREEBUFF_GEMINI_THINKER_PROMPT_LINE_SET = new Set( + FREEBUFF_GEMINI_THINKER_PROMPT_LINES, +) -import type { AgentDefinition } from '@codebuff/common/templates/initial-agents-dir/types/agent-definition' +type ConfigurableFreebuffBaseAgent = { + id: string + spawnableAgents?: string[] + systemPrompt?: string + instructionsPrompt?: string + stepPrompt?: string +} + +function stripFreebuffGeminiThinkerPrompt(prompt: string): string { + return prompt + .split('\n') + .filter((line) => !FREEBUFF_GEMINI_THINKER_PROMPT_LINE_SET.has(line.trim())) + .join('\n') +} + +export function configureFreebuffBaseAgentForModel( + def: ConfigurableFreebuffBaseAgent, + selectedModel: string, +): void { + if (def.id !== 'base2-free') return + + const hasGeminiThinker = selectedModel === FREEBUFF_KIMI_MODEL_ID + const spawnableAgents = def.spawnableAgents ?? [] + + def.spawnableAgents = hasGeminiThinker + ? Array.from( + new Set([...spawnableAgents, FREEBUFF_GEMINI_THINKER_AGENT_ID]), + ) + : spawnableAgents.filter( + (agentId) => agentId !== FREEBUFF_GEMINI_THINKER_AGENT_ID, + ) + + if (hasGeminiThinker) return + + for (const key of [ + 'systemPrompt', + 'instructionsPrompt', + 'stepPrompt', + ] as const) { + const prompt = def[key] + if (typeof prompt === 'string') { + def[key] = stripFreebuffGeminiThinkerPrompt(prompt) + } + } +} // ============================================================================ // Constants and types @@ -56,12 +115,12 @@ let mcpServersCache: Record = {} /** * Initialize the agent registry by loading user agents via the SDK. * This must be called at CLI startup before any sync agent loading functions. - * + * * Agents are loaded from: * - {cwd}/.agents (project) * - {cwd}/../.agents (parent, e.g. monorepo root) * - ~/.agents (global, user's home directory) - * + * * Later directories take precedence, so project agents override global ones. */ export async function initializeAgentRegistry(): Promise { @@ -72,7 +131,10 @@ export async function initializeAgentRegistry(): Promise { userAgentFilePaths = buildAgentFilePathMap(getDefaultAgentDirs()) } catch (error) { // Fall back to empty cache if SDK loading fails, but log a warning - logger.warn({ error }, 'Failed to load user agents from .agents directories') + logger.warn( + { error }, + 'Failed to load user agents from .agents directories', + ) userAgentsCache = {} userAgentFilePaths = new Map() } @@ -83,7 +145,10 @@ export async function initializeAgentRegistry(): Promise { mcpServersCache = mcpConfig.mcpServers if (Object.keys(mcpServersCache).length > 0) { logger.debug( - { mcpServers: Object.keys(mcpServersCache), source: mcpConfig._sourceFilePath }, + { + mcpServers: Object.keys(mcpServersCache), + source: mcpConfig._sourceFilePath, + }, '[agents] Loaded MCP servers from mcp.json', ) } @@ -112,7 +177,7 @@ const getDefaultAgentDirs = (): string[] => { const buildAgentFilePathMap = (agentsDirs: string[]): Map => { const idToPath = new Map() const idRegex = /id\s*:\s*['"`]([^'"`]+)['"`]/i - + const scanDirectory = (dir: string): void => { try { const entries = fs.readdirSync(dir, { withFileTypes: true }) @@ -122,7 +187,12 @@ const buildAgentFilePathMap = (agentsDirs: string[]): Map => { scanDirectory(fullPath) continue } - if (!entry.isFile() || !entry.name.endsWith('.ts') || entry.name.endsWith('.d.ts') || entry.name.endsWith('.test.ts')) { + if ( + !entry.isFile() || + !entry.name.endsWith('.ts') || + entry.name.endsWith('.d.ts') || + entry.name.endsWith('.test.ts') + ) { continue } try { @@ -139,7 +209,7 @@ const buildAgentFilePathMap = (agentsDirs: string[]): Map => { // Skip directories that can't be read } } - + // Scan all directories - later directories override earlier ones for (const agentsDir of agentsDirs) { scanDirectory(agentsDir) @@ -235,13 +305,18 @@ const cachedAgentsByMode: Map = new Map() /** * Load local agents for display in the '@' menu. - * + * * @param currentAgentMode - If provided, filters bundled agents to only include * subagents of the current mode's agent (e.g., base2's spawnableAgents for DEFAULT mode). * User's local agents from .agents/ are always included regardless of mode. */ -export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[] => { - const cacheKey = currentAgentMode ?? 'all' +export const loadLocalAgents = ( + currentAgentMode?: AgentMode, +): LocalAgentInfo[] => { + const selectedFreebuffModel = IS_FREEBUFF ? getSelectedFreebuffModel() : null + const cacheKey = selectedFreebuffModel + ? `${currentAgentMode ?? 'all'}:${selectedFreebuffModel}` + : (currentAgentMode ?? 'all') const cached = cachedAgentsByMode.get(cacheKey) if (cached) { return cached @@ -251,35 +326,45 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[] // compiled into the CLI binary at build time const bundledAgentsInfo = getBundledAgentsAsLocalInfo() const bundledAgents = getBundledAgents() - + // Filter bundled agents to only include subagents of the current mode's agent let filteredBundledAgents: LocalAgentInfo[] if (currentAgentMode) { const currentAgentId = AGENT_MODE_TO_ID[currentAgentMode] const currentAgentDef = bundledAgents[currentAgentId] + ? { + ...bundledAgents[currentAgentId], + spawnableAgents: [ + ...(bundledAgents[currentAgentId].spawnableAgents ?? []), + ], + } + : undefined + if (selectedFreebuffModel && currentAgentDef) { + configureFreebuffBaseAgentForModel(currentAgentDef, selectedFreebuffModel) + } const spawnableAgentIds = new Set(currentAgentDef?.spawnableAgents ?? []) - + // Only include bundled agents that are in the spawnableAgents list - filteredBundledAgents = bundledAgentsInfo.filter(agent => - spawnableAgentIds.has(agent.id) + filteredBundledAgents = bundledAgentsInfo.filter((agent) => + spawnableAgentIds.has(agent.id), ) } else { filteredBundledAgents = bundledAgentsInfo } - + const results: LocalAgentInfo[] = [...filteredBundledAgents] - const includedIds = new Set(filteredBundledAgents.map(a => a.id)) + const includedIds = new Set(filteredBundledAgents.map((a) => a.id)) // Get user agents from the SDK-loaded cache // User agents are always included (not filtered by mode) and can override bundled agents const userAgents = getUserAgentsAsLocalInfo() - + // Merge user agents - they override bundled agents with same ID // and are always included regardless of mode filtering for (const userAgent of userAgents) { if (includedIds.has(userAgent.id)) { // Replace bundled agent with user's version - const idx = results.findIndex(a => a.id === userAgent.id) + const idx = results.findIndex((a) => a.id === userAgent.id) if (idx !== -1) { results[idx] = userAgent } @@ -292,7 +377,7 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[] const sorted = results.sort((a, b) => a.displayName.localeCompare(b.displayName, 'en'), ) - + cachedAgentsByMode.set(cacheKey, sorted) return sorted } @@ -306,7 +391,7 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[] * Bundled agents are compiled into the CLI binary at build time. * User agents from .agents/ are loaded via SDK at startup and cached. * User agents can override bundled agents with the same ID. - * + * * Additionally, all user agent IDs are automatically added to the spawnableAgents * of any base agent (agents with IDs starting with 'base'), so users can spawn * their custom agents without needing to modify the base agent definition. @@ -314,17 +399,19 @@ export const loadLocalAgents = (currentAgentMode?: AgentMode): LocalAgentInfo[] export const loadAgentDefinitions = (): AgentDefinition[] => { // Start with bundled agents - these are the default Codebuff agents const bundledAgents = getBundledAgents() - const definitions: AgentDefinition[] = Object.values(bundledAgents).map(def => ({ ...def })) + const definitions: AgentDefinition[] = Object.values(bundledAgents).map( + (def) => ({ ...def }), + ) const bundledIds = new Set(Object.keys(bundledAgents)) // Get user agents from the SDK-loaded cache const userAgentDefs = getUserAgentDefinitions() - const userAgentIds = userAgentDefs.map(def => def.id) + const userAgentIds = userAgentDefs.map((def) => def.id) for (const agentDef of userAgentDefs) { // User agents override bundled agents with the same ID if (bundledIds.has(agentDef.id)) { - const idx = definitions.findIndex(d => d.id === agentDef.id) + const idx = definitions.findIndex((d) => d.id === agentDef.id) if (idx !== -1) { definitions[idx] = { ...agentDef } } @@ -380,6 +467,7 @@ export const loadAgentDefinitions = (): AgentDefinition[] => { if (FREEBUFF_MODEL_OVERRIDABLE_AGENT_IDS.has(def.id)) { def.model = selectedModel } + configureFreebuffBaseAgentForModel(def, selectedModel) } } diff --git a/common/src/__tests__/free-agents.test.ts b/common/src/__tests__/free-agents.test.ts new file mode 100644 index 0000000000..e6370c9cc3 --- /dev/null +++ b/common/src/__tests__/free-agents.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, test } from 'bun:test' + +import { FREEBUFF_GEMINI_PRO_MODEL_ID } from '../constants/freebuff-models' +import { FREEBUFF_GEMINI_THINKER_AGENT_ID } from '../constants/freebuff-gemini-thinker' +import { + isFreebuffGeminiThinkerAgent, + isFreeModeAllowedAgentModel, +} from '../constants/free-agents' + +describe('free mode agent model allowlist', () => { + test('allows Gemini Pro for the thinker subagent but not the freebuff root', () => { + expect( + isFreeModeAllowedAgentModel('base2-free', FREEBUFF_GEMINI_PRO_MODEL_ID), + ).toBe(false) + expect( + isFreeModeAllowedAgentModel( + FREEBUFF_GEMINI_THINKER_AGENT_ID, + FREEBUFF_GEMINI_PRO_MODEL_ID, + ), + ).toBe(true) + }) + + test('recognizes the Gemini thinker agent in free mode', () => { + expect(isFreebuffGeminiThinkerAgent(FREEBUFF_GEMINI_THINKER_AGENT_ID)).toBe( + true, + ) + expect( + isFreebuffGeminiThinkerAgent( + `codebuff/${FREEBUFF_GEMINI_THINKER_AGENT_ID}@0.0.1`, + ), + ).toBe(true) + expect( + isFreebuffGeminiThinkerAgent( + `other/${FREEBUFF_GEMINI_THINKER_AGENT_ID}@0.0.1`, + ), + ).toBe(false) + }) +}) diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts index 10709e2360..fcf1d04db4 100644 --- a/common/src/__tests__/freebuff-models.test.ts +++ b/common/src/__tests__/freebuff-models.test.ts @@ -2,7 +2,6 @@ import { describe, expect, test } from 'bun:test' import { DEFAULT_FREEBUFF_MODEL_ID, - FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MODELS, @@ -10,29 +9,10 @@ import { getFreebuffDeploymentAvailabilityLabel, isFreebuffDeploymentHours, isFreebuffModelId, - isFreebuffModelAvailable, isSupportedFreebuffModelId, } from '../constants/freebuff-models' describe('freebuff model availability', () => { - test('includes Gemini 3.1 Pro as an always-available option', () => { - expect(FREEBUFF_MODELS.map((model) => model.id)).toContain( - FREEBUFF_GEMINI_PRO_MODEL_ID, - ) - expect( - isFreebuffModelAvailable( - FREEBUFF_GEMINI_PRO_MODEL_ID, - new Date('2026-01-05T18:00:00Z'), - ), - ).toBe(true) - expect( - isFreebuffModelAvailable( - FREEBUFF_GEMINI_PRO_MODEL_ID, - new Date('2026-01-05T12:00:00Z'), - ), - ).toBe(true) - }) - test('defaults to Kimi K2.6', () => { expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID) }) diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 6d22152c5a..6bc97992d4 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -1,6 +1,10 @@ import { parseAgentId } from '../util/agent-id-parsing' -import { SUPPORTED_FREEBUFF_MODELS } from './freebuff-models' +import { FREEBUFF_GEMINI_THINKER_AGENT_ID } from './freebuff-gemini-thinker' +import { + FREEBUFF_GEMINI_PRO_MODEL_ID, + SUPPORTED_FREEBUFF_MODELS, +} from './freebuff-models' import type { CostMode } from './model-config' @@ -53,6 +57,9 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Code reviewer for free mode 'code-reviewer-lite': new Set(FREEBUFF_ALLOWED_MODEL_IDS), + + // Kimi freebuff root may spawn Gemini Pro for deeper thinking. + [FREEBUFF_GEMINI_THINKER_AGENT_ID]: new Set([FREEBUFF_GEMINI_PRO_MODEL_ID]), } /** @@ -93,6 +100,13 @@ export function isFreebuffRootAgent(fullAgentId: string): boolean { return FREEBUFF_ROOT_AGENT_ID_SET.has(agentId) } +export function isFreebuffGeminiThinkerAgent(fullAgentId: string): boolean { + const { publisherId, agentId } = parseAgentId(fullAgentId) + if (!agentId) return false + if (publisherId && publisherId !== 'codebuff') return false + return agentId === FREEBUFF_GEMINI_THINKER_AGENT_ID +} + /** * Check if a specific agent is allowed to use a specific model in FREE mode. * This is the strictest check - validates both the agent AND model combination. diff --git a/common/src/constants/freebuff-gemini-thinker.ts b/common/src/constants/freebuff-gemini-thinker.ts new file mode 100644 index 0000000000..007ac18f00 --- /dev/null +++ b/common/src/constants/freebuff-gemini-thinker.ts @@ -0,0 +1,16 @@ +export const FREEBUFF_GEMINI_THINKER_AGENT_ID = 'thinker-with-files-gemini' + +export const FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION = + "Spawn the thinker-with-files-gemini agent for complex problems -- it's very smart. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths since it has no conversation history." + +export const FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT = + '- For complex problems, spawn the thinker-with-files-gemini agent after gathering context. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths.' + +export const FREEBUFF_GEMINI_THINKER_STEP_PROMPT = + 'Spawn the thinker-with-files-gemini agent for complex problems, not routine edits. Pass the relevant filePaths.' + +export const FREEBUFF_GEMINI_THINKER_PROMPT_LINES = [ + FREEBUFF_GEMINI_THINKER_SYSTEM_INSTRUCTION, + FREEBUFF_GEMINI_THINKER_INSTRUCTIONS_PROMPT, + FREEBUFF_GEMINI_THINKER_STEP_PROMPT, +] as const diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 246731a3f6..884cb6cfe9 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -42,12 +42,6 @@ interface LocalTimeFormatOptions { } export const FREEBUFF_MODELS = [ - { - id: FREEBUFF_GEMINI_PRO_MODEL_ID, - displayName: 'Gemini 3.1 Pro', - tagline: 'Deepest, 1/day', - availability: 'always', - }, { id: FREEBUFF_MINIMAX_MODEL_ID, displayName: 'MiniMax M2.7', diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index cf846131cf..70599bf6d1 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -161,6 +161,13 @@ describe('/api/v1/chat/completions POST endpoint', () => { status: 'running', } } + if (runId === 'run-gemini-thinker-child') { + return { + agent_id: 'thinker-with-files-gemini', + ancestor_run_ids: ['run-free'], + status: 'running', + } + } if (runId === 'run-completed') { return { agent_id: 'agent-123', @@ -823,7 +830,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { FETCH_PATH_TEST_TIMEOUT_MS, ) - it('lets freebuff use Gemini 3.1 Pro through the free-mode allowlist', async () => { + it('rejects Gemini 3.1 Pro as a root freebuff model', async () => { const req = new NextRequest( 'http://localhost:3000/api/v1/chat/completions', { @@ -854,7 +861,9 @@ describe('/api/v1/chat/completions POST endpoint', () => { checkSessionAdmissible: mockCheckSessionAdmissibleAllow, }) - expect(response.status).toBe(200) + expect(response.status).toBe(403) + const body = await response.json() + expect(body.error).toBe('free_mode_invalid_agent_model') }) it('rejects standalone free-mode reviewer runs even when the model is allowlisted', async () => { @@ -864,7 +873,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'), body: JSON.stringify({ - model: FREEBUFF_GEMINI_PRO_MODEL_ID, + model: 'minimax/minimax-m2.7', stream: false, codebuff_metadata: { run_id: 'run-reviewer-direct', @@ -893,7 +902,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(body.error).toBe('free_mode_invalid_agent_hierarchy') }) - it('counts child reviewer Gemini requests toward the free-mode request limit', async () => { + it('rejects the Gemini thinker subagent when the session gate rejects it', async () => { const response = await postChatCompletions({ req: new NextRequest('http://localhost:3000/api/v1/chat/completions', { method: 'POST', @@ -902,9 +911,10 @@ describe('/api/v1/chat/completions POST endpoint', () => { model: FREEBUFF_GEMINI_PRO_MODEL_ID, stream: false, codebuff_metadata: { - run_id: 'run-reviewer-child', + run_id: 'run-gemini-thinker-child', client_id: 'test-client-id-123', cost_mode: 'free', + freebuff_instance_id: 'inst-123', }, }), }), @@ -916,7 +926,53 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, - checkSessionAdmissible: mockCheckSessionAdmissibleAllow, + checkSessionAdmissible: async (params) => { + expect(params.requireActiveSession).toBe(true) + expect(params.requestedModel).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID) + expect(params.claimedInstanceId).toBe('inst-123') + return { + ok: false, + code: 'session_model_mismatch', + message: 'This session is bound to minimax/minimax-m2.7.', + } + }, + }) + + expect(response.status).toBe(409) + const body = await response.json() + expect(body.error).toBe('session_model_mismatch') + }) + + it('requires an active session check for the Gemini thinker subagent', async () => { + const response = await postChatCompletions({ + req: new NextRequest('http://localhost:3000/api/v1/chat/completions', { + method: 'POST', + headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'), + body: JSON.stringify({ + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + stream: false, + codebuff_metadata: { + run_id: 'run-gemini-thinker-child', + client_id: 'test-client-id-123', + cost_mode: 'free', + freebuff_instance_id: 'inst-123', + }, + }), + }), + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: mockFetch, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: async (params) => { + expect(params.requireActiveSession).toBe(true) + expect(params.requestedModel).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID) + expect(params.claimedInstanceId).toBe('inst-123') + return { ok: true, reason: 'active', remainingMs: 60_000 } + }, }) expect(response.status).toBe(200) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 0a7771d46d..6d2cf868ad 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -1,6 +1,7 @@ import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events' import { BYOK_OPENROUTER_HEADER } from '@codebuff/common/constants/byok' import { + isFreebuffGeminiThinkerAgent, isFreebuffRootAgent, isFreeMode, isFreeModeAllowedAgentModel, @@ -433,11 +434,11 @@ export async function postChatCompletions(params: { } } - // Freebuff waiting-room gate. Only enforced for free-mode requests, and - // only when FREEBUFF_WAITING_ROOM_ENABLED=true — otherwise this is a - // no-op that returns { ok: true, reason: 'disabled' } without a DB hit. - // Runs before the rate limiter so rejected requests don't burn a queued - // user's free-mode counters. + // Freebuff waiting-room gate. Usually enforced only when + // FREEBUFF_WAITING_ROOM_ENABLED=true; Gemini thinker children still force + // a DB-backed active-session check so their Kimi-only allowance comes from + // trusted server state. Runs before the rate limiter so rejected requests + // don't burn a queued user's free-mode counters. if (isFreeModeRequest) { const claimedInstanceId = typedBody.codebuff_metadata?.freebuff_instance_id @@ -446,6 +447,7 @@ export async function postChatCompletions(params: { userEmail: userInfo.email, claimedInstanceId, requestedModel: typedBody.model, + requireActiveSession: isFreebuffGeminiThinkerAgent(agentId), }) if (!gate.ok) { trackEvent({ diff --git a/web/src/llm-api/types.ts b/web/src/llm-api/types.ts index dd3b89a4d7..66a3425a52 100644 --- a/web/src/llm-api/types.ts +++ b/web/src/llm-api/types.ts @@ -83,9 +83,7 @@ export function isChatCompletionRequestBody( /** * Type guard to check if a value is CodebuffMetadata */ -export function isCodebuffMetadata( - value: unknown, -): value is CodebuffMetadata { +export function isCodebuffMetadata(value: unknown): value is CodebuffMetadata { if (typeof value !== 'object' || value === null) { return false } diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index 265c2872b1..70303ee11e 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -402,56 +402,6 @@ describe('requestSession', () => { const KIMI_LIMIT = 5 const KIMI_WINDOW_HOURS = 12 const KIMI_OPEN_TIME = new Date('2026-04-17T16:00:00Z') - const GEMINI_LIMIT = 1 - const GEMINI_WINDOW_HOURS = 24 - - test('rate_limited: Gemini 3.1 Pro allows one admit per 24h', async () => { - deps._tick(KIMI_OPEN_TIME) - const now = deps._now() - deps.admits.push({ - user_id: 'u1', - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - admitted_at: new Date(now.getTime() - 23 * 60 * 60 * 1000), - }) - - const state = await requestSession({ - userId: 'u1', - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - deps, - }) - expect(state.status).toBe('rate_limited') - if (state.status !== 'rate_limited') throw new Error('unreachable') - expect(state.model).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID) - expect(state.limit).toBe(GEMINI_LIMIT) - expect(state.windowHours).toBe(GEMINI_WINDOW_HOURS) - expect(state.recentCount).toBe(GEMINI_LIMIT) - expect(state.retryAfterMs).toBe(60 * 60 * 1000) - expect(deps.rows.has('u1')).toBe(false) - }) - - test('rate_limited: Gemini 3.1 Pro admit outside 24h window does not count', async () => { - deps._tick(KIMI_OPEN_TIME) - const now = deps._now() - deps.admits.push({ - user_id: 'u1', - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - admitted_at: new Date(now.getTime() - 25 * 60 * 60 * 1000), - }) - - const state = await requestSession({ - userId: 'u1', - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - deps, - }) - expect(state.status).toBe('queued') - if (state.status !== 'queued') throw new Error('unreachable') - expect(state.rateLimit).toEqual({ - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - limit: GEMINI_LIMIT, - windowHours: GEMINI_WINDOW_HOURS, - recentCount: 0, - }) - }) test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => { deps._tick(KIMI_OPEN_TIME) @@ -745,25 +695,6 @@ describe('getSessionState', () => { expect(state).toEqual({ status: 'none', queueDepthByModel: {} }) }) - test('no row surfaces exhausted Gemini quota before joining', async () => { - const now = deps._now() - deps.admits.push({ - user_id: 'u1', - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - admitted_at: new Date(now.getTime() - 23 * 60 * 60 * 1000), - }) - - const state = await getSessionState({ userId: 'u1', deps }) - expect(state.status).toBe('none') - if (state.status !== 'none') throw new Error('unreachable') - expect(state.rateLimitsByModel?.[FREEBUFF_GEMINI_PRO_MODEL_ID]).toEqual({ - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - limit: 1, - windowHours: 24, - recentCount: 1, - }) - }) - test('active session with matching instance id returns active', async () => { await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! @@ -916,6 +847,20 @@ describe('checkSessionAdmissible', () => { expect(result.ok).toBe(true) }) + test('requireActiveSession ignores disabled shortcut and requires a row', async () => { + const offDeps = makeDeps({ isWaitingRoomEnabled: () => false }) + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: 'inst-1', + requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID, + requireActiveSession: true, + deps: offDeps, + }) + expect(result.ok).toBe(false) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('waiting_room_required') + }) + test('no session → waiting_room_required', async () => { const result = await checkSessionAdmissible({ userId: 'u1', @@ -940,6 +885,20 @@ describe('checkSessionAdmissible', () => { expect(deps.rows.size).toBe(0) }) + test('requireActiveSession ignores bypassed emails', async () => { + const result = await checkSessionAdmissible({ + userId: 'u1', + userEmail: 'team@codebuff.com', + claimedInstanceId: 'inst-1', + requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID, + requireActiveSession: true, + deps, + }) + expect(result.ok).toBe(false) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('waiting_room_required') + }) + test('bypassed email is case-insensitive', async () => { const result = await checkSessionAdmissible({ userId: 'u1', @@ -950,6 +909,31 @@ describe('checkSessionAdmissible', () => { expect(result.ok).toBe(true) }) + test('requireActiveSession still admits Gemini thinker for Kimi rows when disabled', async () => { + const offDeps = makeDeps({ isWaitingRoomEnabled: () => false }) + const now = offDeps._now() + offDeps.rows.set('u1', { + user_id: 'u1', + status: 'active', + active_instance_id: 'inst-1', + model: FREEBUFF_KIMI_MODEL_ID, + queued_at: now, + admitted_at: now, + expires_at: new Date(now.getTime() + SESSION_LEN), + created_at: now, + updated_at: now, + }) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: 'inst-1', + requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID, + requireActiveSession: true, + deps: offDeps, + }) + expect(result.ok).toBe(true) + }) + test('queued session → waiting_room_queued', async () => { await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const result = await checkSessionAdmissible({ @@ -978,6 +962,42 @@ describe('checkSessionAdmissible', () => { expect(result.remainingMs).toBe(SESSION_LEN) }) + test('active Kimi session admits Gemini thinker requests', async () => { + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) + const row = deps.rows.get('u1')! + row.model = FREEBUFF_KIMI_MODEL_ID + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID, + requireActiveSession: true, + deps, + }) + expect(result.ok).toBe(true) + }) + + test('active MiniMax session rejects Gemini thinker requests', async () => { + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) + const row = deps.rows.get('u1')! + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + requestedModel: FREEBUFF_GEMINI_PRO_MODEL_ID, + requireActiveSession: true, + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('session_model_mismatch') + }) + test('active + wrong instance id → session_superseded', async () => { await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index cbde91678d..d3e0d9c556 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -1,5 +1,4 @@ import { - FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, @@ -54,7 +53,6 @@ export function getSessionGraceMs(): number { * queue). */ const INSTANT_ADMIT_CAPACITY: Record = { - [FREEBUFF_GEMINI_PRO_MODEL_ID]: 50, [FREEBUFF_GLM_MODEL_ID]: 50, [FREEBUFF_KIMI_MODEL_ID]: 50, [FREEBUFF_MINIMAX_MODEL_ID]: 1000, diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index a311ff9411..c5d48bc417 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -48,7 +48,6 @@ import type { * queued/active responses — changing them is a deliberate, typed edit. */ const RATE_LIMITS: Record = { - [FREEBUFF_GEMINI_PRO_MODEL_ID]: { limit: 1, windowHours: 24 }, [FREEBUFF_GLM_MODEL_ID]: { limit: 5, windowHours: 12 }, [FREEBUFF_KIMI_MODEL_ID]: { limit: 5, windowHours: 12 }, } @@ -530,6 +529,10 @@ export async function checkSessionAdmissible(params: { userId: string userEmail?: string | null | undefined claimedInstanceId: string | null | undefined + /** Forces a real active session row check even when the waiting room is + * globally disabled or the user email normally bypasses it. Use for + * subagent/model combinations that must be bound to trusted session state. */ + requireActiveSession?: boolean /** Model the chat-completions request is for. When provided, the gate * rejects requests whose model doesn't match the active session's model * so a stale CLI tab can't slip a request through under the wrong model. */ @@ -538,8 +541,9 @@ export async function checkSessionAdmissible(params: { }): Promise { const deps = params.deps ?? defaultDeps if ( - !deps.isWaitingRoomEnabled() || - isWaitingRoomBypassedForEmail(params.userEmail) + !params.requireActiveSession && + (!deps.isWaitingRoomEnabled() || + isWaitingRoomBypassedForEmail(params.userEmail)) ) { return { ok: true, reason: 'disabled' } } @@ -602,15 +606,21 @@ export async function checkSessionAdmissible(params: { } } + const isKimiSessionGeminiThinker = + params.requireActiveSession === true && + params.requestedModel === FREEBUFF_GEMINI_PRO_MODEL_ID && + row.model === FREEBUFF_KIMI_MODEL_ID + // Reject requests for a model the session isn't bound to. Sub-agents may // legitimately use other models (Gemini Flash etc.) so we only enforce this - // when the caller provides a requestedModel — and only against the set of - // supported freebuff models. This includes legacy ids so in-flight sessions - // created by older clients stay bound to the model they actually requested. + // when the caller provides a requestedModel and it is either a supported + // freebuff root model or Kimi's Gemini thinker model. if ( params.requestedModel && - isSupportedFreebuffModelId(params.requestedModel) && - params.requestedModel !== row.model + (isSupportedFreebuffModelId(params.requestedModel) || + params.requestedModel === FREEBUFF_GEMINI_PRO_MODEL_ID) && + params.requestedModel !== row.model && + !isKimiSessionGeminiThinker ) { return { ok: false,