diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 8a6b65760d..9e14909f89 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -62,9 +62,9 @@ describe('editor agent', () => { expect(gpt5Editor.model).toBe('openai/gpt-5.1') }) - test('creates minimax editor', () => { - const minimaxEditor = createCodeEditor({ model: 'minimax' }) - expect(minimaxEditor.model).toBe('minimax/minimax-m2.5') + test('creates glm editor', () => { + const glmEditor = createCodeEditor({ model: 'glm' }) + expect(glmEditor.model).toBe('z-ai/glm-5.1') }) test('gpt-5 editor does not include think tags in instructions', () => { @@ -74,9 +74,9 @@ describe('editor agent', () => { }) test('glm editor does not include think tags in instructions', () => { - const minimaxEditor = createCodeEditor({ model: 'minimax' }) - expect(minimaxEditor.instructionsPrompt).not.toContain('') - expect(minimaxEditor.instructionsPrompt).not.toContain('') + const glmEditor = createCodeEditor({ model: 'glm' }) + expect(glmEditor.instructionsPrompt).not.toContain('') + expect(glmEditor.instructionsPrompt).not.toContain('') }) test('opus editor includes think tags in instructions', () => { @@ -88,17 +88,17 @@ describe('editor agent', () => { test('all variants have same base properties', () => { const opusEditor = createCodeEditor({ model: 'opus' }) const gpt5Editor = createCodeEditor({ model: 'gpt-5' }) - const minimaxEditor = createCodeEditor({ model: 'minimax' }) + const glmEditor = createCodeEditor({ model: 'glm' }) // All should have same basic structure expect(opusEditor.displayName).toBe(gpt5Editor.displayName) - expect(gpt5Editor.displayName).toBe(minimaxEditor.displayName) + expect(gpt5Editor.displayName).toBe(glmEditor.displayName) expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode) - expect(gpt5Editor.outputMode).toBe(minimaxEditor.outputMode) + expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode) 
expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames) - expect(gpt5Editor.toolNames).toEqual(minimaxEditor.toolNames) + expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames) }) }) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index d2ff6c7578..22a58d82a9 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -28,7 +28,7 @@ export function createBase2( return { publisher, - model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', + model: isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? { data_collection: 'deny', } : { diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts index 9cb5675b5e..29225f0c29 100644 --- a/agents/editor/editor-lite.ts +++ b/agents/editor/editor-lite.ts @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor' import type { AgentDefinition } from '../types/agent-definition' const definition: AgentDefinition = { - ...createCodeEditor({ model: 'minimax' }), + ...createCodeEditor({ model: 'glm' }), id: 'editor-lite', } export default definition diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index 6beb22d221..e191609ad2 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -4,7 +4,7 @@ import { publisher } from '../constants' import type { AgentDefinition } from '../types/agent-definition' export const createCodeEditor = (options: { - model: 'gpt-5' | 'opus' | 'minimax' + model: 'gpt-5' | 'opus' | 'glm' }): Omit => { const { model } = options return { @@ -12,8 +12,8 @@ export const createCodeEditor = (options: { model: options.model === 'gpt-5' ? 'openai/gpt-5.1' - : options.model === 'minimax' - ? 'minimax/minimax-m2.5' + : options.model === 'glm' + ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6', ...(options.model === 'opus' && { providerOptions: { @@ -65,7 +65,7 @@ OR for new files or major rewrites: } -${model === 'gpt-5' || model === 'minimax' +${model === 'gpt-5' || model === 'glm' ? 
'' : `Before you start writing your implementation, you should use tags to think about the best way to implement the changes. diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts index f1baa7dffc..feafb87c45 100644 --- a/agents/reviewer/code-reviewer-lite.ts +++ b/agents/reviewer/code-reviewer-lite.ts @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer' const definition: SecretAgentDefinition = { id: 'code-reviewer-lite', publisher, - ...createReviewer('minimax/minimax-m2.5'), + ...createReviewer('z-ai/glm-5.1'), } export default definition diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index abbcbc0cda..522994ac27 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -424,6 +424,7 @@ export type ModelName = | 'moonshotai/kimi-k2.5' | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' + | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' | 'z-ai/glm-4.6:nitro' | 'z-ai/glm-4.7' diff --git a/cli/src/components/choice-ad-banner.tsx b/cli/src/components/choice-ad-banner.tsx index 5a72e89ab5..7ca3f1d4ac 100644 --- a/cli/src/components/choice-ad-banner.tsx +++ b/cli/src/components/choice-ad-banner.tsx @@ -70,7 +70,7 @@ export const ChoiceAdBanner: React.FC = ({ ads, onImpressio } }, [visibleAds, onImpression]) - const hoverBorderColor = theme.link + const hoverBorderColor = theme.primary return ( = ({ ads, onImpressio diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts index 3055f4e2c2..c68535d78d 100644 --- a/cli/src/utils/create-run-config.ts +++ b/cli/src/utils/create-run-config.ts @@ -1,5 +1,7 @@ import path from 'path' +import { MAX_AGENT_STEPS_DEFAULT } from '@codebuff/common/constants/agents' + import { createEventHandler, createStreamChunkHandler, @@ -109,7 +111,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => { content, previousRun: previousRunState ?? 
undefined, agentDefinitions, - maxAgentSteps: 100, + maxAgentSteps: MAX_AGENT_STEPS_DEFAULT, handleStreamChunk: createStreamChunkHandler(eventHandlerState), handleEvent: createEventHandler(eventHandlerState), signal: params.signal, diff --git a/common/src/constants/agents.ts b/common/src/constants/agents.ts index 01b92e37d4..5737b77614 100644 --- a/common/src/constants/agents.ts +++ b/common/src/constants/agents.ts @@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce( {} as Record, ) -export const MAX_AGENT_STEPS_DEFAULT = 100 +export const MAX_AGENT_STEPS_DEFAULT = 200 diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index e56e3fb58a..3a9f5c9166 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -18,7 +18,7 @@ export const FREE_COST_MODE = 'free' as const */ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator - 'base2-free': new Set(['minimax/minimax-m2.5']), + 'base2-free': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // File exploration agents 'file-picker': new Set(['google/gemini-2.5-flash-lite']), @@ -33,10 +33,10 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'basher': new Set(['google/gemini-3.1-flash-lite-preview']), // Editor for free mode - 'editor-lite': new Set(['minimax/minimax-m2.5']), + 'editor-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // Code reviewer for free mode - 'code-reviewer-lite': new Set(['minimax/minimax-m2.5']), + 'code-reviewer-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // Thinker for free mode 'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']), diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index abbcbc0cda..522994ac27 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ 
b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -424,6 +424,7 @@ export type ModelName = | 'moonshotai/kimi-k2.5' | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' + | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' | 'z-ai/glm-4.6:nitro' | 'z-ai/glm-4.7' diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index 58a4cb099f..ad01abac66 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -7,19 +7,70 @@ * to measure how well Fireworks caches the shared prefix across turns. * * Usage: - * bun scripts/test-fireworks-long.ts + * bun scripts/test-fireworks-long.ts [model] [--deployment] + * + * Models: + * glm-5.1 (default) — z-ai/glm-5.1 + * minimax — minimax/minimax-m2.5 + * + * Flags: + * --deployment Use custom deployment instead of serverless (standard API) + * Serverless is the default */ export { } const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' -const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9' -// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5' -// Pricing constants — https://fireworks.ai/pricing -const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +type ModelConfig = { + id: string // OpenRouter-style ID (for display) + standardModel: string // Fireworks standard API model ID + deploymentModel: string // Fireworks custom deployment model ID + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +const MODEL_CONFIGS: Record = { + 'glm-5.1': { + id: 'z-ai/glm-5.1', + standardModel: 'accounts/fireworks/models/glm-5p1', + deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea', + inputCostPerToken: 1.40 / 1_000_000, + cachedInputCostPerToken: 0.26 / 1_000_000, + outputCostPerToken: 4.40 / 1_000_000, + }, + minimax: { + id: 'minimax/minimax-m2.5', + standardModel: 'accounts/fireworks/models/minimax-m2p5', + 
deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9', + inputCostPerToken: 0.30 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + }, +} + +const DEFAULT_MODEL = 'glm-5.1' + +function getModelConfig(modelArg?: string): ModelConfig { + const key = modelArg ?? DEFAULT_MODEL + const config = MODEL_CONFIGS[key] + if (!config) { + console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`) + process.exit(1) + } + return config +} + +const USE_DEPLOYMENT = process.argv.includes('--deployment') +const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !== 'long') +const MODEL = getModelConfig(modelArg) + +// Default to serverless (standard API); use --deployment for custom deployment +const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel +const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken +const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken +const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken const MAX_TOKENS = 100 @@ -39,9 +90,9 @@ function computeCost(usage: Record): { cost: number; breakdown: const totalCost = inputCost + cachedCost + outputCost const breakdown = [ - `${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`, - `${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`, - `${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`, + `${nonCachedInput} non-cached input × $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${inputCost.toFixed(8)}`, + `${cachedTokens} cached input × $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${cachedCost.toFixed(8)}`, + `${outputTokens} output × $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${outputCost.toFixed(8)}`, `Total: $${totalCost.toFixed(8)}`, ].join('\n ') @@ -270,11 +321,11 @@ async function main() { console.log('🧪 Fireworks 10-Turn Conversation Caching Test') 
console.log('='.repeat(60)) - console.log(`Model: ${FIREWORKS_MODEL}`) + console.log(`Model: ${MODEL.id} (${FIREWORKS_MODEL}) [${USE_DEPLOYMENT ? 'deployment' : 'serverless'}]`) console.log(`Base URL: ${FIREWORKS_BASE_URL}`) console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`) console.log(`Turns: ${TURN_PROMPTS.length}`) - console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`) + console.log(`Pricing: $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M input, $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M cached, $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M output`) console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`) console.log('='.repeat(60)) console.log() diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 10f4bb22d8..c377caaf5c 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -29,6 +29,7 @@ const fireworksAgent = new Agent({ /** Map from OpenRouter model IDs to Fireworks standard API model IDs */ const FIREWORKS_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', + 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', } /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ @@ -37,6 +38,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true /** Custom deployment IDs for models with dedicated Fireworks deployments */ const FIREWORKS_DEPLOYMENT_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', + 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', } /** Check if current time is within deployment hours (10am–8pm ET) */ @@ -137,12 +139,31 @@ function createFireworksRequest(params: { }) } -// Fireworks per-token pricing (dollars per token) -const FIREWORKS_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const FIREWORKS_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const FIREWORKS_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +// Fireworks per-token 
pricing (dollars per token), keyed by OpenRouter model ID +interface FireworksPricing { + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +const FIREWORKS_PRICING_MAP: Record = { + 'minimax/minimax-m2.5': { + inputCostPerToken: 0.30 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + }, + 'z-ai/glm-5.1': { + inputCostPerToken: 1.40 / 1_000_000, + cachedInputCostPerToken: 0.26 / 1_000_000, + outputCostPerToken: 4.40 / 1_000_000, + }, +} + +function getFireworksPricing(model: string): FireworksPricing { + return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1'] // models without a pricing entry fall back to the GLM 5.1 rates +} -function extractUsageAndCost(usage: Record | undefined | null): UsageData { +function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData { if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } const promptDetails = usage.prompt_tokens_details as Record | undefined | null const completionDetails = usage.completion_tokens_details as Record | undefined | null @@ -153,11 +174,12 @@ function extractUsageAndCost(usage: Record | undefined | null): const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? 
completionDetails.reasoning_tokens : 0 // Fireworks doesn't return cost — compute from token counts and known pricing + const pricing = getFireworksPricing(model) const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) const cost = - nonCachedInputTokens * FIREWORKS_INPUT_COST_PER_TOKEN + - cacheReadInputTokens * FIREWORKS_CACHED_INPUT_COST_PER_TOKEN + - outputTokens * FIREWORKS_OUTPUT_COST_PER_TOKEN + nonCachedInputTokens * pricing.inputCostPerToken + + cacheReadInputTokens * pricing.cachedInputCostPerToken + + outputTokens * pricing.outputCostPerToken return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } } @@ -192,7 +214,7 @@ export async function handleFireworksNonStream({ const data = await response.json() const content = data.choices?.[0]?.message?.content ?? '' const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' - const usageData = extractUsageAndCost(data.usage) + const usageData = extractUsageAndCost(data.usage, originalModel) insertMessageToBigQuery({ messageId: data.id, @@ -493,7 +515,7 @@ async function handleResponse({ return { state } } - const usageData = extractUsageAndCost(data.usage as Record) + const usageData = extractUsageAndCost(data.usage as Record, originalModel) const messageId = typeof data.id === 'string' ? data.id : 'unknown' insertMessageToBigQuery({