diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts
index 8a6b65760d..9e14909f89 100644
--- a/agents/__tests__/editor.test.ts
+++ b/agents/__tests__/editor.test.ts
@@ -62,9 +62,9 @@ describe('editor agent', () => {
expect(gpt5Editor.model).toBe('openai/gpt-5.1')
})
- test('creates minimax editor', () => {
- const minimaxEditor = createCodeEditor({ model: 'minimax' })
- expect(minimaxEditor.model).toBe('minimax/minimax-m2.5')
+ test('creates glm editor', () => {
+ const glmEditor = createCodeEditor({ model: 'glm' })
+ expect(glmEditor.model).toBe('z-ai/glm-5.1')
})
test('gpt-5 editor does not include think tags in instructions', () => {
@@ -74,9 +74,9 @@ describe('editor agent', () => {
})
test('glm editor does not include think tags in instructions', () => {
- const minimaxEditor = createCodeEditor({ model: 'minimax' })
- expect(minimaxEditor.instructionsPrompt).not.toContain('<think>')
- expect(minimaxEditor.instructionsPrompt).not.toContain('</think>')
+ const glmEditor = createCodeEditor({ model: 'glm' })
+ expect(glmEditor.instructionsPrompt).not.toContain('<think>') // an empty needle would match every string, so assert on the literal tags
+ expect(glmEditor.instructionsPrompt).not.toContain('</think>')
})
test('opus editor includes think tags in instructions', () => {
@@ -88,17 +88,17 @@ describe('editor agent', () => {
test('all variants have same base properties', () => {
const opusEditor = createCodeEditor({ model: 'opus' })
const gpt5Editor = createCodeEditor({ model: 'gpt-5' })
- const minimaxEditor = createCodeEditor({ model: 'minimax' })
+ const glmEditor = createCodeEditor({ model: 'glm' })
// All should have same basic structure
expect(opusEditor.displayName).toBe(gpt5Editor.displayName)
- expect(gpt5Editor.displayName).toBe(minimaxEditor.displayName)
+ expect(gpt5Editor.displayName).toBe(glmEditor.displayName)
expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode)
- expect(gpt5Editor.outputMode).toBe(minimaxEditor.outputMode)
+ expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode)
expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames)
- expect(gpt5Editor.toolNames).toEqual(minimaxEditor.toolNames)
+ expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames)
})
})
diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index d2ff6c7578..22a58d82a9 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -28,7 +28,7 @@ export function createBase2(
return {
publisher,
- model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6',
+ model: isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6',
providerOptions: isFree ? {
data_collection: 'deny',
} : {
diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts
index 9cb5675b5e..29225f0c29 100644
--- a/agents/editor/editor-lite.ts
+++ b/agents/editor/editor-lite.ts
@@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
import type { AgentDefinition } from '../types/agent-definition'
const definition: AgentDefinition = {
- ...createCodeEditor({ model: 'minimax' }),
+ ...createCodeEditor({ model: 'glm' }), // editor-lite spreads the shared code editor built with the 'glm' option; id is set after the spread
id: 'editor-lite',
}
export default definition
diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts
index 6beb22d221..e191609ad2 100644
--- a/agents/editor/editor.ts
+++ b/agents/editor/editor.ts
@@ -4,7 +4,7 @@ import { publisher } from '../constants'
import type { AgentDefinition } from '../types/agent-definition'
export const createCodeEditor = (options: {
- model: 'gpt-5' | 'opus' | 'minimax'
+ model: 'gpt-5' | 'opus' | 'glm'
}): Omit => {
const { model } = options
return {
@@ -12,8 +12,8 @@ export const createCodeEditor = (options: {
model:
options.model === 'gpt-5'
? 'openai/gpt-5.1'
- : options.model === 'minimax'
- ? 'minimax/minimax-m2.5'
+ : options.model === 'glm'
+ ? 'z-ai/glm-5.1'
: 'anthropic/claude-opus-4.6',
...(options.model === 'opus' && {
providerOptions: {
@@ -65,7 +65,7 @@ OR for new files or major rewrites:
}
-${model === 'gpt-5' || model === 'minimax'
+${model === 'gpt-5' || model === 'glm'
? ''
: `Before you start writing your implementation, you should use tags to think about the best way to implement the changes.
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts
index f1baa7dffc..feafb87c45 100644
--- a/agents/reviewer/code-reviewer-lite.ts
+++ b/agents/reviewer/code-reviewer-lite.ts
@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
const definition: SecretAgentDefinition = {
id: 'code-reviewer-lite',
publisher,
- ...createReviewer('minimax/minimax-m2.5'),
+ ...createReviewer('z-ai/glm-5.1'), // spread last: any id/publisher key returned by createReviewer would override the two above
}
export default definition
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
index abbcbc0cda..522994ac27 100644
--- a/agents/types/agent-definition.ts
+++ b/agents/types/agent-definition.ts
@@ -424,6 +424,7 @@ export type ModelName =
| 'moonshotai/kimi-k2.5'
| 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
+ | 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
| 'z-ai/glm-4.6:nitro'
| 'z-ai/glm-4.7'
diff --git a/cli/src/components/choice-ad-banner.tsx b/cli/src/components/choice-ad-banner.tsx
index 5a72e89ab5..7ca3f1d4ac 100644
--- a/cli/src/components/choice-ad-banner.tsx
+++ b/cli/src/components/choice-ad-banner.tsx
@@ -70,7 +70,7 @@ export const ChoiceAdBanner: React.FC = ({ ads, onImpressio
}
}, [visibleAds, onImpression])
- const hoverBorderColor = theme.link
+ const hoverBorderColor = theme.primary
return (
= ({ ads, onImpressio
diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts
index 3055f4e2c2..c68535d78d 100644
--- a/cli/src/utils/create-run-config.ts
+++ b/cli/src/utils/create-run-config.ts
@@ -1,5 +1,7 @@
import path from 'path'
+import { MAX_AGENT_STEPS_DEFAULT } from '@codebuff/common/constants/agents'
+
import {
createEventHandler,
createStreamChunkHandler,
@@ -109,7 +111,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => {
content,
previousRun: previousRunState ?? undefined,
agentDefinitions,
- maxAgentSteps: 100,
+ maxAgentSteps: MAX_AGENT_STEPS_DEFAULT,
handleStreamChunk: createStreamChunkHandler(eventHandlerState),
handleEvent: createEventHandler(eventHandlerState),
signal: params.signal,
diff --git a/common/src/constants/agents.ts b/common/src/constants/agents.ts
index 01b92e37d4..5737b77614 100644
--- a/common/src/constants/agents.ts
+++ b/common/src/constants/agents.ts
@@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce(
{} as Record,
)
-export const MAX_AGENT_STEPS_DEFAULT = 100
+export const MAX_AGENT_STEPS_DEFAULT = 200
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index e56e3fb58a..3a9f5c9166 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -18,7 +18,7 @@ export const FREE_COST_MODE = 'free' as const
*/
export const FREE_MODE_AGENT_MODELS: Record> = {
// Root orchestrator
- 'base2-free': new Set(['minimax/minimax-m2.5']),
+ 'base2-free': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),
// File exploration agents
'file-picker': new Set(['google/gemini-2.5-flash-lite']),
@@ -33,10 +33,10 @@ export const FREE_MODE_AGENT_MODELS: Record> = {
'basher': new Set(['google/gemini-3.1-flash-lite-preview']),
// Editor for free mode
- 'editor-lite': new Set(['minimax/minimax-m2.5']),
+ 'editor-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),
// Code reviewer for free mode
- 'code-reviewer-lite': new Set(['minimax/minimax-m2.5']),
+ 'code-reviewer-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),
// Thinker for free mode
'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']),
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index abbcbc0cda..522994ac27 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -424,6 +424,7 @@ export type ModelName =
| 'moonshotai/kimi-k2.5'
| 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
+ | 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
| 'z-ai/glm-4.6:nitro'
| 'z-ai/glm-4.7'
diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts
index 58a4cb099f..ad01abac66 100644
--- a/scripts/test-fireworks-long.ts
+++ b/scripts/test-fireworks-long.ts
@@ -7,19 +7,70 @@
* to measure how well Fireworks caches the shared prefix across turns.
*
* Usage:
- * bun scripts/test-fireworks-long.ts
+ * bun scripts/test-fireworks-long.ts [model] [--deployment]
+ *
+ * Models:
+ * glm-5.1 (default) — z-ai/glm-5.1
+ * minimax — minimax/minimax-m2.5
+ *
+ * Flags:
+ * --deployment Use custom deployment instead of serverless (standard API)
+ * Serverless is the default
*/
export { }
const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
-const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
-// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'
-// Pricing constants — https://fireworks.ai/pricing
-const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
-const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
-const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
+type ModelConfig = {
+ id: string // OpenRouter-style ID (for display)
+ standardModel: string // Fireworks standard API model ID
+ deploymentModel: string // Fireworks custom deployment model ID
+ inputCostPerToken: number // dollars per non-cached input token
+ cachedInputCostPerToken: number // dollars per cached input token
+ outputCostPerToken: number // dollars per output token
+}
+
+const MODEL_CONFIGS: Record<string, ModelConfig> = { // keyed by the model name accepted on the command line
+  'glm-5.1': {
+    id: 'z-ai/glm-5.1',
+    standardModel: 'accounts/fireworks/models/glm-5p1',
+    deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
+    inputCostPerToken: 1.40 / 1_000_000,
+    cachedInputCostPerToken: 0.26 / 1_000_000,
+    outputCostPerToken: 4.40 / 1_000_000,
+  },
+  minimax: {
+    id: 'minimax/minimax-m2.5',
+    standardModel: 'accounts/fireworks/models/minimax-m2p5',
+    deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9',
+    inputCostPerToken: 0.30 / 1_000_000,
+    cachedInputCostPerToken: 0.03 / 1_000_000,
+    outputCostPerToken: 1.20 / 1_000_000,
+  },
+}
+
+const DEFAULT_MODEL = 'glm-5.1'
+
+function getModelConfig(modelArg?: string): ModelConfig { // resolves a CLI model name (default: DEFAULT_MODEL); prints an error and exits with code 1 if unknown
+  const key = modelArg ?? DEFAULT_MODEL
+  const config = Object.prototype.hasOwnProperty.call(MODEL_CONFIGS, key) ? MODEL_CONFIGS[key] : undefined // own keys only, so inherited names like "constructor" are rejected
+  if (!config) {
+    console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`)
+    process.exit(1)
+  }
+  return config
+}
+
+const USE_DEPLOYMENT = process.argv.includes('--deployment')
+const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !== 'long') // first argv entry at index >= 2 that is neither a flag nor the literal 'long'
+const MODEL = getModelConfig(modelArg) // falls back to DEFAULT_MODEL when no positional argument was given
+
+// Default to serverless (standard API); use --deployment for custom deployment
+const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel
+const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
+const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
+const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken
const MAX_TOKENS = 100
@@ -39,9 +90,9 @@ function computeCost(usage: Record): { cost: number; breakdown:
const totalCost = inputCost + cachedCost + outputCost
const breakdown = [
- `${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`,
- `${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`,
- `${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`,
+ `${nonCachedInput} non-cached input × $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${inputCost.toFixed(8)}`,
+ `${cachedTokens} cached input × $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${cachedCost.toFixed(8)}`,
+ `${outputTokens} output × $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${outputCost.toFixed(8)}`,
`Total: $${totalCost.toFixed(8)}`,
].join('\n ')
@@ -270,11 +321,11 @@ async function main() {
console.log('🧪 Fireworks 10-Turn Conversation Caching Test')
console.log('='.repeat(60))
- console.log(`Model: ${FIREWORKS_MODEL}`)
+ console.log(`Model: ${MODEL.id} (${FIREWORKS_MODEL}) [${USE_DEPLOYMENT ? 'deployment' : 'serverless'}]`)
console.log(`Base URL: ${FIREWORKS_BASE_URL}`)
console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`)
console.log(`Turns: ${TURN_PROMPTS.length}`)
- console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`)
+ console.log(`Pricing: $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M input, $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M cached, $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M output`)
console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`)
console.log('='.repeat(60))
console.log()
diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts
index 10f4bb22d8..c377caaf5c 100644
--- a/web/src/llm-api/fireworks.ts
+++ b/web/src/llm-api/fireworks.ts
@@ -29,6 +29,7 @@ const fireworksAgent = new Agent({
/** Map from OpenRouter model IDs to Fireworks standard API model IDs */
const FIREWORKS_MODEL_MAP: Record = {
'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5',
+ 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
}
/** Flag to enable custom Fireworks deployments (set to false to use global API only) */
@@ -37,6 +38,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true
/** Custom deployment IDs for models with dedicated Fireworks deployments */
const FIREWORKS_DEPLOYMENT_MAP: Record = {
'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
+ 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
}
/** Check if current time is within deployment hours (10am–8pm ET) */
@@ -137,12 +139,31 @@ function createFireworksRequest(params: {
})
}
-// Fireworks per-token pricing (dollars per token)
-const FIREWORKS_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
-const FIREWORKS_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
-const FIREWORKS_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
+// Fireworks per-token pricing (dollars per token), keyed by OpenRouter model ID
+interface FireworksPricing {
+ inputCostPerToken: number // applied to input tokens that were not read from cache
+ cachedInputCostPerToken: number // applied to cache-read input tokens
+ outputCostPerToken: number // applied to output tokens
+}
+
+const FIREWORKS_PRICING_MAP: Record<string, FireworksPricing> = { // keys are OpenRouter model IDs
+  'minimax/minimax-m2.5': {
+    inputCostPerToken: 0.30 / 1_000_000,
+    cachedInputCostPerToken: 0.03 / 1_000_000,
+    outputCostPerToken: 1.20 / 1_000_000,
+  },
+  'z-ai/glm-5.1': {
+    inputCostPerToken: 1.40 / 1_000_000,
+    cachedInputCostPerToken: 0.26 / 1_000_000,
+    outputCostPerToken: 4.40 / 1_000_000,
+  },
+}
+
+function getFireworksPricing(model: string): FireworksPricing { // per-token pricing for an OpenRouter model ID
+  return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1'] // unknown models fall back to glm-5.1 rates; must read the pricing map, not the model-ID map
+}
-function extractUsageAndCost(usage: Record | undefined | null): UsageData {
+function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData {
if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
const promptDetails = usage.prompt_tokens_details as Record | undefined | null
const completionDetails = usage.completion_tokens_details as Record | undefined | null
@@ -153,11 +174,12 @@ function extractUsageAndCost(usage: Record | undefined | null):
const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0
// Fireworks doesn't return cost — compute from token counts and known pricing
+ const pricing = getFireworksPricing(model)
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
const cost =
- nonCachedInputTokens * FIREWORKS_INPUT_COST_PER_TOKEN +
- cacheReadInputTokens * FIREWORKS_CACHED_INPUT_COST_PER_TOKEN +
- outputTokens * FIREWORKS_OUTPUT_COST_PER_TOKEN
+ nonCachedInputTokens * pricing.inputCostPerToken +
+ cacheReadInputTokens * pricing.cachedInputCostPerToken +
+ outputTokens * pricing.outputCostPerToken
return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
}
@@ -192,7 +214,7 @@ export async function handleFireworksNonStream({
const data = await response.json()
const content = data.choices?.[0]?.message?.content ?? ''
const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
- const usageData = extractUsageAndCost(data.usage)
+ const usageData = extractUsageAndCost(data.usage, originalModel)
insertMessageToBigQuery({
messageId: data.id,
@@ -493,7 +515,7 @@ async function handleResponse({
return { state }
}
- const usageData = extractUsageAndCost(data.usage as Record)
+ const usageData = extractUsageAndCost(data.usage as Record, originalModel)
const messageId = typeof data.id === 'string' ? data.id : 'unknown'
insertMessageToBigQuery({