Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions agents/__tests__/editor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ describe('editor agent', () => {
expect(gpt5Editor.model).toBe('openai/gpt-5.1')
})

test('creates minimax editor', () => {
const minimaxEditor = createCodeEditor({ model: 'minimax' })
expect(minimaxEditor.model).toBe('minimax/minimax-m2.5')
test('creates glm editor', () => {
const glmEditor = createCodeEditor({ model: 'glm' })
expect(glmEditor.model).toBe('z-ai/glm-5.1')
})

test('gpt-5 editor does not include think tags in instructions', () => {
Expand All @@ -74,9 +74,9 @@ describe('editor agent', () => {
})

test('glm editor does not include think tags in instructions', () => {
const minimaxEditor = createCodeEditor({ model: 'minimax' })
expect(minimaxEditor.instructionsPrompt).not.toContain('<think>')
expect(minimaxEditor.instructionsPrompt).not.toContain('</think>')
const glmEditor = createCodeEditor({ model: 'glm' })
expect(glmEditor.instructionsPrompt).not.toContain('<think>')
expect(glmEditor.instructionsPrompt).not.toContain('</think>')
})

test('opus editor includes think tags in instructions', () => {
Expand All @@ -88,17 +88,17 @@ describe('editor agent', () => {
test('all variants have same base properties', () => {
const opusEditor = createCodeEditor({ model: 'opus' })
const gpt5Editor = createCodeEditor({ model: 'gpt-5' })
const minimaxEditor = createCodeEditor({ model: 'minimax' })
const glmEditor = createCodeEditor({ model: 'glm' })

// All should have same basic structure
expect(opusEditor.displayName).toBe(gpt5Editor.displayName)
expect(gpt5Editor.displayName).toBe(minimaxEditor.displayName)
expect(gpt5Editor.displayName).toBe(glmEditor.displayName)

expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode)
expect(gpt5Editor.outputMode).toBe(minimaxEditor.outputMode)
expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode)

expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames)
expect(gpt5Editor.toolNames).toEqual(minimaxEditor.toolNames)
expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames)
})
})

Expand Down
2 changes: 1 addition & 1 deletion agents/base2/base2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export function createBase2(

return {
publisher,
model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6',
model: isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6',
providerOptions: isFree ? {
data_collection: 'deny',
} : {
Expand Down
2 changes: 1 addition & 1 deletion agents/editor/editor-lite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
import type { AgentDefinition } from '../types/agent-definition'

const definition: AgentDefinition = {
...createCodeEditor({ model: 'minimax' }),
...createCodeEditor({ model: 'glm' }),
id: 'editor-lite',
}
export default definition
8 changes: 4 additions & 4 deletions agents/editor/editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ import { publisher } from '../constants'
import type { AgentDefinition } from '../types/agent-definition'

export const createCodeEditor = (options: {
model: 'gpt-5' | 'opus' | 'minimax'
model: 'gpt-5' | 'opus' | 'glm'
}): Omit<AgentDefinition, 'id'> => {
const { model } = options
return {
publisher,
model:
options.model === 'gpt-5'
? 'openai/gpt-5.1'
: options.model === 'minimax'
? 'minimax/minimax-m2.5'
: options.model === 'glm'
? 'z-ai/glm-5.1'
: 'anthropic/claude-opus-4.6',
...(options.model === 'opus' && {
providerOptions: {
Expand Down Expand Up @@ -65,7 +65,7 @@ OR for new files or major rewrites:
}
</codebuff_tool_call>

${model === 'gpt-5' || model === 'minimax'
${model === 'gpt-5' || model === 'glm'
? ''
: `Before you start writing your implementation, you should use <think> tags to think about the best way to implement the changes.

Expand Down
2 changes: 1 addition & 1 deletion agents/reviewer/code-reviewer-lite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
const definition: SecretAgentDefinition = {
id: 'code-reviewer-lite',
publisher,
...createReviewer('minimax/minimax-m2.5'),
...createReviewer('z-ai/glm-5.1'),
}

export default definition
1 change: 1 addition & 0 deletions agents/types/agent-definition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ export type ModelName =
| 'moonshotai/kimi-k2.5'
| 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
| 'z-ai/glm-4.6:nitro'
| 'z-ai/glm-4.7'
Expand Down
4 changes: 2 additions & 2 deletions cli/src/components/choice-ad-banner.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export const ChoiceAdBanner: React.FC<ChoiceAdBannerProps> = ({ ads, onImpressio
}
}, [visibleAds, onImpression])

const hoverBorderColor = theme.link
const hoverBorderColor = theme.primary

return (
<box
Expand Down Expand Up @@ -124,7 +124,7 @@ export const ChoiceAdBanner: React.FC<ChoiceAdBannerProps> = ({ ads, onImpressio
<text
style={{
fg: theme.name === 'light' ? '#ffffff' : theme.background,
bg: isHovered ? theme.link : theme.muted,
bg: isHovered ? theme.primary : theme.muted,
attributes: TextAttributes.BOLD,
}}
>
Expand Down
4 changes: 3 additions & 1 deletion cli/src/utils/create-run-config.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import path from 'path'

import { MAX_AGENT_STEPS_DEFAULT } from '@codebuff/common/constants/agents'

import {
createEventHandler,
createStreamChunkHandler,
Expand Down Expand Up @@ -109,7 +111,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => {
content,
previousRun: previousRunState ?? undefined,
agentDefinitions,
maxAgentSteps: 100,
maxAgentSteps: MAX_AGENT_STEPS_DEFAULT,
handleStreamChunk: createStreamChunkHandler(eventHandlerState),
handleEvent: createEventHandler(eventHandlerState),
signal: params.signal,
Expand Down
2 changes: 1 addition & 1 deletion common/src/constants/agents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce(
{} as Record<string, string[]>,
)

export const MAX_AGENT_STEPS_DEFAULT = 100
export const MAX_AGENT_STEPS_DEFAULT = 200
6 changes: 3 additions & 3 deletions common/src/constants/free-agents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export const FREE_COST_MODE = 'free' as const
*/
export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
// Root orchestrator
'base2-free': new Set(['minimax/minimax-m2.5']),
'base2-free': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),

// File exploration agents
'file-picker': new Set(['google/gemini-2.5-flash-lite']),
Expand All @@ -33,10 +33,10 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
'basher': new Set(['google/gemini-3.1-flash-lite-preview']),

// Editor for free mode
'editor-lite': new Set(['minimax/minimax-m2.5']),
'editor-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),

// Code reviewer for free mode
'code-reviewer-lite': new Set(['minimax/minimax-m2.5']),
'code-reviewer-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),

// Thinker for free mode
'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ export type ModelName =
| 'moonshotai/kimi-k2.5'
| 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
| 'z-ai/glm-4.6:nitro'
| 'z-ai/glm-4.7'
Expand Down
75 changes: 63 additions & 12 deletions scripts/test-fireworks-long.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,70 @@
* to measure how well Fireworks caches the shared prefix across turns.
*
* Usage:
* bun scripts/test-fireworks-long.ts
* bun scripts/test-fireworks-long.ts [model] [--deployment]
*
* Models:
* glm-5.1 (default) — z-ai/glm-5.1
* minimax — minimax/minimax-m2.5
*
* Flags:
* --deployment Use custom deployment instead of serverless (standard API)
* Serverless is the default
*/

export { }

const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'

// Pricing constants — https://fireworks.ai/pricing
const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
// Per-model configuration for the Fireworks caching benchmark: how to address
// the model on each API surface, and its per-token pricing.
type ModelConfig = {
  id: string // OpenRouter-style ID (for display)
  standardModel: string // Fireworks standard API model ID
  deploymentModel: string // Fireworks custom deployment model ID
  inputCostPerToken: number // $/token for non-cached input
  cachedInputCostPerToken: number // $/token for cache-hit input
  outputCostPerToken: number // $/token for completion output
}

// Known models, keyed by the short CLI argument name.
// Rates are dollars per token (listed price per 1M tokens / 1_000_000);
// presumably sourced from https://fireworks.ai/pricing — verify before relying on totals.
const MODEL_CONFIGS: Record<string, ModelConfig> = {
  'glm-5.1': {
    id: 'z-ai/glm-5.1',
    standardModel: 'accounts/fireworks/models/glm-5p1',
    deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
    inputCostPerToken: 1.40 / 1_000_000,
    cachedInputCostPerToken: 0.26 / 1_000_000,
    outputCostPerToken: 4.40 / 1_000_000,
  },
  minimax: {
    id: 'minimax/minimax-m2.5',
    standardModel: 'accounts/fireworks/models/minimax-m2p5',
    deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9',
    inputCostPerToken: 0.30 / 1_000_000,
    cachedInputCostPerToken: 0.03 / 1_000_000,
    outputCostPerToken: 1.20 / 1_000_000,
  },
}

const DEFAULT_MODEL = 'glm-5.1'

/**
 * Resolve the CLI model argument to its configuration.
 * Falls back to DEFAULT_MODEL when no argument was given; exits the
 * process with an error message when the name is not in MODEL_CONFIGS.
 */
function getModelConfig(modelArg?: string): ModelConfig {
  const selected = modelArg ?? DEFAULT_MODEL
  const resolved = MODEL_CONFIGS[selected]
  if (resolved) {
    return resolved
  }
  console.error(`❌ Unknown model: "${selected}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`)
  process.exit(1)
}

const USE_DEPLOYMENT = process.argv.includes('--deployment')
const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !== 'long')
const MODEL = getModelConfig(modelArg)

// Default to serverless (standard API); use --deployment for custom deployment
const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel
const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken

const MAX_TOKENS = 100

Expand All @@ -39,9 +90,9 @@ function computeCost(usage: Record<string, unknown>): { cost: number; breakdown:
const totalCost = inputCost + cachedCost + outputCost

const breakdown = [
`${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`,
`${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`,
`${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`,
`${nonCachedInput} non-cached input × $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${inputCost.toFixed(8)}`,
`${cachedTokens} cached input × $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${cachedCost.toFixed(8)}`,
`${outputTokens} output × $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${outputCost.toFixed(8)}`,
`Total: $${totalCost.toFixed(8)}`,
].join('\n ')

Expand Down Expand Up @@ -270,11 +321,11 @@ async function main() {

console.log('🧪 Fireworks 10-Turn Conversation Caching Test')
console.log('='.repeat(60))
console.log(`Model: ${FIREWORKS_MODEL}`)
console.log(`Model: ${MODEL.id} (${FIREWORKS_MODEL}) [${USE_DEPLOYMENT ? 'deployment' : 'serverless'}]`)
console.log(`Base URL: ${FIREWORKS_BASE_URL}`)
console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`)
console.log(`Turns: ${TURN_PROMPTS.length}`)
console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`)
console.log(`Pricing: $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M input, $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M cached, $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M output`)
console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`)
console.log('='.repeat(60))
console.log()
Expand Down
42 changes: 32 additions & 10 deletions web/src/llm-api/fireworks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const fireworksAgent = new Agent({
/** Map from OpenRouter model IDs to Fireworks standard API model IDs */
const FIREWORKS_MODEL_MAP: Record<string, string> = {
'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5',
'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
}

/** Flag to enable custom Fireworks deployments (set to false to use global API only) */
Expand All @@ -37,6 +38,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true
/** Custom deployment IDs for models with dedicated Fireworks deployments */
const FIREWORKS_DEPLOYMENT_MAP: Record<string, string> = {
'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
}

/** Check if current time is within deployment hours (10am–8pm ET) */
Expand Down Expand Up @@ -137,12 +139,31 @@ function createFireworksRequest(params: {
})
}

// Fireworks per-token pricing (dollars per token)
const FIREWORKS_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
const FIREWORKS_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
const FIREWORKS_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
// Fireworks per-token pricing (dollars per token), keyed by OpenRouter model ID
interface FireworksPricing {
  inputCostPerToken: number // $/token for non-cached input
  cachedInputCostPerToken: number // $/token for cache-hit input
  outputCostPerToken: number // $/token for completion output
}

// Rates are listed price per 1M tokens / 1_000_000.
// NOTE(review): keep in sync with the Fireworks pricing page and with the
// per-model configs in scripts/test-fireworks-long.ts — confirm on change.
const FIREWORKS_PRICING_MAP: Record<string, FireworksPricing> = {
  'minimax/minimax-m2.5': {
    inputCostPerToken: 0.30 / 1_000_000,
    cachedInputCostPerToken: 0.03 / 1_000_000,
    outputCostPerToken: 1.20 / 1_000_000,
  },
  'z-ai/glm-5.1': {
    inputCostPerToken: 1.40 / 1_000_000,
    cachedInputCostPerToken: 0.26 / 1_000_000,
    outputCostPerToken: 4.40 / 1_000_000,
  },
}

/**
 * Look up per-token pricing for a Fireworks-served model (OpenRouter ID).
 * Unknown models fall back to the GLM 5.1 pricing entry.
 */
function getFireworksPricing(model: string): FireworksPricing {
  // Bug fix: the fallback previously read FIREWORKS_MODEL_MAP (a map of
  // Fireworks model-ID strings), so an unknown model returned a string
  // instead of a FireworksPricing object, producing NaN costs downstream.
  return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1']
}

function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
Expand All @@ -153,11 +174,12 @@ function extractUsageAndCost(usage: Record<string, unknown> | undefined | null):
const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0

// Fireworks doesn't return cost — compute from token counts and known pricing
const pricing = getFireworksPricing(model)
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
const cost =
nonCachedInputTokens * FIREWORKS_INPUT_COST_PER_TOKEN +
cacheReadInputTokens * FIREWORKS_CACHED_INPUT_COST_PER_TOKEN +
outputTokens * FIREWORKS_OUTPUT_COST_PER_TOKEN
nonCachedInputTokens * pricing.inputCostPerToken +
cacheReadInputTokens * pricing.cachedInputCostPerToken +
outputTokens * pricing.outputCostPerToken

return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
}
Expand Down Expand Up @@ -192,7 +214,7 @@ export async function handleFireworksNonStream({
const data = await response.json()
const content = data.choices?.[0]?.message?.content ?? ''
const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
const usageData = extractUsageAndCost(data.usage)
const usageData = extractUsageAndCost(data.usage, originalModel)

insertMessageToBigQuery({
messageId: data.id,
Expand Down Expand Up @@ -493,7 +515,7 @@ async function handleResponse({
return { state }
}

const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
const messageId = typeof data.id === 'string' ? data.id : 'unknown'

insertMessageToBigQuery({
Expand Down
Loading