Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions agents/__tests__/editor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ describe('editor agent', () => {
expect(gpt5Editor.model).toBe('openai/gpt-5.1')
})

test('creates minimax editor', () => {
const minimaxEditor = createCodeEditor({ model: 'minimax' })
expect(minimaxEditor.model).toBe('minimax/minimax-m2.5')
test('creates glm editor', () => {
const glmEditor = createCodeEditor({ model: 'glm' })
expect(glmEditor.model).toBe('z-ai/glm-5.1')
})

test('gpt-5 editor does not include think tags in instructions', () => {
Expand All @@ -74,9 +74,9 @@ describe('editor agent', () => {
})

test('glm editor does not include think tags in instructions', () => {
const minimaxEditor = createCodeEditor({ model: 'minimax' })
expect(minimaxEditor.instructionsPrompt).not.toContain('<think>')
expect(minimaxEditor.instructionsPrompt).not.toContain('</think>')
const glmEditor = createCodeEditor({ model: 'glm' })
expect(glmEditor.instructionsPrompt).not.toContain('<think>')
expect(glmEditor.instructionsPrompt).not.toContain('</think>')
})

test('opus editor includes think tags in instructions', () => {
Expand All @@ -88,17 +88,17 @@ describe('editor agent', () => {
test('all variants have same base properties', () => {
const opusEditor = createCodeEditor({ model: 'opus' })
const gpt5Editor = createCodeEditor({ model: 'gpt-5' })
const minimaxEditor = createCodeEditor({ model: 'minimax' })
const glmEditor = createCodeEditor({ model: 'glm' })

// All should have same basic structure
expect(opusEditor.displayName).toBe(gpt5Editor.displayName)
expect(gpt5Editor.displayName).toBe(minimaxEditor.displayName)
expect(gpt5Editor.displayName).toBe(glmEditor.displayName)

expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode)
expect(gpt5Editor.outputMode).toBe(minimaxEditor.outputMode)
expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode)

expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames)
expect(gpt5Editor.toolNames).toEqual(minimaxEditor.toolNames)
expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames)
})
})

Expand Down
2 changes: 1 addition & 1 deletion agents/base2/base2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export function createBase2(

return {
publisher,
model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6',
model: isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6',
providerOptions: isFree ? {
data_collection: 'deny',
} : {
Expand Down
2 changes: 1 addition & 1 deletion agents/editor/editor-lite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
import type { AgentDefinition } from '../types/agent-definition'

const definition: AgentDefinition = {
...createCodeEditor({ model: 'minimax' }),
...createCodeEditor({ model: 'glm' }),
id: 'editor-lite',
}
export default definition
8 changes: 4 additions & 4 deletions agents/editor/editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ import { publisher } from '../constants'
import type { AgentDefinition } from '../types/agent-definition'

export const createCodeEditor = (options: {
model: 'gpt-5' | 'opus' | 'minimax'
model: 'gpt-5' | 'opus' | 'glm'
}): Omit<AgentDefinition, 'id'> => {
const { model } = options
return {
publisher,
model:
options.model === 'gpt-5'
? 'openai/gpt-5.1'
: options.model === 'minimax'
? 'minimax/minimax-m2.5'
: options.model === 'glm'
? 'z-ai/glm-5.1'
: 'anthropic/claude-opus-4.6',
...(options.model === 'opus' && {
providerOptions: {
Expand Down Expand Up @@ -65,7 +65,7 @@ OR for new files or major rewrites:
}
</codebuff_tool_call>

${model === 'gpt-5' || model === 'minimax'
${model === 'gpt-5' || model === 'glm'
? ''
: `Before you start writing your implementation, you should use <think> tags to think about the best way to implement the changes.

Expand Down
2 changes: 1 addition & 1 deletion agents/reviewer/code-reviewer-lite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
const definition: SecretAgentDefinition = {
id: 'code-reviewer-lite',
publisher,
...createReviewer('minimax/minimax-m2.5'),
...createReviewer('z-ai/glm-5.1'),
}

export default definition
1 change: 1 addition & 0 deletions agents/types/agent-definition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ export type ModelName =
| 'moonshotai/kimi-k2.5'
| 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
| 'z-ai/glm-4.6:nitro'
| 'z-ai/glm-4.7'
Expand Down
4 changes: 2 additions & 2 deletions cli/src/components/choice-ad-banner.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export const ChoiceAdBanner: React.FC<ChoiceAdBannerProps> = ({ ads, onImpressio
}
}, [visibleAds, onImpression])

const hoverBorderColor = theme.link
const hoverBorderColor = theme.primary

return (
<box
Expand Down Expand Up @@ -124,7 +124,7 @@ export const ChoiceAdBanner: React.FC<ChoiceAdBannerProps> = ({ ads, onImpressio
<text
style={{
fg: theme.name === 'light' ? '#ffffff' : theme.background,
bg: isHovered ? theme.link : theme.muted,
bg: isHovered ? theme.primary : theme.muted,
attributes: TextAttributes.BOLD,
}}
>
Expand Down
4 changes: 3 additions & 1 deletion cli/src/utils/create-run-config.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import path from 'path'

import { MAX_AGENT_STEPS_DEFAULT } from '@codebuff/common/constants/agents'

import {
createEventHandler,
createStreamChunkHandler,
Expand Down Expand Up @@ -109,7 +111,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => {
content,
previousRun: previousRunState ?? undefined,
agentDefinitions,
maxAgentSteps: 100,
maxAgentSteps: MAX_AGENT_STEPS_DEFAULT,
handleStreamChunk: createStreamChunkHandler(eventHandlerState),
handleEvent: createEventHandler(eventHandlerState),
signal: params.signal,
Expand Down
2 changes: 1 addition & 1 deletion common/src/constants/agents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce(
{} as Record<string, string[]>,
)

export const MAX_AGENT_STEPS_DEFAULT = 100
export const MAX_AGENT_STEPS_DEFAULT = 200
6 changes: 3 additions & 3 deletions common/src/constants/free-agents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export const FREE_COST_MODE = 'free' as const
*/
export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
// Root orchestrator
'base2-free': new Set(['minimax/minimax-m2.5']),
'base2-free': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),

// File exploration agents
'file-picker': new Set(['google/gemini-2.5-flash-lite']),
Expand All @@ -33,10 +33,10 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
'basher': new Set(['google/gemini-3.1-flash-lite-preview']),

// Editor for free mode
'editor-lite': new Set(['minimax/minimax-m2.5']),
'editor-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),

// Code reviewer for free mode
'code-reviewer-lite': new Set(['minimax/minimax-m2.5']),
'code-reviewer-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']),

// Thinker for free mode
'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ export type ModelName =
| 'moonshotai/kimi-k2.5'
| 'moonshotai/kimi-k2.5:nitro'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
| 'z-ai/glm-4.6:nitro'
| 'z-ai/glm-4.7'
Expand Down
75 changes: 63 additions & 12 deletions scripts/test-fireworks-long.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,70 @@
* to measure how well Fireworks caches the shared prefix across turns.
*
* Usage:
* bun scripts/test-fireworks-long.ts
* bun scripts/test-fireworks-long.ts [model] [--deployment]
*
* Models:
* glm-5.1 (default) — z-ai/glm-5.1
* minimax — minimax/minimax-m2.5
*
* Flags:
* --deployment Use custom deployment instead of serverless (standard API)
* Serverless is the default
*/

export { }

const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'
const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'

// Pricing constants — https://fireworks.ai/pricing
const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
// Per-model configuration for the Fireworks caching benchmark: how to address
// the model on each API surface, and its per-token pricing.
type ModelConfig = {
  id: string // OpenRouter-style ID (for display)
  standardModel: string // Fireworks standard API model ID
  deploymentModel: string // Fireworks custom deployment model ID
  inputCostPerToken: number // $/token for non-cached input
  cachedInputCostPerToken: number // $/token for cache-hit input
  outputCostPerToken: number // $/token for completion output
}

// Known models, keyed by the short CLI argument name.
// Rates are dollars per token (listed price per 1M tokens / 1_000_000);
// presumably sourced from https://fireworks.ai/pricing — verify before relying on totals.
const MODEL_CONFIGS: Record<string, ModelConfig> = {
  'glm-5.1': {
    id: 'z-ai/glm-5.1',
    standardModel: 'accounts/fireworks/models/glm-5p1',
    deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea',
    inputCostPerToken: 1.40 / 1_000_000,
    cachedInputCostPerToken: 0.26 / 1_000_000,
    outputCostPerToken: 4.40 / 1_000_000,
  },
  minimax: {
    id: 'minimax/minimax-m2.5',
    standardModel: 'accounts/fireworks/models/minimax-m2p5',
    deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9',
    inputCostPerToken: 0.30 / 1_000_000,
    cachedInputCostPerToken: 0.03 / 1_000_000,
    outputCostPerToken: 1.20 / 1_000_000,
  },
}

const DEFAULT_MODEL = 'glm-5.1'

/**
 * Resolve the CLI model argument to its configuration.
 * Falls back to DEFAULT_MODEL when no argument was given; exits the
 * process with an error message when the name is not in MODEL_CONFIGS.
 */
function getModelConfig(modelArg?: string): ModelConfig {
  const selected = modelArg ?? DEFAULT_MODEL
  const resolved = MODEL_CONFIGS[selected]
  if (resolved) {
    return resolved
  }
  console.error(`❌ Unknown model: "${selected}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`)
  process.exit(1)
}

const USE_DEPLOYMENT = process.argv.includes('--deployment')
const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !== 'long')
const MODEL = getModelConfig(modelArg)

// Default to serverless (standard API); use --deployment for custom deployment
const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel
const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken
const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken
const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken

const MAX_TOKENS = 100

Expand All @@ -39,9 +90,9 @@ function computeCost(usage: Record<string, unknown>): { cost: number; breakdown:
const totalCost = inputCost + cachedCost + outputCost

const breakdown = [
`${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`,
`${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`,
`${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`,
`${nonCachedInput} non-cached input × $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${inputCost.toFixed(8)}`,
`${cachedTokens} cached input × $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${cachedCost.toFixed(8)}`,
`${outputTokens} output × $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${outputCost.toFixed(8)}`,
`Total: $${totalCost.toFixed(8)}`,
].join('\n ')

Expand Down Expand Up @@ -270,11 +321,11 @@ async function main() {

console.log('🧪 Fireworks 10-Turn Conversation Caching Test')
console.log('='.repeat(60))
console.log(`Model: ${FIREWORKS_MODEL}`)
console.log(`Model: ${MODEL.id} (${FIREWORKS_MODEL}) [${USE_DEPLOYMENT ? 'deployment' : 'serverless'}]`)
console.log(`Base URL: ${FIREWORKS_BASE_URL}`)
console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`)
console.log(`Turns: ${TURN_PROMPTS.length}`)
console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`)
console.log(`Pricing: $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M input, $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M cached, $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M output`)
console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`)
console.log('='.repeat(60))
console.log()
Expand Down
42 changes: 32 additions & 10 deletions web/src/llm-api/fireworks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const fireworksAgent = new Agent({
/** Map from OpenRouter model IDs to Fireworks standard API model IDs */
const FIREWORKS_MODEL_MAP: Record<string, string> = {
'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5',
'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
}

/** Flag to enable custom Fireworks deployments (set to false to use global API only) */
Expand All @@ -37,6 +38,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true
/** Custom deployment IDs for models with dedicated Fireworks deployments */
const FIREWORKS_DEPLOYMENT_MAP: Record<string, string> = {
'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
}

/** Check if current time is within deployment hours (10am–8pm ET) */
Expand Down Expand Up @@ -137,12 +139,31 @@ function createFireworksRequest(params: {
})
}

// Fireworks per-token pricing (dollars per token)
const FIREWORKS_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
const FIREWORKS_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
const FIREWORKS_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
// Fireworks per-token pricing (dollars per token), keyed by OpenRouter model ID
interface FireworksPricing {
  inputCostPerToken: number // $/token for non-cached input
  cachedInputCostPerToken: number // $/token for cache-hit input
  outputCostPerToken: number // $/token for completion output
}

// Rates are listed price per 1M tokens / 1_000_000.
// NOTE(review): keep in sync with the Fireworks pricing page and with the
// per-model configs in scripts/test-fireworks-long.ts — confirm on change.
const FIREWORKS_PRICING_MAP: Record<string, FireworksPricing> = {
  'minimax/minimax-m2.5': {
    inputCostPerToken: 0.30 / 1_000_000,
    cachedInputCostPerToken: 0.03 / 1_000_000,
    outputCostPerToken: 1.20 / 1_000_000,
  },
  'z-ai/glm-5.1': {
    inputCostPerToken: 1.40 / 1_000_000,
    cachedInputCostPerToken: 0.26 / 1_000_000,
    outputCostPerToken: 4.40 / 1_000_000,
  },
}

/**
 * Look up per-token pricing for a Fireworks-served model (OpenRouter ID).
 * Unknown models fall back to the GLM 5.1 pricing entry.
 */
function getFireworksPricing(model: string): FireworksPricing {
  // Bug fix: the fallback previously read FIREWORKS_MODEL_MAP (a map of
  // Fireworks model-ID strings), so an unknown model returned a string
  // instead of a FireworksPricing object, producing NaN costs downstream.
  return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1']
}

function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
Expand All @@ -153,11 +174,12 @@ function extractUsageAndCost(usage: Record<string, unknown> | undefined | null):
const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0

// Fireworks doesn't return cost — compute from token counts and known pricing
const pricing = getFireworksPricing(model)
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
const cost =
nonCachedInputTokens * FIREWORKS_INPUT_COST_PER_TOKEN +
cacheReadInputTokens * FIREWORKS_CACHED_INPUT_COST_PER_TOKEN +
outputTokens * FIREWORKS_OUTPUT_COST_PER_TOKEN
nonCachedInputTokens * pricing.inputCostPerToken +
cacheReadInputTokens * pricing.cachedInputCostPerToken +
outputTokens * pricing.outputCostPerToken

return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
}
Expand Down Expand Up @@ -192,7 +214,7 @@ export async function handleFireworksNonStream({
const data = await response.json()
const content = data.choices?.[0]?.message?.content ?? ''
const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
const usageData = extractUsageAndCost(data.usage)
const usageData = extractUsageAndCost(data.usage, originalModel)

insertMessageToBigQuery({
messageId: data.id,
Expand Down Expand Up @@ -493,7 +515,7 @@ async function handleResponse({
return { state }
}

const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
const messageId = typeof data.id === 'string' ? data.id : 'unknown'

insertMessageToBigQuery({
Expand Down
Loading