diff --git a/.agents/__tests__/context-pruner.test.ts b/.agents/__tests__/context-pruner.test.ts
index 69940bfac..552652891 100644
--- a/.agents/__tests__/context-pruner.test.ts
+++ b/.agents/__tests__/context-pruner.test.ts
@@ -1,6 +1,8 @@
import { describe, test, expect, beforeEach } from 'bun:test'
+
import contextPruner from '../context-pruner'
-import type { Message } from '../types/agent-definition'
+
+import type { Message } from '../types/codebuff-message'
describe('context-pruner handleSteps', () => {
let mockAgentState: any
@@ -11,11 +13,57 @@ describe('context-pruner handleSteps', () => {
}
})
- const createMessage = (role: 'user' | 'assistant', content: string): Message => ({
+ const createMessage = (
+ role: 'user' | 'assistant',
+ content: string,
+ ): Message => ({
role,
content,
})
+ const createTerminalToolMessage = (
+ command: string,
+ output: string,
+ exitCode?: number,
+ ): any => ({
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolCallId: 'test-id',
+ toolName: 'run_terminal_command',
+ output: [
+ {
+ type: 'json',
+ value: {
+ command,
+ stdout: output,
+ ...(exitCode !== undefined && { exitCode }),
+ },
+ },
+ ],
+ },
+ })
+
+ const createLargeToolMessage = (
+ toolName: string,
+ largeData: string,
+ ): any => ({
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolCallId: 'test-id',
+ toolName,
+ output: [
+ {
+ type: 'json',
+ value: {
+ data: largeData,
+ },
+ },
+ ],
+ },
+ })
+
const runHandleSteps = (messages: Message[]) => {
mockAgentState.messageHistory = messages
const generator = contextPruner.handleSteps!({ agentState: mockAgentState })
@@ -39,27 +87,14 @@ describe('context-pruner handleSteps', () => {
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
- expect(results[0]).toEqual({
- toolName: 'set_messages',
- input: {
- messages,
- },
- })
- })
-
- test('removes spawn_agent_inline call for context-pruner and following messages', () => {
- const messages = [
- createMessage('user', 'Hello'),
- createMessage('assistant', 'I will spawn the context-pruner agent.\n\n\n{\n "cb_tool_name": "spawn_agent_inline",\n "agent_type": "context-pruner"\n}\n'),
- createMessage('user', '{"params": {"maxContextLength": 100000}}'),
- createMessage('user', 'Tools and instructions'),
- ]
-
- const results = runHandleSteps(messages)
-
- expect(results).toHaveLength(1)
- expect(results[0].input.messages).toHaveLength(1)
- expect(results[0].input.messages[0]).toEqual(createMessage('user', 'Hello'))
+ expect(results[0]).toEqual(
+ expect.objectContaining({
+ toolName: 'set_messages',
+ input: {
+ messages,
+ },
+ }),
+ )
})
test('does not remove messages if assistant message does not contain context-pruner spawn call', () => {
@@ -74,33 +109,22 @@ describe('context-pruner handleSteps', () => {
expect(results[0].input.messages).toHaveLength(3)
})
- test('handles context-pruner spawn call without enough following messages', () => {
- const messages = [
- createMessage('user', 'Hello'),
- createMessage('assistant', 'I will spawn the context-pruner agent.\n\n\n{\n "cb_tool_name": "spawn_agent_inline",\n "agent_type": "context-pruner"\n}\n'),
- createMessage('user', '{"params": {"maxContextLength": 100000}}'),
- ]
-
- const results = runHandleSteps(messages)
-
- expect(results).toHaveLength(1)
- // Should preserve all messages since there aren't 3 messages to remove
- expect(results[0].input.messages).toHaveLength(1)
-
- })
-
test('removes old terminal command results while keeping recent 5', () => {
// Create content large enough to exceed 200k token limit (~600k chars)
const largeContent = 'x'.repeat(150000)
-
+
const messages = [
createMessage('user', largeContent),
createMessage('assistant', largeContent),
createMessage('user', largeContent),
createMessage('assistant', largeContent),
// 7 terminal commands (should keep last 5, simplify first 2)
- ...Array.from({ length: 7 }, (_, i) =>
- createMessage('assistant', `Command ${i + 1}\nrun_terminal_commandLarge output ${i + 1}: ${'y'.repeat(1000)}`)
+ ...Array.from({ length: 7 }, (_, i) =>
+ createTerminalToolMessage(
+ `command-${i + 1}`,
+ `Large output ${i + 1}: ${'y'.repeat(1000)}`,
+ 0,
+ ),
),
]
@@ -108,84 +132,104 @@ describe('context-pruner handleSteps', () => {
expect(results).toHaveLength(1)
const resultMessages = results[0].input.messages
-
- // Check that first 2 terminal commands are simplified (should have been replaced)
- const firstTerminalMessage = resultMessages.find((m: any) =>
- typeof m.content === 'string' && m.content.includes('Command 1')
+
+ // Check that first 2 terminal commands are simplified
+ const firstTerminalMessage = resultMessages.find(
+ (m: any) =>
+ m.role === 'tool' &&
+ m.content?.toolName === 'run_terminal_command' &&
+ m.content?.output?.[0]?.value?.command === 'command-1',
)
- expect(firstTerminalMessage?.content).toContain('[Output omitted]')
-
+ expect(
+ firstTerminalMessage?.content?.output?.[0]?.value?.stdoutOmittedForLength,
+ ).toBe(true)
+
// Check that recent terminal commands are preserved (but may be processed by large tool result pass)
- const recentTerminalMessage = resultMessages.find((m: any) =>
- typeof m.content === 'string' && m.content.includes('Command 7')
+ const recentTerminalMessage = resultMessages.find(
+ (m: any) =>
+ m.role === 'tool' &&
+ m.content?.toolName === 'run_terminal_command' &&
+ (m.content?.output?.[0]?.value?.command === 'command-7' ||
+ m.content?.output?.[0]?.value?.message ===
+ '[LARGE_TOOL_RESULT_OMITTED]'),
)
- // The recent message should exist, but if it's large, it may get processed by Pass 2
expect(recentTerminalMessage).toBeDefined()
- expect(recentTerminalMessage?.content).toContain('Command 7')
})
test('removes large tool results', () => {
// Create content large enough to exceed 200k token limit (~600k chars) to trigger terminal pass
const largeContent = 'z'.repeat(150000)
- const largeToolResult = 'x'.repeat(2000) // > 1000 chars
-
+ const largeToolData = 'x'.repeat(2000) // > 1000 chars when stringified
+
const messages = [
createMessage('user', largeContent),
createMessage('assistant', largeContent),
createMessage('user', largeContent),
createMessage('assistant', largeContent),
- // Message with large tool result (total message > 1000 chars)
- createMessage('assistant', `Some text before read_files${largeToolResult} some text after`),
- createMessage('assistant', `code_searchSmall result`),
+ // Message with large tool result
+ createLargeToolMessage('read_files', largeToolData),
+ createLargeToolMessage('code_search', 'Small result'),
]
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
const resultMessages = results[0].input.messages
-
+
// Large tool result should be simplified
- const largeResultMessage = resultMessages.find((m: any) =>
- typeof m.content === 'string' && m.content.includes('read_files')
+ const largeResultMessage = resultMessages.find(
+ (m: any) => m.role === 'tool' && m.content?.toolName === 'read_files',
)
- expect(largeResultMessage?.content).toContain('[Large tool result omitted]')
-
+ expect(largeResultMessage?.content?.output?.[0]?.value?.message).toBe(
+ '[LARGE_TOOL_RESULT_OMITTED]',
+ )
+
// Small tool result should be preserved
- const smallResultMessage = resultMessages.find((m: any) =>
- typeof m.content === 'string' && m.content.includes('Small result')
+ const smallResultMessage = resultMessages.find(
+ (m: any) => m.role === 'tool' && m.content?.toolName === 'code_search',
+ )
+ expect(smallResultMessage?.content?.output?.[0]?.value?.data).toBe(
+ 'Small result',
)
- expect(smallResultMessage?.content).toContain('Small result')
})
test('performs message-level pruning when other passes are insufficient', () => {
// Create many large messages to exceed token limit
const largeContent = 'z'.repeat(50000)
-
- const messages = Array.from({ length: 20 }, (_, i) =>
- createMessage(i % 2 === 0 ? 'user' : 'assistant', `Message ${i + 1}: ${largeContent}`)
+
+ const messages = Array.from({ length: 20 }, (_, i) =>
+ createMessage(
+ i % 2 === 0 ? 'user' : 'assistant',
+ `Message ${i + 1}: ${largeContent}`,
+ ),
)
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
const resultMessages = results[0].input.messages
-
+
// Should have fewer messages due to pruning
expect(resultMessages.length).toBeLessThan(messages.length)
-
+
// Should contain replacement messages
- const hasReplacementMessage = resultMessages.some((m: any) =>
- typeof m.content === 'string' && m.content.includes('Previous message(s) omitted due to length')
+ const hasReplacementMessage = resultMessages.some(
+ (m: any) =>
+ typeof m.content === 'string' &&
+ m.content.includes('Previous message(s) omitted due to length'),
)
expect(hasReplacementMessage).toBe(true)
})
test('preserves messages with keepDuringTruncation flag', () => {
const largeContent = 'w'.repeat(50000)
-
+
const messages = [
createMessage('user', `Message 1: ${largeContent}`),
- { ...createMessage('assistant', `Important message: ${largeContent}`), keepDuringTruncation: true },
+ {
+ ...createMessage('assistant', `Important message: ${largeContent}`),
+ keepDuringTruncation: true,
+ },
createMessage('user', `Message 3: ${largeContent}`),
] as any[]
@@ -193,10 +237,12 @@ describe('context-pruner handleSteps', () => {
expect(results).toHaveLength(1)
const resultMessages = results[0].input.messages
-
+
// Important message should be preserved
- const importantMessage = resultMessages.find((m: any) =>
- typeof m.content === 'string' && m.content.includes('Important message')
+ const importantMessage = resultMessages.find(
+ (m: any) =>
+ typeof m.content === 'string' &&
+ m.content.includes('Important message'),
)
expect(importantMessage).toBeDefined()
})
@@ -230,11 +276,11 @@ describe('context-pruner handleSteps', () => {
// Test the internal token counting logic indirectly
const shortMessage = createMessage('user', 'Hi')
const longMessage = createMessage('user', 'x'.repeat(300)) // ~100 tokens
-
+
// Short message should not trigger pruning
let results = runHandleSteps([shortMessage])
expect(results[0].input.messages).toHaveLength(1)
-
+
// Very long message should potentially trigger some processing
results = runHandleSteps([longMessage])
expect(results).toHaveLength(1)
@@ -250,15 +296,36 @@ describe('context-pruner edge cases', () => {
}
})
- const createMessage = (role: 'user' | 'assistant', content: string): Message => ({
+ const createMessage = (
+ role: 'user' | 'assistant',
+ content: string,
+ ): Message => ({
role,
content,
})
+ const createTerminalToolMessage = (command: string, output: string): any => ({
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolCallId: 'test-id',
+ toolName: 'run_terminal_command',
+ output: [
+ {
+ type: 'json',
+ value: {
+ command,
+ stdout: output,
+ },
+ },
+ ],
+ },
+ })
+
const runHandleSteps = (messages: Message[]) => {
mockAgentState.messageHistory = messages
const generator = contextPruner.handleSteps!({ agentState: mockAgentState })
- const results: any[] = []
+ const results: ReturnType['value'][] = []
let result = generator.next()
while (!result.done) {
if (typeof result.value === 'object') {
@@ -269,69 +336,47 @@ describe('context-pruner edge cases', () => {
return results
}
- test('handles malformed terminal command tool results', () => {
+ test('handles terminal command tool results gracefully', () => {
const largeContent = 'x'.repeat(100000)
const messages = [
createMessage('user', largeContent),
- createMessage('assistant', 'run_terminal_command'), // Missing tool_result wrapper
- createMessage('assistant', 'run_terminal_command'), // Missing result
- createMessage('assistant', 'run_terminal_command[Output omitted]'),
+ createTerminalToolMessage('npm test', '[Output omitted]'),
+ createTerminalToolMessage('ls -la', 'file1.txt\nfile2.txt'),
]
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
- const resultMessages = results[0].input.messages
-
- // Should handle malformed entries gracefully
+ const resultMessages = (results[0] as any).input.messages
+
+ // Should handle terminal commands gracefully
expect(resultMessages.length).toBeGreaterThan(0)
-
+
// Valid terminal command should be processed correctly
- const validCommand = resultMessages.find((m: any) =>
- typeof m.content === 'string' && m.content.includes('run_terminal_command[Output omitted]')
+ const validCommand = resultMessages.find(
+ (m: any) =>
+ m.role === 'tool' && m.content?.toolName === 'run_terminal_command',
)
expect(validCommand).toBeDefined()
})
- test('handles nested tool results in terminal commands', () => {
- const largeContent = 'x'.repeat(100000)
- const nestedToolResult = `
- run_terminal_command[Output omitted]
-
-
- `
-
- const messages = [
- createMessage('user', largeContent),
- createMessage('assistant', nestedToolResult),
- ]
-
- const results = runHandleSteps(messages)
-
- expect(results).toHaveLength(1)
- // Should handle nested XML gracefully without breaking
- expect(results[0].input.messages).toBeDefined()
- })
-
test('handles exact token limit boundary', () => {
// Create content that when stringified is close to the 200k token limit
// 200k tokens ≈ 600k characters (rough approximation used in code)
const boundaryContent = 'x'.repeat(599000)
-
- const messages = [
- createMessage('user', boundaryContent),
- ]
+
+ const messages = [createMessage('user', boundaryContent)]
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
// Should handle boundary condition without errors
- expect(results[0].input.messages).toBeDefined()
+ expect((results[0] as any).input.messages).toBeDefined()
})
test('preserves message order after pruning', () => {
const largeContent = 'x'.repeat(50000)
-
+
const messages = [
createMessage('user', `First: ${largeContent}`),
createMessage('assistant', `Second: ${largeContent}`),
@@ -343,15 +388,23 @@ describe('context-pruner edge cases', () => {
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
- const resultMessages = results[0].input.messages
-
+ const resultMessages = (results[0] as any).input.messages
+
// Check that remaining messages maintain chronological order
let previousIndex = -1
resultMessages.forEach((message: any) => {
if (typeof message.content === 'string') {
- const match = message.content.match(/(First|Second|Third|Fourth|Fifth):/)
+ const match = message.content.match(
+ /(First|Second|Third|Fourth|Fifth):/,
+ )
if (match) {
- const currentIndex = ['First', 'Second', 'Third', 'Fourth', 'Fifth'].indexOf(match[1])
+ const currentIndex = [
+ 'First',
+ 'Second',
+ 'Third',
+ 'Fourth',
+ 'Fifth',
+ ].indexOf(match[1])
expect(currentIndex).toBeGreaterThan(previousIndex)
previousIndex = currentIndex
}
@@ -369,37 +422,55 @@ describe('context-pruner edge cases', () => {
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
- expect(results[0].input.messages).toHaveLength(3)
+ expect((results[0] as any).input.messages).toHaveLength(3)
})
test('handles tool results with various sizes around 1000 char threshold', () => {
// Create content large enough to exceed 200k token limit to trigger pruning
const largeContent = 'x'.repeat(150000)
-
+
+ const createToolMessage = (toolName: string, size: number): any => ({
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolCallId: 'test-id',
+ toolName,
+ output: [
+ {
+ type: 'json',
+ value: {
+ data: 'a'.repeat(size),
+ },
+ },
+ ],
+ },
+ })
+
const messages = [
createMessage('user', largeContent),
createMessage('assistant', largeContent),
createMessage('user', largeContent),
createMessage('assistant', largeContent),
- createMessage('assistant', `test${'a'.repeat(999)}`), // Just under 1000
- createMessage('assistant', `test${'b'.repeat(1000)}`), // Exactly 1000
- createMessage('assistant', `test${'c'.repeat(1001)}`), // Just over 1000
+ createToolMessage('test1', 500), // Small
+ createToolMessage('test2', 999), // Just under 1000 when stringified
+ createToolMessage('test3', 2000), // Large
]
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
- const resultMessages = results[0].input.messages
-
+ const resultMessages = (results[0] as any).input.messages
+
// Check that some tool result processing occurred
- const hasToolResults = resultMessages.some((m: any) =>
- typeof m.content === 'string' && m.content.includes('')
- )
+ const hasToolResults = resultMessages.some((m: any) => m.role === 'tool')
expect(hasToolResults).toBe(true)
-
- // Check that large tool result replacement occurred (may replace all tool results over 1000 chars)
- const hasLargeToolResultReplacement = resultMessages.some((m: any) =>
- typeof m.content === 'string' && m.content.includes('Large tool result omitted')
+
+ // Check that large tool result replacement occurred
+ const hasLargeToolResultReplacement = resultMessages.some(
+ (m: any) =>
+ m.role === 'tool' &&
+ m.content?.output?.[0]?.value?.message ===
+ '[LARGE_TOOL_RESULT_OMITTED]',
)
expect(hasLargeToolResultReplacement).toBe(true)
})
@@ -407,7 +478,8 @@ describe('context-pruner edge cases', () => {
test('handles spawn_agent_inline detection with variations', () => {
const testCases = [
{
- content: 'Regular message with spawn_agent_inline but not for other-agent',
+ content:
+ 'Regular message with spawn_agent_inline but not for other-agent',
shouldRemove: false,
},
{
@@ -436,11 +508,13 @@ describe('context-pruner edge cases', () => {
if (shouldRemove) {
// Should remove the assistant message and following 2 user messages
- expect(results[0].input.messages).toHaveLength(1)
- expect(results[0].input.messages[0]).toEqual(createMessage('user', 'Hello'))
+ expect(results).toHaveLength(1)
+ expect((results[0] as any).input.messages[0]).toEqual(
+ createMessage('user', 'Hello'),
+ )
} else {
// Should preserve all messages
- expect(results[0].input.messages).toHaveLength(4)
+ expect((results[0] as any).input.messages).toHaveLength(4)
}
})
})
@@ -448,29 +522,32 @@ describe('context-pruner edge cases', () => {
test('handles multiple consecutive replacement messages in pruning', () => {
// Create scenario where multiple consecutive messages would be replaced
const largeContent = 'x'.repeat(60000)
-
- const messages = Array.from({ length: 10 }, (_, i) =>
- createMessage('user', `Message ${i}: ${largeContent}`)
+
+ const messages = Array.from({ length: 10 }, (_, i) =>
+ createMessage('user', `Message ${i}: ${largeContent}`),
)
const results = runHandleSteps(messages)
expect(results).toHaveLength(1)
- const resultMessages = results[0].input.messages
-
+ const resultMessages = (results[0] as any).input.messages
+
// Should not have consecutive replacement messages
let consecutiveReplacements = 0
let maxConsecutive = 0
-
+
resultMessages.forEach((message: any) => {
- if (typeof message.content === 'string' && message.content.includes('Previous message(s) omitted')) {
+ if (
+ typeof message.content === 'string' &&
+ message.content.includes('Previous message(s) omitted')
+ ) {
consecutiveReplacements++
} else {
maxConsecutive = Math.max(maxConsecutive, consecutiveReplacements)
consecutiveReplacements = 0
}
})
-
+
maxConsecutive = Math.max(maxConsecutive, consecutiveReplacements)
expect(maxConsecutive).toBeLessThanOrEqual(1) // No more than 1 consecutive replacement
})
diff --git a/.agents/base2/base2-factory.ts b/.agents/base2/base2-factory.ts
index 8c0f11e16..3788e52fd 100644
--- a/.agents/base2/base2-factory.ts
+++ b/.agents/base2/base2-factory.ts
@@ -1,9 +1,9 @@
import { publisher } from '../constants'
-
import {
PLACEHOLDER,
type SecretAgentDefinition,
} from '../types/secret-agent-definition'
+
import type { ModelName } from 'types/agent-definition'
export const base2 = (model: ModelName): Omit => ({
@@ -76,6 +76,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
agent_type: 'context-pruner',
params: params ?? {},
},
+ includeToolCall: false,
} as any
const { stepsComplete } = yield 'STEP'
@@ -87,6 +88,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
role: 'user',
content: `You have reached the step limit. Please summarize your progress in plain text (no need to use set_output) so far and what you still need to solve. Immediately after summarizing, please end your turn. Do not use any tools except for the end_turn tool.`,
},
+ includeToolCall: false,
}
yield 'STEP'
break
diff --git a/.agents/base2/editor.ts b/.agents/base2/editor.ts
index 57ee04524..a95229766 100644
--- a/.agents/base2/editor.ts
+++ b/.agents/base2/editor.ts
@@ -1,5 +1,4 @@
import { publisher } from '../constants'
-
import {
PLACEHOLDER,
type SecretAgentDefinition,
@@ -150,6 +149,7 @@ ${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`,
content:
'You have reached the step limit. Please use the set_output tool now to summarize your progress so far, what you still need to solve, and provide any insights that could help complete the remaining work. Please end your turn after using the set_output tool with the end_turn tool.',
},
+ includeToolCall: false,
}
// One final step to produce the summary
diff --git a/.agents/base2/planner-factory.ts b/.agents/base2/planner-factory.ts
index 1561c3cbb..363179341 100644
--- a/.agents/base2/planner-factory.ts
+++ b/.agents/base2/planner-factory.ts
@@ -1,10 +1,11 @@
-import { ModelName, ToolCall } from 'types/agent-definition'
import { publisher } from '../constants'
import {
PLACEHOLDER,
type SecretAgentDefinition,
} from '../types/secret-agent-definition'
+import type { ModelName, ToolCall } from 'types/agent-definition'
+
export const plannerFactory = (
model: ModelName,
): Omit => ({
@@ -51,11 +52,23 @@ ${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`,
agentState.messageHistory
.slice(2)
.map((message) =>
- typeof message.content === 'string'
- ? message.content
- : message.content
- .map((content) => (content.type === 'text' ? content.text : ''))
- .join('\n'),
+ message.role === 'tool'
+ ? JSON.stringify(
+ {
+ toolName: message.content.toolName,
+ toolCallId: message.content.toolCallId,
+ output: message.content.output,
+ },
+ null,
+ 2,
+ )
+ : typeof message.content === 'string'
+ ? message.content
+ : message.content
+ .map((content) =>
+ content.type === 'text' ? content.text : '',
+ )
+ .join('\n'),
)
.join('\n')
diff --git a/.agents/changes-reviewer.ts b/.agents/changes-reviewer.ts
index 9182abd02..bff957fec 100644
--- a/.agents/changes-reviewer.ts
+++ b/.agents/changes-reviewer.ts
@@ -110,6 +110,7 @@ Use the following guidelines to review the changes and suggest improvements:
content:
'Now I will spawn a file explorer to find any missing codebase context, and then review the changes.',
},
+ includeToolCall: false,
}
yield 'STEP_ALL'
diff --git a/.agents/context-pruner.ts b/.agents/context-pruner.ts
index f1d95e5d6..9b9604e3d 100644
--- a/.agents/context-pruner.ts
+++ b/.agents/context-pruner.ts
@@ -1,10 +1,8 @@
import { publisher } from './constants'
-import type {
- AgentDefinition,
- Message,
- ToolCall,
-} from './types/agent-definition'
+import type { AgentDefinition, ToolCall } from './types/agent-definition'
+import type { Message, ToolMessage } from './types/codebuff-message'
+import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
const definition: AgentDefinition = {
id: 'context-pruner',
@@ -43,31 +41,13 @@ const definition: AgentDefinition = {
let currentMessages = [...messages]
- // Find and remove context-pruner spawn_agent_inline call and following messages
- const lastAssistantMessageIndex = currentMessages.findLastIndex(
- (message) => message.role === 'assistant',
- )
- const lastAssistantMessage = currentMessages[lastAssistantMessageIndex]
- const lastAssistantMessageIsToolCall =
- typeof lastAssistantMessage?.content === 'string' &&
- lastAssistantMessage.content.includes('spawn_agent_inline') &&
- lastAssistantMessage.content.includes('context-pruner')
-
- if (lastAssistantMessageIsToolCall && lastAssistantMessageIndex >= 0) {
- // Remove tool call and any following messages.
- const messagesToRemove =
- currentMessages.length - lastAssistantMessageIndex
- currentMessages.splice(lastAssistantMessageIndex, messagesToRemove)
- }
-
- // Initial check - if already under limit, return (with inline agent tool call removed)
+ // Initial check - if already under limit, return
const initialTokens = countTokensJson(currentMessages)
if (initialTokens < maxMessageTokens) {
yield {
toolName: 'set_messages',
- input: {
- messages: currentMessages,
- },
+ input: { messages: currentMessages },
+ includeToolCall: false,
}
return
}
@@ -78,25 +58,41 @@ const definition: AgentDefinition = {
for (let i = currentMessages.length - 1; i >= 0; i--) {
const message = currentMessages[i]
- let processedContent =
- typeof message.content === 'string'
- ? message.content
- : JSON.stringify(message.content)
- if (processedContent.includes('run_terminal_command')) {
+ // Handle tool messages with new object format
+ if (
+ message.role === 'tool' &&
+ message.content.toolName === 'run_terminal_command'
+ ) {
+ const toolMessage =
+ message as CodebuffToolMessage<'run_terminal_command'>
+
if (numKeptTerminalCommands < numTerminalCommandsToKeep) {
numKeptTerminalCommands++
- afterTerminalPass.unshift({ ...message, content: processedContent })
+ afterTerminalPass.unshift(message)
} else {
- // Simplify terminal command result
- processedContent = processedContent.replace(
- /\s*run_terminal_command<\/tool>\s*[\s\S]*?<\/result>\s*<\/tool_result>/g,
- 'run_terminal_command[Output omitted]',
- )
- afterTerminalPass.unshift({ ...message, content: processedContent })
+ // Simplify terminal command result by replacing output
+ const simplifiedMessage: CodebuffToolMessage<'run_terminal_command'> =
+ {
+ ...toolMessage,
+ content: {
+ ...toolMessage.content,
+ output: [
+ {
+ type: 'json',
+ value: {
+ command:
+ toolMessage.content.output[0]?.value?.command || '',
+ stdoutOmittedForLength: true,
+ },
+ },
+ ],
+ },
+ }
+ afterTerminalPass.unshift(simplifiedMessage)
}
} else {
- afterTerminalPass.unshift({ ...message, content: processedContent })
+ afterTerminalPass.unshift(message)
}
}
@@ -108,28 +104,37 @@ const definition: AgentDefinition = {
input: {
messages: afterTerminalPass,
},
+ includeToolCall: false,
}
return
}
- // PASS 2: Remove large tool results (any tool result > 1000 chars)
+ // PASS 2: Remove large tool results (any tool result output > 1000 chars when stringified)
const afterToolResultsPass = afterTerminalPass.map((message) => {
- let processedContent =
- typeof message.content === 'string'
- ? message.content
- : JSON.stringify(message.content)
-
- if (
- processedContent.includes('') &&
- processedContent.length > 1000
- ) {
- processedContent = processedContent.replace(
- /[\s\S]*?<\/result>/g,
- '[Large tool result omitted]',
- )
+ if (message.role === 'tool') {
+ const outputSize = JSON.stringify(message.content.output).length
+
+ if (outputSize > 1000) {
+ // Replace with simplified output
+ const simplifiedMessage: ToolMessage = {
+ ...message,
+ content: {
+ ...message.content,
+ output: [
+ {
+ type: 'json',
+ value: {
+ message: '[LARGE_TOOL_RESULT_OMITTED]',
+ originalSize: outputSize,
+ },
+ },
+ ],
+ },
+ }
+ return simplifiedMessage
+ }
}
-
- return { ...message, content: processedContent }
+ return message
})
// Check if tool results pass was enough
@@ -140,7 +145,8 @@ const definition: AgentDefinition = {
input: {
messages: afterToolResultsPass,
},
- } satisfies ToolCall
+ includeToolCall: false,
+ } satisfies ToolCall<'set_messages'>
return
}
@@ -162,10 +168,7 @@ const definition: AgentDefinition = {
const filteredMessages: any[] = []
for (const message of afterToolResultsPass) {
- if (
- removedTokens >= tokensToRemove ||
- (message as any).keepDuringTruncation
- ) {
+ if (removedTokens >= tokensToRemove || message.keepDuringTruncation) {
filteredMessages.push(message)
continue
}
@@ -190,7 +193,8 @@ const definition: AgentDefinition = {
input: {
messages: finalMessages,
},
- } satisfies ToolCall
+ includeToolCall: false,
+ } satisfies ToolCall<'set_messages'>
},
}
diff --git a/.agents/factory/base.ts b/.agents/factory/base.ts
index 4fd3a563e..0064bae24 100644
--- a/.agents/factory/base.ts
+++ b/.agents/factory/base.ts
@@ -68,6 +68,7 @@ export const base = (model: ModelName): Omit => ({
agent_type: 'context-pruner',
params: params ?? {},
},
+ includeToolCall: false,
} as any
const { stepsComplete } = yield 'STEP'
diff --git a/.agents/git-committer.ts b/.agents/git-committer.ts
index 4701ceb9f..10c33216f 100644
--- a/.agents/git-committer.ts
+++ b/.agents/git-committer.ts
@@ -60,6 +60,7 @@ const definition: AgentDefinition = {
content:
"I've analyzed the git diff and recent commit history. Now I'll read any relevant files to better understand the context of these changes.",
},
+ includeToolCall: false,
}
// Step 3: Let AI generate a step to decide which files to read.
@@ -73,6 +74,7 @@ const definition: AgentDefinition = {
content:
"Now I'll analyze the changes and create a commit with a good commit message.",
},
+ includeToolCall: false,
}
yield 'STEP_ALL'
diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index cfdfb6633..fa71fef3f 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -14,6 +14,10 @@
* export default definition
*/
+import type { Message } from './codebuff-message'
+import type * as Tools from './tools'
+type ToolName = Tools.ToolName
+
// ============================================================================
// Agent Definition and Utility Types
// ============================================================================
@@ -201,25 +205,6 @@ export interface AgentState {
output: Record | undefined
}
-/**
- * Message in conversation history
- */
-export interface Message {
- role: 'user' | 'assistant'
- content:
- | string
- | Array<
- | {
- type: 'text'
- text: string
- }
- | {
- type: 'image'
- image: string
- }
- >
-}
-
/**
* Context provided to handleSteps generator function
*/
@@ -236,6 +221,7 @@ export type ToolCall = {
[K in T]: {
toolName: K
input: Tools.GetToolParams
+ includeToolCall?: boolean
}
}[T]
@@ -362,6 +348,4 @@ export type ModelName =
| 'z-ai/glm-4.5:nitro'
| (string & {})
-import type * as Tools from './tools'
export type { Tools }
-type ToolName = Tools.ToolName
diff --git a/.agents/types/codebuff-message.ts b/.agents/types/codebuff-message.ts
new file mode 100644
index 000000000..97b9fdc1a
--- /dev/null
+++ b/.agents/types/codebuff-message.ts
@@ -0,0 +1,77 @@
+import z from 'zod/v4'
+
+import {
+ filePartSchema,
+ imagePartSchema,
+ reasoningPartSchema,
+ textPartSchema,
+ toolCallPartSchema,
+ toolResultPartSchema,
+} from './content-part'
+import { providerMetadataSchema } from './provider-metadata'
+
+const auxiliaryDataSchema = z.object({
+ providerOptions: providerMetadataSchema.optional(),
+ timeToLive: z
+ .union([z.literal('agentStep'), z.literal('userPrompt')])
+ .optional(),
+ keepDuringTruncation: z.boolean().optional(),
+})
+
+export const systemMessageSchema = z
+ .object({
+ role: z.literal('system'),
+ content: z.string(),
+ })
+ .and(auxiliaryDataSchema)
+export type SystemMessage = z.infer
+
+export const userMessageSchema = z
+ .object({
+ role: z.literal('user'),
+ content: z.union([
+ z.string(),
+ z.union([textPartSchema, imagePartSchema, filePartSchema]).array(),
+ ]),
+ })
+ .and(auxiliaryDataSchema)
+export type UserMessage = z.infer
+
+export const assistantMessageSchema = z
+ .object({
+ role: z.literal('assistant'),
+ content: z.union([
+ z.string(),
+ z
+ .union([textPartSchema, reasoningPartSchema, toolCallPartSchema])
+ .array(),
+ ]),
+ })
+ .and(auxiliaryDataSchema)
+export type AssistantMessage = z.infer
+
+export const toolMessageSchema = z
+ .object({
+ role: z.literal('tool'),
+ content: toolResultPartSchema,
+ })
+ .and(auxiliaryDataSchema)
+export type ToolMessage = z.infer
+
+export const messageSchema = z
+ .union([
+ systemMessageSchema,
+ userMessageSchema,
+ assistantMessageSchema,
+ toolMessageSchema,
+ ])
+ .and(
+ z.object({
+ providerOptions: providerMetadataSchema.optional(),
+ timeToLive: z
+ .union([z.literal('agentStep'), z.literal('userPrompt')])
+ .optional(),
+ keepDuringTruncation: z.boolean().optional(),
+ }),
+ )
+export type Message = z.infer
diff --git a/.agents/types/content-part.ts b/.agents/types/content-part.ts
new file mode 100644
index 000000000..474ce335e
--- /dev/null
+++ b/.agents/types/content-part.ts
@@ -0,0 +1,68 @@
+import z from 'zod/v4'
+
+import { dataContentSchema } from './data-content'
+import { jsonValueSchema } from './json'
+import { providerMetadataSchema } from './provider-metadata'
+
+export const textPartSchema = z.object({
+ type: z.literal('text'),
+ text: z.string(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type TextPart = z.infer
+
+export const imagePartSchema = z.object({
+ type: z.literal('image'),
+ image: z.union([dataContentSchema, z.instanceof(URL)]),
+ mediaType: z.string().optional(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type ImagePart = z.infer
+
+export const filePartSchema = z.object({
+ type: z.literal('file'),
+ data: z.union([dataContentSchema, z.instanceof(URL)]),
+ filename: z.string().optional(),
+ mediaType: z.string(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type FilePart = z.infer
+
+export const reasoningPartSchema = z.object({
+ type: z.literal('reasoning'),
+ text: z.string(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type ReasoningPart = z.infer
+
+export const toolCallPartSchema = z.object({
+ type: z.literal('tool-call'),
+ toolCallId: z.string(),
+ toolName: z.string(),
+ input: z.record(z.string(), z.unknown()),
+ providerOptions: providerMetadataSchema.optional(),
+ providerExecuted: z.boolean().optional(),
+})
+export type ToolCallPart = z.infer
+
+export const toolResultOutputSchema = z.discriminatedUnion('type', [
+ z.object({
+ type: z.literal('json'),
+ value: jsonValueSchema,
+ }),
+ z.object({
+ type: z.literal('media'),
+ data: z.string(),
+ mediaType: z.string(),
+ }),
+])
+export type ToolResultOutput = z.infer
+
+export const toolResultPartSchema = z.object({
+ type: z.literal('tool-result'),
+ toolCallId: z.string(),
+ toolName: z.string(),
+ output: toolResultOutputSchema.array(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type ToolResultPart = z.infer
diff --git a/.agents/types/data-content.ts b/.agents/types/data-content.ts
new file mode 100644
index 000000000..eb5c2e1ff
--- /dev/null
+++ b/.agents/types/data-content.ts
@@ -0,0 +1,14 @@
+import z from 'zod/v4'
+
+export const dataContentSchema = z.union([
+ z.string(),
+ z.instanceof(Uint8Array),
+ z.instanceof(ArrayBuffer),
+ z.custom(
+ // Buffer might not be available in some environments such as CloudFlare:
+ (value: unknown): value is Buffer =>
+ globalThis.Buffer?.isBuffer(value) ?? false,
+ { message: 'Must be a Buffer' },
+ ),
+])
+export type DataContent = z.infer
diff --git a/.agents/types/json.ts b/.agents/types/json.ts
new file mode 100644
index 000000000..167f8d051
--- /dev/null
+++ b/.agents/types/json.ts
@@ -0,0 +1,29 @@
+import z from 'zod/v4'
+
+export type JSONValue =
+ | null
+ | string
+ | number
+ | boolean
+ | JSONObject
+ | JSONArray
+export const jsonValueSchema: z.ZodType = z.lazy(() =>
+ z.union([
+ z.null(),
+ z.string(),
+ z.number(),
+ z.boolean(),
+ jsonObjectSchema,
+ jsonArraySchema,
+ ]),
+)
+
+export const jsonObjectSchema: z.ZodType = z.lazy(() =>
+ z.record(z.string(), jsonValueSchema),
+)
+export type JSONObject = { [key: string]: JSONValue }
+
+export const jsonArraySchema: z.ZodType = z.lazy(() =>
+ z.array(jsonValueSchema),
+)
+export type JSONArray = JSONValue[]
diff --git a/.agents/types/provider-metadata.ts b/.agents/types/provider-metadata.ts
new file mode 100644
index 000000000..5749359fe
--- /dev/null
+++ b/.agents/types/provider-metadata.ts
@@ -0,0 +1,10 @@
+import z from 'zod/v4'
+
+import { jsonValueSchema } from './json'
+
+export const providerMetadataSchema = z.record(
+ z.string(),
+ z.record(z.string(), jsonValueSchema),
+)
+
+export type ProviderMetadata = z.infer
diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts
index 18b20f563..9acee92bd 100644
--- a/.agents/types/tools.ts
+++ b/.agents/types/tools.ts
@@ -1,3 +1,5 @@
+import type { Message } from './codebuff-message'
+
/**
* Union type of all available tool names
*/
@@ -118,15 +120,7 @@ export interface RunTerminalCommandParams {
* Set the conversation history to the provided messages.
*/
export interface SetMessagesParams {
- messages: {
- role: 'user' | 'assistant'
- content:
- | string
- | {
- type: 'text'
- text: string
- }[]
- }[]
+ messages: Message[]
}
/**
diff --git a/backend/src/__tests__/cost-aggregation.integration.test.ts b/backend/src/__tests__/cost-aggregation.integration.test.ts
index 3fa044395..75401f40e 100644
--- a/backend/src/__tests__/cost-aggregation.integration.test.ts
+++ b/backend/src/__tests__/cost-aggregation.integration.test.ts
@@ -198,19 +198,25 @@ describe('Cost Aggregation Integration Tests', () => {
async (ws, userInputId, toolName, input) => {
if (toolName === 'write_file') {
return {
- success: true,
- output: {
- type: 'text' as const,
- value: `File ${input.path} created successfully`,
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ message: `File ${input.path} created successfully`,
+ },
+ },
+ ],
}
}
return {
- success: true,
- output: {
- type: 'text' as const,
- value: 'Tool executed successfully',
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ message: 'Tool executed successfully',
+ },
+ },
+ ],
}
},
)
diff --git a/backend/src/__tests__/main-prompt.integration.test.ts b/backend/src/__tests__/main-prompt.integration.test.ts
index 64622a455..91eb9d7ab 100644
--- a/backend/src/__tests__/main-prompt.integration.test.ts
+++ b/backend/src/__tests__/main-prompt.integration.test.ts
@@ -18,7 +18,6 @@ import * as requestFilesPrompt from '../find-files/request-files-prompt'
import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
import { mainPrompt } from '../main-prompt'
import { logger } from '../util/logger'
-import { renderReadFilesResult } from '../util/parse-tool-call-xml'
import * as websocketAction from '../websockets/websocket-action'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
@@ -91,8 +90,12 @@ describe.skip('mainPrompt (Integration)', () => {
input: Record,
) => {
return {
- success: true,
- result: `Tool call success: ${{ toolName, input }}` as any,
+ output: [
+ {
+ type: 'json',
+ value: `Tool call success: ${{ toolName, input }}`,
+ },
+ ],
}
},
)
@@ -352,16 +355,21 @@ export function getMessagesSubset(messages: Message[], otherTokens: number) {
}),
},
{
- role: 'user',
- content: renderReadFilesResult(
- [
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'read_files',
+ toolCallId: 'test-id',
+ output: [
{
- path: 'src/util/messages.ts',
- content: initialContent,
+ type: 'json',
+ value: {
+ path: 'src/util/messages.ts',
+ content: initialContent,
+ },
},
],
- {},
- ),
+ },
},
)
@@ -437,16 +445,21 @@ export function getMessagesSubset(messages: Message[], otherTokens: number) {
}),
},
{
- role: 'user',
- content: renderReadFilesResult(
- [
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'read_files',
+ toolCallId: 'test-id',
+ output: [
{
- path: 'packages/backend/src/index.ts',
- content: initialContent,
+ type: 'json',
+ value: {
+ path: 'packages/backend/src/index.ts',
+ content: initialContent,
+ },
},
],
- {},
- ),
+ },
},
)
diff --git a/backend/src/__tests__/main-prompt.test.ts b/backend/src/__tests__/main-prompt.test.ts
index 5e3edbccc..c27c4515e 100644
--- a/backend/src/__tests__/main-prompt.test.ts
+++ b/backend/src/__tests__/main-prompt.test.ts
@@ -5,10 +5,7 @@ import {
clearMockedModules,
mockModule,
} from '@codebuff/common/testing/mock-modules'
-import {
- getToolCallString,
- renderToolResults,
-} from '@codebuff/common/tools/utils'
+import { getToolCallString } from '@codebuff/common/tools/utils'
import {
AgentTemplateTypes,
getInitialSessionState,
@@ -159,8 +156,12 @@ describe('mainPrompt', () => {
timeout: number = 30_000,
) => {
return {
- success: true,
- result: `Tool call success: ${{ toolName, input }}` as any,
+ output: [
+ {
+ type: 'json',
+ value: `Tool call success: ${{ toolName, input }}`,
+ },
+ ],
}
},
)
@@ -229,18 +230,21 @@ describe('mainPrompt', () => {
const sessionState = getInitialSessionState(mockFileContext)
// Simulate a previous read_files result being in the history
sessionState.mainAgentState.messageHistory.push({
- role: 'user',
- content: renderToolResults([
- {
- toolCallId: 'prev-read',
- toolName: 'read_files',
- output: {
- type: 'text',
- value:
- '\ntest.txt\nold content\n',
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolCallId: 'prev-read',
+ toolName: 'read_files',
+ output: [
+ {
+ type: 'json',
+ value: {
+ path: 'test.txt',
+ content: 'old content',
+ },
},
- },
- ]),
+ ],
+ },
})
const action = {
@@ -299,23 +303,18 @@ describe('mainPrompt', () => {
// It's usually the message right before the final assistant response.
const toolResultMessages =
newSessionState.mainAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes(''),
+ (m) => m.role === 'tool',
)
// Find the specific tool result message that contains file_updates
const fileUpdateMessage = toolResultMessages.find(
- (m) =>
- typeof m.content === 'string' &&
- m.content.includes('read_files'),
+ (m) => m.content.toolName === 'read_files',
)
expect(fileUpdateMessage).toBeDefined()
- expect(fileUpdateMessage?.content).toContain('test.txt')
+ expect(JSON.stringify(fileUpdateMessage?.content)).toContain('test.txt')
// Check that the content reflects the *new* mock content within the file_updates result
- expect(fileUpdateMessage?.content).toContain('old content')
+ expect(JSON.stringify(fileUpdateMessage?.content)).toContain('old content')
})
it('should handle direct terminal command', async () => {
@@ -365,10 +364,7 @@ describe('mainPrompt', () => {
// Verify that a tool result was added to message history
const toolResultMessages =
newSessionState.mainAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes(''),
+ (m) => m.role === 'tool',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
})
diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts
index 70f59ecf3..a3343f002 100644
--- a/backend/src/__tests__/read-docs-tool.test.ts
+++ b/backend/src/__tests__/read-docs-tool.test.ts
@@ -19,6 +19,7 @@ import {
test,
} from 'bun:test'
+import researcherAgent from '../../../.agents/researcher'
import * as checkTerminalCommandModule from '../check-terminal-command'
import * as requestFilesPrompt from '../find-files/request-files-prompt'
import * as liveUserInputs from '../live-user-inputs'
@@ -28,7 +29,6 @@ import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
import { runAgentStep } from '../run-agent-step'
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
import * as websocketAction from '../websockets/websocket-action'
-import researcherAgent from '../../../.agents/researcher'
import type { WebSocket } from 'ws'
@@ -106,8 +106,12 @@ describe('read_docs tool with researcher agent', () => {
websocketAction,
'requestToolCall',
).mockImplementation(async () => ({
- success: true,
- result: 'Tool call success' as any,
+ output: [
+ {
+ type: 'json',
+ value: 'Tool call success',
+ },
+ ],
}))
mockedFunctions.push({
name: 'websocketAction.requestToolCall',
@@ -336,15 +340,12 @@ describe('read_docs tool with researcher agent', () => {
// Check that the documentation was added to the message history
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('read_docs'),
+ (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- mockDocumentation,
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain(JSON.stringify(mockDocumentation).slice(1, -1))
}, 10000)
test('should fetch documentation with topic and max_tokens', async () => {
@@ -458,15 +459,12 @@ describe('read_docs tool with researcher agent', () => {
// Check that the "no documentation found" message was added
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('read_docs'),
+ (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'No documentation found for "NonExistentLibrary"',
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('No documentation found for \\"NonExistentLibrary\\"')
}, 10000)
test('should handle API errors gracefully', async () => {
@@ -530,18 +528,15 @@ describe('read_docs tool with researcher agent', () => {
// Check that the error message was added
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('read_docs'),
+ (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'Error fetching documentation for "React"',
- )
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'Network timeout',
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('Error fetching documentation for \\"React\\"')
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('Network timeout')
}, 10000)
test('should include topic in error message when specified', async () => {
@@ -601,14 +596,13 @@ describe('read_docs tool with researcher agent', () => {
// Check that the topic is included in the error message
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('read_docs'),
+ (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'No documentation found for "React" with topic "server-components"',
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain(
+ 'No documentation found for \\"React\\" with topic \\"server-components\\"',
)
}, 10000)
@@ -671,17 +665,14 @@ describe('read_docs tool with researcher agent', () => {
// Check that the generic error message was added
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('read_docs'),
+ (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'Error fetching documentation for "React"',
- )
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'Unknown error',
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('Error fetching documentation for \\"React\\"')
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('Unknown error')
}, 10000)
})
diff --git a/backend/src/__tests__/request-files-prompt.test.ts b/backend/src/__tests__/request-files-prompt.test.ts
index 9afc1e92a..eb496fb6a 100644
--- a/backend/src/__tests__/request-files-prompt.test.ts
+++ b/backend/src/__tests__/request-files-prompt.test.ts
@@ -12,7 +12,7 @@ import * as OriginalRequestFilesPromptModule from '../find-files/request-files-p
import * as geminiWithFallbacksModule from '../llm-apis/gemini-with-fallbacks'
import type { CostMode } from '@codebuff/common/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { Mock } from 'bun:test'
@@ -52,9 +52,7 @@ bunMockFn.module('@codebuff/bigquery', () => ({
}))
describe('requestRelevantFiles', () => {
- const mockMessages: CodebuffMessage[] = [
- { role: 'user', content: 'test prompt' },
- ]
+ const mockMessages: Message[] = [{ role: 'user', content: 'test prompt' }]
const mockSystem = 'test system'
const mockFileContext: ProjectFileContext = {
projectRoot: '/test/project',
diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts
index a67d522d4..b42a0de9e 100644
--- a/backend/src/__tests__/run-programmatic-step.test.ts
+++ b/backend/src/__tests__/run-programmatic-step.test.ts
@@ -4,7 +4,6 @@ import {
clearMockedModules,
mockModule,
} from '@codebuff/common/testing/mock-modules'
-import { renderToolResults } from '@codebuff/common/tools/utils'
import { getInitialSessionState } from '@codebuff/common/types/session-state'
import {
afterAll,
@@ -24,16 +23,16 @@ import {
} from '../run-programmatic-step'
import { mockFileContext, MockWebSocket } from './test-utils'
import * as toolExecutor from '../tools/tool-executor'
-import { asSystemMessage } from '../util/messages'
import * as requestContext from '../websockets/request-context'
import type { AgentTemplate, StepGenerator } from '../templates/types'
+import type { PublicAgentState } from '@codebuff/common/types/agent-template'
import type {
- AgentState,
- ToolResult,
-} from '@codebuff/common/types/session-state'
+ ToolResultOutput,
+ ToolResultPart,
+} from '@codebuff/common/types/messages/content-part'
+import type { AgentState } from '@codebuff/common/types/session-state'
import type { WebSocket } from 'ws'
-import { PublicAgentState } from '@codebuff/common/types/agent-template'
describe('runProgrammaticStep', () => {
let mockTemplate: AgentTemplate
@@ -206,6 +205,7 @@ describe('runProgrammaticStep', () => {
yield {
toolName: 'add_message',
input: { role: 'user', content: 'Hello world' },
+ includeToolCall: false,
}
yield { toolName: 'read_files', input: { paths: ['test.txt'] } }
yield { toolName: 'end_turn', input: {} }
@@ -302,35 +302,27 @@ describe('runProgrammaticStep', () => {
// Mock executeToolCall to simulate find_files tool result
executeToolCallSpy.mockImplementation(async (options: any) => {
if (options.toolName === 'find_files') {
- const toolResult: ToolResult = {
+ const toolResult: ToolResultPart = {
+ type: 'tool-result',
toolName: 'find_files',
toolCallId: 'find-files-call-id',
- output: {
- type: 'text',
- value: JSON.stringify({
- files: [
- { path: 'src/auth.ts', relevance: 0.9 },
- { path: 'src/login.ts', relevance: 0.8 },
- ],
- }),
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ files: [
+ { path: 'src/auth.ts', relevance: 0.9 },
+ { path: 'src/login.ts', relevance: 0.8 },
+ ],
+ },
+ },
+ ],
}
options.toolResults.push(toolResult)
- // Add tool result to state.messages like the real implementation
- // This mimics what tool-executor.ts does: state.messages.push({ role: 'user', content: asSystemMessage(renderToolResults([toolResult])) })
- const formattedToolResult = asSystemMessage(
- renderToolResults([
- {
- toolName: toolResult.toolName,
- toolCallId: toolResult.toolCallId,
- output: toolResult.output,
- },
- ]),
- )
options.state.messages.push({
- role: 'user',
- content: formattedToolResult,
+ role: 'tool',
+ content: toolResult,
})
}
// Return a value to satisfy the call
@@ -351,13 +343,12 @@ describe('runProgrammaticStep', () => {
// Verify tool result was added to messageHistory
const toolMessages = result.agentState.messageHistory.filter(
(msg) =>
- msg.role === 'user' &&
- typeof msg.content === 'string' &&
- msg.content.includes('src/auth.ts'),
+ msg.role === 'tool' &&
+ JSON.stringify(msg.content.output).includes('src/auth.ts'),
)
expect(toolMessages).toHaveLength(1)
- expect(toolMessages[0].content).toContain('src/auth.ts')
- expect(toolMessages[0].content).toContain('src/login.ts')
+ expect(JSON.stringify(toolMessages[0].content)).toContain('src/auth.ts')
+ expect(JSON.stringify(toolMessages[0].content)).toContain('src/login.ts')
expect(result.endTurn).toBe(true)
})
@@ -382,7 +373,7 @@ describe('runProgrammaticStep', () => {
it('should comprehensively test STEP_ALL functionality with multiple tools and state management', async () => {
// Track all tool results and state changes for verification
- const toolResultsReceived: (string | undefined)[] = []
+ const toolResultsReceived: ToolResultOutput[][] = []
const stateSnapshots: PublicAgentState[] = []
let stepCount = 0
@@ -535,23 +526,22 @@ describe('runProgrammaticStep', () => {
result = `${toolName} executed successfully`
}
- const toolResult: ToolResult = {
+ const toolResult: ToolResultPart = {
+ type: 'tool-result',
toolName,
toolCallId: `${toolName}-call-id`,
- output: {
- type: 'text',
- value: result,
- },
+ output: [
+ {
+ type: 'json',
+ value: result,
+ },
+ ],
}
toolResults.push(toolResult)
- // Add tool result to state.messages like the real implementation
- const formattedToolResult = asSystemMessage(
- renderToolResults([toolResult]),
- )
state.messages.push({
role: 'user',
- content: formattedToolResult,
+ content: toolResult,
})
})
@@ -580,9 +570,11 @@ describe('runProgrammaticStep', () => {
// Verify tool results were passed back to generator
expect(toolResultsReceived).toHaveLength(7)
- expect(toolResultsReceived[0]).toContain('authenticate')
- expect(toolResultsReceived[3]).toContain('auth-analysis')
- expect(toolResultsReceived[6]).toContain('Output set successfully')
+ expect(JSON.stringify(toolResultsReceived[0])).toContain('authenticate')
+ expect(JSON.stringify(toolResultsReceived[3])).toContain('auth-analysis')
+ expect(JSON.stringify(toolResultsReceived[6])).toContain(
+ 'Output set successfully',
+ )
// Verify state management throughout execution
expect(stateSnapshots).toHaveLength(7)
@@ -643,8 +635,8 @@ describe('runProgrammaticStep', () => {
})
it('should pass tool results back to generator', async () => {
- const toolResults: ToolResult[] = []
- let receivedToolResult: string | undefined
+ const toolResults: ToolResultPart[] = []
+ let receivedToolResult: ToolResultOutput[] | undefined
const mockGenerator = (function* () {
const input1 = yield {
@@ -661,19 +653,27 @@ describe('runProgrammaticStep', () => {
executeToolCallSpy.mockImplementation(async (options: any) => {
if (options.toolName === 'read_files') {
options.toolResults.push({
+ type: 'tool-result',
toolName: 'read_files',
toolCallId: 'test-id',
- output: {
- type: 'text',
- value: 'file content',
- },
- })
+ output: [
+ {
+ type: 'json',
+ value: 'file content',
+ },
+ ],
+ } satisfies ToolResultPart)
}
})
await runProgrammaticStep(mockAgentState, mockParams)
- expect(receivedToolResult).toEqual('file content')
+ expect(receivedToolResult).toEqual([
+ {
+ type: 'json',
+ value: 'file content',
+ },
+ ])
})
})
diff --git a/backend/src/__tests__/spawn-agents-message-history.test.ts b/backend/src/__tests__/spawn-agents-message-history.test.ts
index a01c97320..5c3f464ec 100644
--- a/backend/src/__tests__/spawn-agents-message-history.test.ts
+++ b/backend/src/__tests__/spawn-agents-message-history.test.ts
@@ -17,7 +17,7 @@ import * as loggerModule from '../util/logger'
import type { CodebuffToolCall } from '@codebuff/common/tools/list'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { WebSocket } from 'ws'
describe('Spawn Agents Message History', () => {
@@ -102,7 +102,7 @@ describe('Spawn Agents Message History', () => {
const toolCall = createSpawnToolCall('child-agent')
// Create mock messages including system message
- const mockMessages: CodebuffMessage[] = [
+ const mockMessages: Message[] = [
{
role: 'system',
content: 'This is the parent system prompt that should be excluded',
@@ -183,7 +183,7 @@ describe('Spawn Agents Message History', () => {
const sessionState = getInitialSessionState(mockFileContext)
const toolCall = createSpawnToolCall('child-agent')
- const mockMessages: CodebuffMessage[] = [
+ const mockMessages: Message[] = [
{ role: 'system', content: 'System prompt' },
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi there!' },
@@ -222,7 +222,7 @@ describe('Spawn Agents Message History', () => {
const sessionState = getInitialSessionState(mockFileContext)
const toolCall = createSpawnToolCall('child-agent')
- const mockMessages: CodebuffMessage[] = [] // Empty message history
+ const mockMessages: Message[] = [] // Empty message history
const { result } = handleSpawnAgents({
previousToolCallFinished: Promise.resolve(),
@@ -259,7 +259,7 @@ describe('Spawn Agents Message History', () => {
const sessionState = getInitialSessionState(mockFileContext)
const toolCall = createSpawnToolCall('child-agent')
- const mockMessages: CodebuffMessage[] = [
+ const mockMessages: Message[] = [
{ role: 'system', content: 'System prompt 1' },
{ role: 'system', content: 'System prompt 2' },
]
diff --git a/backend/src/__tests__/spawn-agents-permissions.test.ts b/backend/src/__tests__/spawn-agents-permissions.test.ts
index ebcad7b9e..a8cc15741 100644
--- a/backend/src/__tests__/spawn-agents-permissions.test.ts
+++ b/backend/src/__tests__/spawn-agents-permissions.test.ts
@@ -261,7 +261,7 @@ describe('Spawn Agents Permissions', () => {
})
const output = await result
- expect(output).toContain('Mock agent response')
+ expect(JSON.stringify(output)).toContain('Mock agent response')
expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1)
})
@@ -293,8 +293,8 @@ describe('Spawn Agents Permissions', () => {
})
const output = await result
- expect(output).toContain('Error spawning agent')
- expect(output).toContain(
+ expect(JSON.stringify(output)).toContain('Error spawning agent')
+ expect(JSON.stringify(output)).toContain(
'is not allowed to spawn child agent type reviewer',
)
expect(mockLoopAgentSteps).not.toHaveBeenCalled()
@@ -327,8 +327,10 @@ describe('Spawn Agents Permissions', () => {
})
const output = await result
- expect(output).toContain('Error spawning agent')
- expect(output).toContain('Agent type nonexistent not found')
+ expect(JSON.stringify(output)).toContain('Error spawning agent')
+ expect(JSON.stringify(output)).toContain(
+ 'Agent type nonexistent not found',
+ )
expect(mockLoopAgentSteps).not.toHaveBeenCalled()
})
@@ -360,7 +362,7 @@ describe('Spawn Agents Permissions', () => {
})
const output = await result
- expect(output).toContain('Mock agent response')
+ expect(JSON.stringify(output)).toContain('Mock agent response')
expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1)
})
@@ -395,7 +397,7 @@ describe('Spawn Agents Permissions', () => {
})
const output = await result
- expect(output).toContain('Mock agent response')
+ expect(JSON.stringify(output)).toContain('Mock agent response')
expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1)
})
@@ -427,8 +429,10 @@ describe('Spawn Agents Permissions', () => {
})
const output = await result
- expect(output).toContain('Error spawning agent')
- expect(output).toContain('is not allowed to spawn child agent type')
+ expect(JSON.stringify(output)).toContain('Error spawning agent')
+ expect(JSON.stringify(output)).toContain(
+ 'is not allowed to spawn child agent type',
+ )
expect(mockLoopAgentSteps).not.toHaveBeenCalled()
})
@@ -474,9 +478,9 @@ describe('Spawn Agents Permissions', () => {
})
const output = await result
- expect(output).toContain('Mock agent response') // Successful thinker spawn
- expect(output).toContain('Error spawning agent') // Failed reviewer spawn
- expect(output).toContain(
+ expect(JSON.stringify(output)).toContain('Mock agent response') // Successful thinker spawn
+ expect(JSON.stringify(output)).toContain('Error spawning agent') // Failed reviewer spawn
+ expect(JSON.stringify(output)).toContain(
'is not allowed to spawn child agent type reviewer',
)
expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1) // Only thinker was spawned
diff --git a/backend/src/__tests__/web-search-tool.test.ts b/backend/src/__tests__/web-search-tool.test.ts
index 0132aba1a..89500b8e3 100644
--- a/backend/src/__tests__/web-search-tool.test.ts
+++ b/backend/src/__tests__/web-search-tool.test.ts
@@ -22,6 +22,7 @@ import {
test,
} from 'bun:test'
+import researcherAgent from '../../../.agents/researcher'
import * as checkTerminalCommandModule from '../check-terminal-command'
import * as requestFilesPrompt from '../find-files/request-files-prompt'
import * as liveUserInputs from '../live-user-inputs'
@@ -31,7 +32,6 @@ import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
import { runAgentStep } from '../run-agent-step'
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
import * as websocketAction from '../websockets/websocket-action'
-import researcherAgent from '../../../.agents/researcher'
import type { WebSocket } from 'ws'
@@ -62,8 +62,12 @@ describe('web_search tool with researcher agent', () => {
spyOn(websocketAction, 'requestFiles').mockImplementation(async () => ({}))
spyOn(websocketAction, 'requestFile').mockImplementation(async () => null)
spyOn(websocketAction, 'requestToolCall').mockImplementation(async () => ({
- success: true,
- result: 'Tool call success' as any,
+ output: [
+ {
+ type: 'json',
+ value: 'Tool call success',
+ },
+ ],
}))
// Mock LLM APIs
@@ -121,7 +125,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
await runAgentStep(new MockWebSocket() as unknown as WebSocket, {
userId: TEST_USER_ID,
@@ -165,7 +171,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
const { agentState: newAgentState } = await runAgentStep(
new MockWebSocket() as unknown as WebSocket,
@@ -193,15 +201,12 @@ describe('web_search tool with researcher agent', () => {
// Check that the search results were added to the message history
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('web_search'),
+ (m) => m.role === 'tool' && m.content.toolName === 'web_search',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- mockSearchResult,
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain(mockSearchResult)
})
test('should handle custom depth parameter', async () => {
@@ -227,7 +232,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
await runAgentStep(new MockWebSocket() as unknown as WebSocket, {
userId: TEST_USER_ID,
@@ -268,7 +275,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
const { agentState: newAgentState } = await runAgentStep(
new MockWebSocket() as unknown as WebSocket,
@@ -297,15 +306,12 @@ describe('web_search tool with researcher agent', () => {
// Check that the "no results found" message was added
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('web_search'),
+ (m) => m.role === 'tool' && m.content.toolName === 'web_search',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'No search results found',
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('No search results found')
})
test('should handle API errors gracefully', async () => {
@@ -329,7 +335,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
const { agentState: newAgentState } = await runAgentStep(
new MockWebSocket() as unknown as WebSocket,
@@ -355,18 +363,15 @@ describe('web_search tool with researcher agent', () => {
// Check that the error message was added
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('web_search'),
+ (m) => m.role === 'tool' && m.content.toolName === 'web_search',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'Error performing web search',
- )
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'Linkup API timeout',
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('Error performing web search')
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('Linkup API timeout')
})
test('should handle null response from searchWeb', async () => {
@@ -386,7 +391,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
const { agentState: newAgentState } = await runAgentStep(
new MockWebSocket() as unknown as WebSocket,
@@ -430,7 +437,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
const { agentState: newAgentState } = await runAgentStep(
new MockWebSocket() as unknown as WebSocket,
@@ -456,15 +465,12 @@ describe('web_search tool with researcher agent', () => {
// Check that the error message was added
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('web_search'),
+ (m) => m.role === 'tool' && m.content.toolName === 'web_search',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- 'Error performing web search',
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain('Error performing web search')
})
test('should format search results correctly', async () => {
@@ -489,7 +495,9 @@ describe('web_search tool with researcher agent', () => {
...sessionState.mainAgentState,
agentType: 'researcher' as const,
}
- const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+ const { agentTemplates } = assembleLocalAgentTemplates(
+ mockFileContextWithAgents,
+ )
const { agentState: newAgentState } = await runAgentStep(
new MockWebSocket() as unknown as WebSocket,
@@ -515,14 +523,11 @@ describe('web_search tool with researcher agent', () => {
// Check that the search results were formatted correctly
const toolResultMessages = newAgentState.messageHistory.filter(
- (m) =>
- m.role === 'user' &&
- typeof m.content === 'string' &&
- m.content.includes('web_search'),
+ (m) => m.role === 'tool' && m.content.toolName === 'web_search',
)
expect(toolResultMessages.length).toBeGreaterThan(0)
- expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
- mockSearchResult,
- )
+ expect(
+ JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+ ).toContain(mockSearchResult)
})
})
diff --git a/backend/src/admin/relabelRuns.ts b/backend/src/admin/relabelRuns.ts
index eb85e8182..73a58c42f 100644
--- a/backend/src/admin/relabelRuns.ts
+++ b/backend/src/admin/relabelRuns.ts
@@ -13,11 +13,9 @@ import { generateCompactId } from '@codebuff/common/util/string'
import { closeXml } from '@codebuff/common/util/xml'
import { rerank } from '../llm-apis/relace-api'
-import {
- promptAiSdk,
- transformMessages,
-} from '../llm-apis/vercel-ai-sdk/ai-sdk'
+import { promptAiSdk } from '../llm-apis/vercel-ai-sdk/ai-sdk'
import { logger } from '../util/logger'
+import { messagesWithSystem } from '../util/messages'
import type { System } from '../llm-apis/claude'
import type {
@@ -27,7 +25,7 @@ import type {
GetRelevantFilesTrace,
Relabel,
} from '@codebuff/bigquery'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { Request, Response } from 'express'
// --- GET Handler Logic ---
@@ -171,8 +169,8 @@ export async function relabelForUserHandler(req: Request, res: Response) {
const system = payload.system
output = await promptAiSdk({
- messages: transformMessages(
- messages as CodebuffMessage[],
+ messages: messagesWithSystem(
+ messages as Message[],
system as System,
),
model: model,
@@ -397,10 +395,7 @@ export async function relabelWithClaudeWithFullFileContext(
}
const output = await promptAiSdk({
- messages: transformMessages(
- trace.payload.messages as CodebuffMessage[],
- system,
- ),
+ messages: messagesWithSystem(trace.payload.messages as Message[], system),
model: model as any, // Model type is string here for flexibility
clientSessionId: 'relabel-trace-api',
fingerprintId: 'relabel-trace-api',
diff --git a/backend/src/fast-rewrite.ts b/backend/src/fast-rewrite.ts
index 5fc6f6148..1d7537ce8 100644
--- a/backend/src/fast-rewrite.ts
+++ b/backend/src/fast-rewrite.ts
@@ -1,9 +1,6 @@
import { geminiModels, openaiModels } from '@codebuff/common/constants'
import { buildArray } from '@codebuff/common/util/array'
-import {
- parseFileBlocks,
- parseMarkdownCodeBlock,
-} from '@codebuff/common/util/file'
+import { parseMarkdownCodeBlock } from '@codebuff/common/util/file'
import { generateCompactId, hasLazyEdit } from '@codebuff/common/util/string'
import { promptFlashWithFallbacks } from './llm-apis/gemini-with-fallbacks'
@@ -11,7 +8,11 @@ import { promptRelaceAI } from './llm-apis/relace-api'
import { promptAiSdk } from './llm-apis/vercel-ai-sdk/ai-sdk'
import { logger } from './util/logger'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
+import type {
+ Message,
+ ToolMessage,
+} from '@codebuff/common/types/messages/codebuff-message'
export async function fastRewrite(
initialContent: string,
@@ -135,23 +136,34 @@ export const shouldAddFilePlaceholders = async (
filePath: string,
oldContent: string,
rewrittenNewContent: string,
- messageHistory: CodebuffMessage[],
+ messageHistory: Message[],
fullResponse: string,
userId: string | undefined,
clientSessionId: string,
fingerprintId: string,
userInputId: string,
) => {
- const fileBlocks = parseFileBlocks(
- messageHistory
- .map((message) =>
- typeof message.content === 'string'
- ? message.content
- : message.content.map((c) => ('text' in c ? c.text : '')).join('\n'),
- )
- .join('\n') + fullResponse,
- )
- const fileWasPreviouslyEdited = Object.keys(fileBlocks).includes(filePath)
+ const fileWasPreviouslyEdited = messageHistory
+ .filter(
+ (
+ m,
+ ): m is ToolMessage & {
+ content: { toolName: 'create_plan' | 'str_replace' | 'write_file' }
+ } => {
+ return (
+ m.role === 'tool' &&
+ (m.content.toolName === 'create_plan' ||
+ m.content.toolName === 'str_replace' ||
+ m.content.toolName === 'write_file')
+ )
+ },
+ )
+ .some((m) => {
+ const message = m as CodebuffToolMessage<
+ 'create_plan' | 'str_replace' | 'write_file'
+ >
+ return message.content.output[0].value.file === filePath
+ })
if (!fileWasPreviouslyEdited) {
// If Claude hasn't edited this file before, it's almost certainly not a local-only change.
// Usually, it's only when Claude is editing a function for a second or third time that
diff --git a/backend/src/find-files/request-files-prompt.ts b/backend/src/find-files/request-files-prompt.ts
index 3cdbb766d..905592fb9 100644
--- a/backend/src/find-files/request-files-prompt.ts
+++ b/backend/src/find-files/request-files-prompt.ts
@@ -29,7 +29,7 @@ import type {
GetExpandedFileContextForTrainingTrace,
GetRelevantFilesTrace,
} from '@codebuff/bigquery'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { ProjectFileContext } from '@codebuff/common/util/file'
const NUMBER_OF_EXAMPLE_FILES = 100
@@ -123,7 +123,7 @@ export async function requestRelevantFiles(
messages,
system,
}: {
- messages: CodebuffMessage[]
+ messages: Message[]
system: string | Array
},
fileContext: ProjectFileContext,
@@ -221,7 +221,7 @@ export async function requestRelevantFilesForTraining(
messages,
system,
}: {
- messages: CodebuffMessage[]
+ messages: Message[]
system: string | Array
},
fileContext: ProjectFileContext,
@@ -302,7 +302,7 @@ async function getRelevantFiles(
messages,
system,
}: {
- messages: CodebuffMessage[]
+ messages: Message[]
system: string | Array
},
userPrompt: string,
@@ -385,7 +385,7 @@ async function getRelevantFilesForTraining(
messages,
system,
}: {
- messages: CodebuffMessage[]
+ messages: Message[]
system: string | Array
},
userPrompt: string,
diff --git a/backend/src/get-file-reading-updates.ts b/backend/src/get-file-reading-updates.ts
index 1ea30e387..41dec21ad 100644
--- a/backend/src/get-file-reading-updates.ts
+++ b/backend/src/get-file-reading-updates.ts
@@ -1,19 +1,13 @@
import { HIDDEN_FILE_READ_STATUS } from '@codebuff/common/constants'
-import { parseFileBlocks } from '@codebuff/common/util/file'
-import { toContentString } from '@codebuff/common/util/messages'
import { countTokens } from 'gpt-tokenizer'
import { uniq, difference } from 'lodash'
import { logger } from './util/logger'
-import {
- isToolResult,
- parseToolResults,
- parseReadFilesResult,
-} from './util/parse-tool-call-xml'
+import { getEditedFiles, getPreviouslyReadFiles } from './util/messages'
import { countTokensJson } from './util/token-counter'
import { requestFiles } from './websockets/websocket-action'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { WebSocket } from 'ws'
@@ -39,7 +33,7 @@ const getInitialFiles = (fileContext: ProjectFileContext) => {
export async function getFileReadingUpdates(
ws: WebSocket,
- messages: CodebuffMessage[],
+ messages: Message[],
fileContext: ProjectFileContext,
options: {
requestedFiles?: string[]
@@ -50,27 +44,25 @@ export async function getFileReadingUpdates(
userId: string | undefined
repoId: string | undefined
},
-) {
+): Promise<{
+ addedFiles: {
+ path: string
+ content: string
+ }[]
+ updatedFilePaths: string[]
+ printedPaths: string[]
+ clearReadFileToolResults: boolean
+}> {
const FILE_TOKEN_BUDGET = 100_000
- const toolResults = messages
- .filter(isToolResult)
- .flatMap((content) => parseToolResults(toContentString(content)))
- const previousFileList = toolResults
- .filter(({ toolName }) => toolName === 'read_files')
- .flatMap(({ output }) => parseReadFilesResult(output.value))
+ const previousFileList = getPreviouslyReadFiles(messages)
const previousFiles = Object.fromEntries(
previousFileList.map(({ path, content }) => [path, content]),
)
const previousFilePaths = uniq(Object.keys(previousFiles))
- const editedFilePaths = messages
- .filter(({ role }) => role === 'assistant')
- .map(toContentString)
- .filter((content) => content.includes(' Object.keys(parseFileBlocks(content)))
- .filter((path) => path !== undefined)
+ const editedFilePaths = getEditedFiles(messages)
const requestedFiles = options.requestedFiles ?? []
diff --git a/backend/src/llm-apis/gemini-with-fallbacks.ts b/backend/src/llm-apis/gemini-with-fallbacks.ts
index 30f529df9..4f76ffd6e 100644
--- a/backend/src/llm-apis/gemini-with-fallbacks.ts
+++ b/backend/src/llm-apis/gemini-with-fallbacks.ts
@@ -8,7 +8,7 @@ import type {
FinetunedVertexModel,
GeminiModel,
} from '@codebuff/common/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
/**
* Prompts a Gemini model with fallback logic.
@@ -36,7 +36,7 @@ import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-m
* @throws If all API calls (primary and fallbacks) fail.
*/
export async function promptFlashWithFallbacks(
- messages: CodebuffMessage[],
+ messages: Message[],
options: {
clientSessionId: string
fingerprintId: string
diff --git a/backend/src/llm-apis/message-cost-tracker.ts b/backend/src/llm-apis/message-cost-tracker.ts
index e7253bde0..fbf468ef1 100644
--- a/backend/src/llm-apis/message-cost-tracker.ts
+++ b/backend/src/llm-apis/message-cost-tracker.ts
@@ -22,7 +22,7 @@ import { SWITCHBOARD } from '../websockets/server'
import { sendAction } from '../websockets/websocket-action'
import type { ClientState } from '../websockets/switchboard'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
export const PROFIT_MARGIN = 0.055
@@ -332,7 +332,7 @@ type InsertMessageParams = {
fingerprintId: string
userInputId: string
model: string
- request: CodebuffMessage[]
+ request: Message[]
response: string
inputTokens: number
outputTokens: number
@@ -533,7 +533,7 @@ export const saveMessage = async (value: {
fingerprintId: string
userInputId: string
model: string
- request: CodebuffMessage[]
+ request: Message[]
response: string
inputTokens: number
outputTokens: number
diff --git a/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts b/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts
index 9f40519eb..51979d467 100644
--- a/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts
+++ b/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts
@@ -23,13 +23,12 @@ import { saveMessage } from '../message-cost-tracker'
import { openRouterLanguageModel } from '../openrouter'
import { vertexFinetuned } from './vertex-finetuned'
-import type { System } from '../claude'
import type {
GeminiModel,
Model,
OpenAIModel,
} from '@codebuff/common/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type {
OpenRouterProviderOptions,
OpenRouterUsageAccounting,
@@ -64,7 +63,7 @@ const modelToAiSDKModel = (model: Model): LanguageModel => {
// eg: [{model: "gemini-2.0-flash-001"}, {model: "vertex/gemini-2.0-flash-001"}, {model: "claude-3-5-haiku", retries: 3}]
export const promptAiSdkStream = async function* (
options: {
- messages: CodebuffMessage[]
+ messages: Message[]
clientSessionId: string
fingerprintId: string
model: Model
@@ -76,7 +75,7 @@ export const promptAiSdkStream = async function* (
maxRetries?: number
onCostCalculated?: (credits: number) => Promise
includeCacheControl?: boolean
- } & Omit[0], 'model'>,
+ } & Omit[0], 'model' | 'messages'>,
) {
if (
!checkLiveUserInput(
@@ -225,7 +224,7 @@ export const promptAiSdkStream = async function* (
// TODO: figure out a nice way to unify stream & non-stream versions maybe?
export const promptAiSdk = async function (
options: {
- messages: CodebuffMessage[]
+ messages: Message[]
clientSessionId: string
fingerprintId: string
userInputId: string
@@ -235,7 +234,7 @@ export const promptAiSdk = async function (
agentId?: string
onCostCalculated?: (credits: number) => Promise
includeCacheControl?: boolean
- } & Omit[0], 'model'>,
+ } & Omit[0], 'model' | 'messages'>,
): Promise {
if (
!checkLiveUserInput(
@@ -295,7 +294,7 @@ export const promptAiSdk = async function (
// Copied over exactly from promptAiSdk but with a schema
export const promptAiSdkStructured = async function (options: {
- messages: CodebuffMessage[]
+ messages: Message[]
schema: z.ZodType
clientSessionId: string
fingerprintId: string
@@ -369,33 +368,3 @@ export const promptAiSdkStructured = async function (options: {
return content
}
-
-// TODO: temporary - ideally we move to using CodebuffMessage[] directly
-// and don't need this transform!!
-export function transformMessages(
- messages: CodebuffMessage[],
- system?: System,
-): CodebuffMessage[] {
- const codebuffMessages: CodebuffMessage[] = []
-
- if (system) {
- codebuffMessages.push({
- role: 'system',
- content:
- typeof system === 'string'
- ? system
- : system.map((block) => block.text).join('\n\n'),
- })
- }
-
- return buildArray([
- system && {
- role: 'system',
- content:
- typeof system === 'string'
- ? system
- : system.map((block) => block.text).join('\n\n'),
- },
- messages,
- ])
-}
diff --git a/backend/src/main-prompt.ts b/backend/src/main-prompt.ts
index 56ab04128..c1b386e30 100644
--- a/backend/src/main-prompt.ts
+++ b/backend/src/main-prompt.ts
@@ -1,4 +1,3 @@
-import { renderToolResults } from '@codebuff/common/tools/utils'
import { AgentTemplateTypes } from '@codebuff/common/types/session-state'
import { generateCompactId } from '@codebuff/common/util/string'
import { uniq } from 'lodash'
@@ -13,10 +12,10 @@ import { requestToolCall } from './websockets/websocket-action'
import type { AgentTemplate } from './templates/types'
import type { ClientAction } from '@codebuff/common/actions'
import type { CostMode } from '@codebuff/common/constants'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type {
SessionState,
- ToolResult,
AgentTemplateType,
} from '@codebuff/common/types/session-state'
import type { WebSocket } from 'ws'
@@ -35,7 +34,7 @@ export const mainPrompt = async (
): Promise<{
sessionState: SessionState
toolCalls: []
- toolResults: ToolResult[]
+ toolResults: ToolResultPart[]
}> => {
const { userId, clientSessionId, onResponseChunk, localAgentTemplates } =
options
@@ -71,7 +70,7 @@ export const mainPrompt = async (
`Detected terminal command in ${duration}ms, executing directly: ${prompt}`,
)
- const response = await requestToolCall(
+ const { output } = await requestToolCall(
ws,
promptId,
'run_terminal_command',
@@ -83,23 +82,15 @@ export const mainPrompt = async (
},
)
- const toolResult: ToolResult['output'] = {
- type: 'text',
- value:
- (response.success ? response.output?.value : response.error) || '',
- }
- if (response.success) {
- mainAgentState.messageHistory.push({
- role: 'user',
- content: renderToolResults([
- {
- toolName: 'run_terminal_command',
- toolCallId: generateCompactId(),
- output: toolResult,
- },
- ]),
- })
- }
+ mainAgentState.messageHistory.push({
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: generateCompactId(),
+ output: output,
+ },
+ })
const newSessionState = {
...sessionState,
diff --git a/backend/src/process-file-block.ts b/backend/src/process-file-block.ts
index 736fb4d92..a77551c79 100644
--- a/backend/src/process-file-block.ts
+++ b/backend/src/process-file-block.ts
@@ -12,14 +12,14 @@ import { promptAiSdk } from './llm-apis/vercel-ai-sdk/ai-sdk'
import { logger } from './util/logger'
import { countTokens } from './util/token-counter'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
export async function processFileBlock(
path: string,
instructions: string | undefined,
initialContentPromise: Promise,
newContent: string,
- messages: CodebuffMessage[],
+ messages: Message[],
fullResponse: string,
lastUserPrompt: string | undefined,
clientSessionId: string,
diff --git a/backend/src/prompt-agent-stream.ts b/backend/src/prompt-agent-stream.ts
index f12243bd5..cb923d9de 100644
--- a/backend/src/prompt-agent-stream.ts
+++ b/backend/src/prompt-agent-stream.ts
@@ -4,7 +4,7 @@ import { promptAiSdkStream } from './llm-apis/vercel-ai-sdk/ai-sdk'
import { globalStopSequence } from './tools/constants'
import type { AgentTemplate } from './templates/types'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { OpenRouterProviderOptions } from '@codebuff/internal/openrouter-ai-sdk'
export const getAgentStreamFromTemplate = (params: {
@@ -35,7 +35,7 @@ export const getAgentStreamFromTemplate = (params: {
const { model } = template
- const getStream = (messages: CodebuffMessage[]) => {
+ const getStream = (messages: Message[]) => {
const options: Parameters[0] = {
messages,
model,
diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index 56d779bf4..a09d9ba46 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -6,7 +6,6 @@ import {
} from '@codebuff/common/constants'
import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
import { TOOLS_WHICH_WONT_FORCE_NEXT_STEP } from '@codebuff/common/tools/constants'
-import { renderToolResults } from '@codebuff/common/tools/utils'
import { buildArray } from '@codebuff/common/util/array'
import { generateCompactId } from '@codebuff/common/util/string'
@@ -29,19 +28,20 @@ import {
getMessagesSubset,
isSystemInstruction,
} from './util/messages'
-import { isToolResult, renderReadFilesResult } from './util/parse-tool-call-xml'
+import { renderReadFilesResult } from './util/parse-tool-call-xml'
import { simplifyReadFileResults } from './util/simplify-tool-results'
import { countTokensJson } from './util/token-counter'
import { getRequestContext } from './websockets/request-context'
import type { AgentResponseTrace } from '@codebuff/bigquery'
+import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type {
AgentTemplateType,
AgentState,
- ToolResult,
} from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { WebSocket } from 'ws'
@@ -149,8 +149,12 @@ export const runAgentStep = async (
if (clearReadFileToolResults) {
// Update message history.
for (const message of messageHistory) {
- if (isToolResult(message)) {
- message.content = simplifyReadFileResults(message.content)
+ if (
+ message.role === 'tool' &&
+ message.content.toolName === 'read_files'
+ ) {
+ const m = message as CodebuffToolMessage<'read_files'>
+ m.content.output = simplifyReadFileResults(m.content.output)
}
}
@@ -162,7 +166,7 @@ export const runAgentStep = async (
})
}
- const toolResults: ToolResult[] = []
+ const toolResults: ToolResultPart[] = []
const updatedFiles = addedFiles.filter((f) =>
updatedFilePaths.includes(f.path),
@@ -170,14 +174,21 @@ export const runAgentStep = async (
if (updatedFiles.length > 0) {
toolResults.push({
+ type: 'tool-result',
toolName: 'file_updates',
toolCallId: generateCompactId(),
- output: {
- type: 'text',
- value:
- `These are the updates made to the files since the last response (either by you or by the user). These are the most recent versions of these files. You MUST be considerate of the user's changes:\n` +
- renderReadFilesResult(updatedFiles, fileContext.tokenCallers ?? {}),
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ message: `These are the updates made to the files since the last response (either by you or by the user). These are the most recent versions of these files. You MUST be considerate of the user's changes.`,
+ files: renderReadFilesResult(
+ updatedFiles,
+ fileContext.tokenCallers ?? {},
+ ),
+ },
+ },
+ ],
})
}
@@ -217,13 +228,15 @@ export const runAgentStep = async (
localAgentTemplates,
)
- const agentMessagesUntruncated = buildArray(
+ const agentMessagesUntruncated = buildArray(
...expireMessages(messageHistory, 'agentStep'),
- toolResults.length > 0 && {
- role: 'user' as const,
- content: asSystemMessage(renderToolResults(toolResults)),
- },
+ toolResults.map((result) => {
+ return {
+ role: 'tool',
+ content: result,
+ }
+ }),
stepPrompt && {
role: 'user' as const,
@@ -443,6 +456,7 @@ export const loopAgentSteps = async (
userId,
clientSessionId,
onResponseChunk,
+ clearUserPromptMessagesAfterResponse = true,
}: {
userInputId: string
agentType: AgentTemplateType
@@ -451,8 +465,9 @@ export const loopAgentSteps = async (
params: Record | undefined
fingerprintId: string
fileContext: ProjectFileContext
- toolResults: ToolResult[]
+ toolResults: ToolResultPart[]
localAgentTemplates: Record
+ clearUserPromptMessagesAfterResponse?: boolean
userId: string | undefined
clientSessionId: string
@@ -465,7 +480,9 @@ export const loopAgentSteps = async (
}
// Initialize message history with user prompt and instructions on first iteration
- const hasPrompt = Boolean(prompt || params)
+ const hasPrompt = Boolean(
+ prompt || (params && Object.keys(params).length > 0),
+ )
// Get the instructions prompt if we have a prompt/params
const instructionsPrompt = hasPrompt
@@ -479,23 +496,27 @@ export const loopAgentSteps = async (
: undefined
// Build the initial message history with user prompt and instructions
- const initialMessages = buildArray(
- ...agentState.messageHistory.map((m) => ({
+ const initialMessages = buildArray(
+ agentState.messageHistory.map((m) => ({
...m,
keepDuringTruncation: false,
})),
- toolResults.length > 0 && {
- role: 'user' as const,
- content: asSystemMessage(renderToolResults(toolResults)),
- },
+ toolResults.map((result) => {
+ return {
+ role: 'tool' as const,
+ content: result,
+ }
+ }),
hasPrompt && [
{
// Actual user prompt!
role: 'user' as const,
content: asUserMessage(
- `${prompt ?? ''}${params ? `\n\n${JSON.stringify(params, null, 2)}` : ''}`,
+ buildArray([prompt, params && JSON.stringify(params, null, 2)]).join(
+ '\n\n',
+ ),
),
keepDuringTruncation: true,
},
@@ -563,6 +584,12 @@ export const loopAgentSteps = async (
// End turn if programmatic step ended turn, or if the previous runAgentStep ended turn
if (shouldEndTurn) {
+ if (clearUserPromptMessagesAfterResponse) {
+ currentAgentState.messageHistory = expireMessages(
+ currentAgentState.messageHistory,
+ 'userPrompt',
+ )
+ }
return {
agentState: currentAgentState,
}
@@ -590,6 +617,12 @@ export const loopAgentSteps = async (
currentParams = undefined
}
+ if (clearUserPromptMessagesAfterResponse) {
+ currentAgentState.messageHistory = expireMessages(
+ currentAgentState.messageHistory,
+ 'userPrompt',
+ )
+ }
return { agentState: currentAgentState }
} catch (error) {
// Log the error but still return the state with partial costs
diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index eecbb32fc..57a8c46f4 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -13,11 +13,14 @@ import type {
StepGenerator,
PublicAgentState,
} from '@codebuff/common/types/agent-template'
+import type {
+ ToolResultOutput,
+ ToolResultPart,
+} from '@codebuff/common/types/messages/content-part'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type {
AgentState,
AgentTemplateType,
- ToolResult,
} from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { WebSocket } from 'ws'
@@ -121,7 +124,7 @@ export async function runProgrammaticStep(
// Initialize state for tool execution
const toolCalls: CodebuffToolCall[] = []
- const toolResults: ToolResult[] = []
+ const toolResults: ToolResultPart[] = []
const state = {
ws,
fingerprintId,
@@ -146,7 +149,7 @@ export async function runProgrammaticStep(
messages: agentState.messageHistory.map((msg) => ({ ...msg })),
}
- let toolResult: string | undefined
+ let toolResult: ToolResultOutput[] = []
let endTurn = false
try {
@@ -181,7 +184,9 @@ export async function runProgrammaticStep(
const toolCall = {
...toolCallWithoutId,
toolCallId: crypto.randomUUID(),
- } as CodebuffToolCall
+ } as CodebuffToolCall & {
+ includeToolCall?: boolean
+ }
if (!template.toolNames.includes(toolCall.toolName)) {
throw new Error(
@@ -191,7 +196,7 @@ export async function runProgrammaticStep(
// Add assistant message with the tool call before executing it
// Exception: don't add tool call message for add_message since it adds its own message
- if (toolCall.toolName !== 'add_message') {
+ if (toolCall?.includeToolCall !== false) {
const toolCallString = getToolCallString(
toolCall.toolName,
toolCall.input,
@@ -234,7 +239,7 @@ export async function runProgrammaticStep(
state.agentState.messageHistory = state.messages
// Get the latest tool result
- toolResult = toolResults[toolResults.length - 1]?.output.value
+ toolResult = toolResults[toolResults.length - 1]?.output
if (toolCall.toolName === 'end_turn') {
endTurn = true
diff --git a/backend/src/templates/strings.ts b/backend/src/templates/strings.ts
index 8035b8299..76f6f8a2e 100644
--- a/backend/src/templates/strings.ts
+++ b/backend/src/templates/strings.ts
@@ -1,6 +1,5 @@
import { CodebuffConfigSchema } from '@codebuff/common/json-config/constants'
-import { renderToolResults } from '@codebuff/common/tools/utils'
-import { escapeString, generateCompactId } from '@codebuff/common/util/string'
+import { escapeString } from '@codebuff/common/util/string'
import { schemaToJsonStr } from '@codebuff/common/util/zod-schema'
import { z } from 'zod/v4'
@@ -74,27 +73,25 @@ export async function formatPrompt(
[PLACEHOLDER.USER_CWD]: fileContext.cwd,
[PLACEHOLDER.USER_INPUT_PROMPT]: escapeString(lastUserInput ?? ''),
[PLACEHOLDER.INITIAL_AGENT_PROMPT]: escapeString(intitialAgentPrompt ?? ''),
- [PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS]: renderToolResults(
- Object.entries({
- ...Object.fromEntries(
- Object.entries(fileContext.knowledgeFiles)
- .filter(([path]) =>
- [
- 'knowledge.md',
- 'CLAUDE.md',
- 'codebuff.json',
- 'codebuff.jsonc',
- ].includes(path),
- )
- .map(([path, content]) => [path, content.trim()]),
- ),
- ...fileContext.userKnowledgeFiles,
- }).map(([path, content]) => ({
- toolName: 'read_files',
- toolCallId: generateCompactId(),
- output: { type: 'text', value: JSON.stringify({ path, content }) },
- })),
- ),
+ [PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS]: Object.entries({
+ ...Object.fromEntries(
+ Object.entries(fileContext.knowledgeFiles)
+ .filter(([path]) =>
+ [
+ 'knowledge.md',
+ 'CLAUDE.md',
+ 'codebuff.json',
+ 'codebuff.jsonc',
+ ].includes(path),
+ )
+ .map(([path, content]) => [path, content.trim()]),
+ ),
+ ...fileContext.userKnowledgeFiles,
+ })
+ .map(([path, content]) => {
+ return `\`\`\`${path}\n${content.trim()}\n\`\`\``
+ })
+ .join('\n\n'),
}
for (const varName of placeholderValues) {
diff --git a/backend/src/tools/definitions/list.ts b/backend/src/tools/definitions/list.ts
index 33cc4f53c..b4f21d93b 100644
--- a/backend/src/tools/definitions/list.ts
+++ b/backend/src/tools/definitions/list.ts
@@ -1,4 +1,4 @@
-import { llmToolCallSchema } from '@codebuff/common/tools/list'
+import { $toolParams } from '@codebuff/common/tools/list'
import { addMessageTool } from './tool/add-message'
import { addSubgoalTool } from './tool/add-subgoal'
@@ -53,7 +53,7 @@ const toolDescriptions = {
}
export type ToolDefinition = {
- [K in ToolName]: (typeof toolDescriptions)[K] & (typeof llmToolCallSchema)[K]
+ [K in ToolName]: (typeof toolDescriptions)[K] & (typeof $toolParams)[K]
}[T]
export const codebuffToolDefs = Object.fromEntries(
@@ -61,7 +61,7 @@ export const codebuffToolDefs = Object.fromEntries(
toolName,
{
...toolDescriptions[toolName as ToolName],
- ...llmToolCallSchema[toolName as ToolName],
+ ...$toolParams[toolName as ToolName],
} satisfies ToolDefinition,
]),
) as { [K in ToolName]: ToolDefinition } satisfies ToolSet
diff --git a/backend/src/tools/handlers/handler-function-type.ts b/backend/src/tools/handlers/handler-function-type.ts
index 7ce02ce53..ebcc6d824 100644
--- a/backend/src/tools/handlers/handler-function-type.ts
+++ b/backend/src/tools/handlers/handler-function-type.ts
@@ -3,8 +3,10 @@ import type {
ClientToolCall,
ClientToolName,
CodebuffToolCall,
+ CodebuffToolOutput,
+ CodebuffToolResult,
} from '@codebuff/common/tools/list'
-import { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type { ProjectFileContext } from '@codebuff/common/util/file'
type PresentOrAbsent =
@@ -31,9 +33,9 @@ export type CodebuffToolHandlerFunction = (
'requestClientToolCall',
(
toolCall: ClientToolCall,
- ) => Promise
+ ) => Promise>
>,
) => {
- result: Promise
+ result: Promise['output']>
state?: Record
}
diff --git a/backend/src/tools/handlers/tool/add-message.ts b/backend/src/tools/handlers/tool/add-message.ts
index fa1072b79..4e99920e6 100644
--- a/backend/src/tools/handlers/tool/add-message.ts
+++ b/backend/src/tools/handlers/tool/add-message.ts
@@ -1,6 +1,9 @@
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
export const handleAddMessage = (({
previousToolCallFinished,
@@ -9,16 +12,18 @@ export const handleAddMessage = (({
}: {
previousToolCallFinished: Promise
toolCall: CodebuffToolCall<'add_message'>
- getLatestState: () => { messages: CodebuffMessage[] }
+ getLatestState: () => { messages: Message[] }
}): {
- result: Promise
+ result: Promise>
state: {}
} => {
return {
- result: previousToolCallFinished.then(() => {
+ result: (async () => {
+ await previousToolCallFinished
+
getLatestState().messages.push(toolCall.input)
- return undefined
- }),
+ return []
+ })(),
state: {},
}
}) satisfies CodebuffToolHandlerFunction<'add_message'>
diff --git a/backend/src/tools/handlers/tool/add-subgoal.ts b/backend/src/tools/handlers/tool/add-subgoal.ts
index 3c9c60b33..fec5d87cc 100644
--- a/backend/src/tools/handlers/tool/add-subgoal.ts
+++ b/backend/src/tools/handlers/tool/add-subgoal.ts
@@ -1,7 +1,10 @@
import { buildArray } from '@codebuff/common/util/array'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
import type { Subgoal } from '@codebuff/common/types/session-state'
export const handleAddSubgoal = ((params: {
@@ -9,7 +12,7 @@ export const handleAddSubgoal = ((params: {
toolCall: CodebuffToolCall<'add_subgoal'>
state: { agentContext?: Record }
}): {
- result: Promise
+ result: Promise>
state: { agentContext: Record }
} => {
const { previousToolCallFinished, toolCall, state } = params
@@ -23,7 +26,17 @@ export const handleAddSubgoal = ((params: {
}
return {
- result: previousToolCallFinished.then(() => 'Successfully added subgoal'),
+ result: (async () => {
+ await previousToolCallFinished
+ return [
+ {
+ type: 'json',
+ value: {
+ message: 'Successfully added subgoal',
+ },
+ },
+ ]
+ })(),
state: { agentContext },
}
}) satisfies CodebuffToolHandlerFunction<'add_subgoal'>
diff --git a/backend/src/tools/handlers/tool/browser-logs.ts b/backend/src/tools/handlers/tool/browser-logs.ts
index 7d1234dc3..dc2f460d4 100644
--- a/backend/src/tools/handlers/tool/browser-logs.ts
+++ b/backend/src/tools/handlers/tool/browser-logs.ts
@@ -2,6 +2,7 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
import type {
ClientToolCall,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
export const handleBrowserLogs = ((params: {
@@ -9,14 +10,15 @@ export const handleBrowserLogs = ((params: {
toolCall: CodebuffToolCall<'browser_logs'>
requestClientToolCall: (
toolCall: ClientToolCall<'browser_logs'>,
- ) => Promise
-}): { result: Promise; state: {} } => {
+ ) => Promise>
+}): { result: Promise>; state: {} } => {
const { previousToolCallFinished, toolCall, requestClientToolCall } = params
return {
- result: previousToolCallFinished.then(() =>
- requestClientToolCall(toolCall),
- ),
+ result: (async () => {
+ await previousToolCallFinished
+ return await requestClientToolCall(toolCall)
+ })(),
state: {},
}
}) satisfies CodebuffToolHandlerFunction<'browser_logs'>
diff --git a/backend/src/tools/handlers/tool/code-search.ts b/backend/src/tools/handlers/tool/code-search.ts
index 3221985b7..fb05802cf 100644
--- a/backend/src/tools/handlers/tool/code-search.ts
+++ b/backend/src/tools/handlers/tool/code-search.ts
@@ -2,6 +2,7 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
import type {
ClientToolCall,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
export const handleCodeSearch = ((params: {
@@ -9,14 +10,15 @@ export const handleCodeSearch = ((params: {
toolCall: CodebuffToolCall<'code_search'>
requestClientToolCall: (
toolCall: ClientToolCall<'code_search'>,
- ) => Promise
-}): { result: Promise; state: {} } => {
+ ) => Promise>
+}): { result: Promise>; state: {} } => {
const { previousToolCallFinished, toolCall, requestClientToolCall } = params
return {
- result: previousToolCallFinished.then(() =>
- requestClientToolCall(toolCall),
- ),
+ result: (async () => {
+ await previousToolCallFinished
+ return await requestClientToolCall(toolCall)
+ })(),
state: {},
}
}) satisfies CodebuffToolHandlerFunction<'code_search'>
diff --git a/backend/src/tools/handlers/tool/create-plan.ts b/backend/src/tools/handlers/tool/create-plan.ts
index d242b5a7c..9363c6f80 100644
--- a/backend/src/tools/handlers/tool/create-plan.ts
+++ b/backend/src/tools/handlers/tool/create-plan.ts
@@ -12,6 +12,7 @@ import type {
import type {
ClientToolCall,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
export const handleCreatePlan = ((params: {
@@ -19,7 +20,7 @@ export const handleCreatePlan = ((params: {
toolCall: CodebuffToolCall<'create_plan'>
requestClientToolCall: (
toolCall: ClientToolCall<'create_plan'>,
- ) => Promise
+ ) => Promise>
writeToClient: (chunk: string) => void
getLatestState: () => FileProcessingState
@@ -32,7 +33,7 @@ export const handleCreatePlan = ((params: {
repoId?: string
} & OptionalFileProcessingState
}): {
- result: Promise
+ result: Promise>
state: FileProcessingState
} => {
const {
@@ -86,14 +87,15 @@ export const handleCreatePlan = ((params: {
fileProcessingState.allPromises.push(Promise.resolve(change))
return {
- result: previousToolCallFinished.then(async () => {
+ result: (async () => {
+ await previousToolCallFinished
return await postStreamProcessing<'create_plan'>(
change,
getLatestState(),
writeToClient,
requestClientToolCall,
)
- }),
+ })(),
state: fileProcessingState,
}
}) satisfies CodebuffToolHandlerFunction<'create_plan'>
diff --git a/backend/src/tools/handlers/tool/end-turn.ts b/backend/src/tools/handlers/tool/end-turn.ts
index 80be2fe4e..a04dbceab 100644
--- a/backend/src/tools/handlers/tool/end-turn.ts
+++ b/backend/src/tools/handlers/tool/end-turn.ts
@@ -1,9 +1,20 @@
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
-export const handleEndTurn = ((params: {
+export const handleEndTurn = (({
+ previousToolCallFinished,
+}: {
previousToolCallFinished: Promise
toolCall: CodebuffToolCall<'end_turn'>
-}): { result: Promise; state: {} } => {
- return { result: params.previousToolCallFinished.then(() => ''), state: {} }
+}): { result: Promise>; state: {} } => {
+ return {
+ result: (async () => {
+ await previousToolCallFinished
+ return []
+ })(),
+ state: {},
+ }
}) satisfies CodebuffToolHandlerFunction<'end_turn'>
diff --git a/backend/src/tools/handlers/tool/find-files.ts b/backend/src/tools/handlers/tool/find-files.ts
index a4fb94b8f..03438a17e 100644
--- a/backend/src/tools/handlers/tool/find-files.ts
+++ b/backend/src/tools/handlers/tool/find-files.ts
@@ -14,8 +14,11 @@ import { requestFiles } from '../../../websockets/websocket-action'
import type { TextBlock } from '../../../llm-apis/claude'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
import type { GetExpandedFileContextForTrainingBlobTrace } from '@codebuff/bigquery'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { WebSocket } from 'ws'
@@ -37,9 +40,9 @@ export const handleFindFiles = ((params: {
fingerprintId?: string
userId?: string
repoId?: string
- messages?: CodebuffMessage[]
+ messages?: Message[]
}
-}): { result: Promise; state: {} } => {
+}): { result: Promise>; state: {} } => {
const {
previousToolCallFinished,
toolCall,
@@ -73,7 +76,9 @@ export const handleFindFiles = ((params: {
userId,
})
- const triggerFindFiles = async () => {
+ const triggerFindFiles: () => Promise<
+ CodebuffToolOutput<'find_files'>
+ > = async () => {
const requestedFiles = await requestRelevantFiles(
{ messages, system },
fileContext,
@@ -119,16 +124,41 @@ export const handleFindFiles = ((params: {
}
if (addedFiles.length > 0) {
- return renderReadFilesResult(addedFiles, fileContext.tokenCallers ?? {})
+ return [
+ {
+ type: 'json',
+ value: renderReadFilesResult(
+ addedFiles,
+ fileContext.tokenCallers ?? {},
+ ),
+ },
+ ]
}
- return `No new relevant files found for prompt: ${prompt}`
+ return [
+ {
+ type: 'json',
+ value: {
+ message: `No new relevant files found for prompt: ${prompt}`,
+ },
+ },
+ ]
} else {
- return `No relevant files found for prompt: ${prompt}`
+ return [
+ {
+ type: 'json',
+ value: {
+ message: `No relevant files found for prompt: ${prompt}`,
+ },
+ },
+ ]
}
}
return {
- result: previousToolCallFinished.then(triggerFindFiles),
+ result: (async () => {
+ await previousToolCallFinished
+ return await triggerFindFiles()
+ })(),
state: {},
}
}) satisfies CodebuffToolHandlerFunction<'find_files'>
@@ -139,7 +169,7 @@ async function uploadExpandedFileContextForTraining(
messages,
system,
}: {
- messages: CodebuffMessage[]
+ messages: Message[]
system: string | Array
},
fileContext: ProjectFileContext,
diff --git a/backend/src/tools/handlers/tool/read-docs.ts b/backend/src/tools/handlers/tool/read-docs.ts
index e27f34ab8..cf2b9eae2 100644
--- a/backend/src/tools/handlers/tool/read-docs.ts
+++ b/backend/src/tools/handlers/tool/read-docs.ts
@@ -2,7 +2,10 @@ import { fetchContext7LibraryDocumentation } from '../../../llm-apis/context7-ap
import { logger } from '../../../util/logger'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
export const handleReadDocs = (({
previousToolCallFinished,
@@ -25,7 +28,7 @@ export const handleReadDocs = (({
repoId?: string
}
}): {
- result: Promise
+ result: Promise>
state: {}
} => {
const { libraryTitle, topic, max_tokens } = toolCall.input
@@ -121,7 +124,14 @@ export const handleReadDocs = (({
return {
result: (async () => {
await previousToolCallFinished
- return await documentationPromise
+ return [
+ {
+ type: 'json',
+ value: {
+ documentation: await documentationPromise,
+ },
+ },
+ ]
})(),
state: {},
}
diff --git a/backend/src/tools/handlers/tool/read-files.ts b/backend/src/tools/handlers/tool/read-files.ts
index 89bd8666a..503983a7d 100644
--- a/backend/src/tools/handlers/tool/read-files.ts
+++ b/backend/src/tools/handlers/tool/read-files.ts
@@ -2,14 +2,18 @@ import { getFileReadingUpdates } from '../../../get-file-reading-updates'
import { renderReadFilesResult } from '../../../util/parse-tool-call-xml'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { WebSocket } from 'ws'
+type ToolName = 'read_files'
export const handleReadFiles = ((params: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'read_files'>
+ toolCall: CodebuffToolCall
agentStepId: string
clientSessionId: string
@@ -21,10 +25,10 @@ export const handleReadFiles = ((params: {
userId?: string
fingerprintId?: string
repoId?: string
- messages?: CodebuffMessage[]
+ messages?: Message[]
}
}): {
- result: Promise
+ result: Promise>
state: {}
} => {
const {
@@ -77,8 +81,13 @@ export const handleReadFiles = ((params: {
return {
result: (async () => {
await previousToolCallFinished
- return await readFilesResultsPromise
+ return [
+ {
+ type: 'json',
+ value: await readFilesResultsPromise,
+ },
+ ]
})(),
state: {},
}
-}) satisfies CodebuffToolHandlerFunction<'read_files'>
+}) satisfies CodebuffToolHandlerFunction
diff --git a/backend/src/tools/handlers/tool/run-file-change-hooks.ts b/backend/src/tools/handlers/tool/run-file-change-hooks.ts
index 28e00bb3b..e0a20e895 100644
--- a/backend/src/tools/handlers/tool/run-file-change-hooks.ts
+++ b/backend/src/tools/handlers/tool/run-file-change-hooks.ts
@@ -2,21 +2,24 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
import type {
ClientToolCall,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
+type ToolName = 'run_file_change_hooks'
export const handleRunFileChangeHooks = ((params: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'run_file_change_hooks'>
+ toolCall: CodebuffToolCall
requestClientToolCall: (
- toolCall: ClientToolCall<'run_file_change_hooks'>,
- ) => Promise
-}): { result: Promise; state: {} } => {
+ toolCall: ClientToolCall,
+ ) => Promise>
+}): { result: Promise>; state: {} } => {
const { previousToolCallFinished, toolCall, requestClientToolCall } = params
return {
- result: previousToolCallFinished.then(() =>
- requestClientToolCall(toolCall),
- ),
+ result: (async () => {
+ await previousToolCallFinished
+ return await requestClientToolCall(toolCall)
+ })(),
state: {},
}
}) satisfies CodebuffToolHandlerFunction<'run_file_change_hooks'>
diff --git a/backend/src/tools/handlers/tool/run-terminal-command.ts b/backend/src/tools/handlers/tool/run-terminal-command.ts
index 4d094539f..6cc2e1ada 100644
--- a/backend/src/tools/handlers/tool/run-terminal-command.ts
+++ b/backend/src/tools/handlers/tool/run-terminal-command.ts
@@ -2,18 +2,22 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
import type {
ClientToolCall,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
-export const handleRunTerminalCommand = ((params: {
+type ToolName = 'run_terminal_command'
+export const handleRunTerminalCommand = (({
+ previousToolCallFinished,
+ toolCall,
+ requestClientToolCall,
+}: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'run_terminal_command'>
+ toolCall: CodebuffToolCall
requestClientToolCall: (
- toolCall: ClientToolCall<'run_terminal_command'>,
- ) => Promise
-}): { result: Promise; state: {} } => {
- const { previousToolCallFinished, toolCall, requestClientToolCall } = params
-
- const clientToolCall: ClientToolCall<'run_terminal_command'> = {
+ toolCall: ClientToolCall,
+ ) => Promise>
+}): { result: Promise>; state: {} } => {
+ const clientToolCall: ClientToolCall = {
toolName: 'run_terminal_command',
toolCallId: toolCall.toolCallId,
input: {
@@ -25,9 +29,10 @@ export const handleRunTerminalCommand = ((params: {
},
}
return {
- result: previousToolCallFinished.then(() =>
- requestClientToolCall(clientToolCall),
- ),
+ result: (async () => {
+ await previousToolCallFinished
+ return await requestClientToolCall(clientToolCall)
+ })(),
state: {},
}
-}) satisfies CodebuffToolHandlerFunction<'run_terminal_command'>
+}) satisfies CodebuffToolHandlerFunction
diff --git a/backend/src/tools/handlers/tool/set-messages.ts b/backend/src/tools/handlers/tool/set-messages.ts
index 8b84fea07..9054535ca 100644
--- a/backend/src/tools/handlers/tool/set-messages.ts
+++ b/backend/src/tools/handlers/tool/set-messages.ts
@@ -1,6 +1,9 @@
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
export const handleSetMessages = (({
previousToolCallFinished,
@@ -9,16 +12,17 @@ export const handleSetMessages = (({
}: {
previousToolCallFinished: Promise
toolCall: CodebuffToolCall<'set_messages'>
- getLatestState: () => { messages: CodebuffMessage[] }
+ getLatestState: () => { messages: Message[] }
}): {
- result: Promise
+ result: Promise>
state: {}
} => {
return {
- result: previousToolCallFinished.then(() => {
+ result: (async () => {
+ await previousToolCallFinished
getLatestState().messages = toolCall.input.messages
- return undefined
- }),
+ return []
+ })(),
state: {},
}
}) satisfies CodebuffToolHandlerFunction<'set_messages'>
diff --git a/backend/src/tools/handlers/tool/set-output.ts b/backend/src/tools/handlers/tool/set-output.ts
index 45366e594..1b3331b78 100644
--- a/backend/src/tools/handlers/tool/set-output.ts
+++ b/backend/src/tools/handlers/tool/set-output.ts
@@ -2,21 +2,25 @@ import { getAgentTemplate } from '../../../templates/agent-registry'
import { logger } from '../../../util/logger'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
import type { AgentState } from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
+type ToolName = 'set_output'
export const handleSetOutput = ((params: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'set_output'>
+ toolCall: CodebuffToolCall
fileContext: ProjectFileContext
state: {
agentState?: AgentState
localAgentTemplates?: Record
}
}): {
- result: Promise
+ result: Promise>
state: { agentState: AgentState }
} => {
const { previousToolCallFinished, toolCall, state } = params
@@ -69,7 +73,17 @@ export const handleSetOutput = ((params: {
}
return {
- result: previousToolCallFinished.then(triggerSetOutput),
+ result: (async () => {
+ await previousToolCallFinished
+ return [
+ {
+ type: 'json',
+ value: {
+ message: await triggerSetOutput(),
+ },
+ },
+ ]
+ })(),
state: { agentState: agentState },
}
-}) satisfies CodebuffToolHandlerFunction<'set_output'>
+}) satisfies CodebuffToolHandlerFunction
diff --git a/backend/src/tools/handlers/tool/spawn-agent-inline.ts b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
index 77acd59d8..35f1758cc 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-inline.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
@@ -10,33 +10,37 @@ import {
} from './spawn-agent-utils'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type { AgentState } from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { WebSocket } from 'ws'
+type ToolName = 'spawn_agent_inline'
export const handleSpawnAgentInline = ((params: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'spawn_agent_inline'>
+ toolCall: CodebuffToolCall
fileContext: ProjectFileContext
clientSessionId: string
userInputId: string
writeToClient: (chunk: string | PrintModeEvent) => void
- getLatestState: () => { messages: CodebuffMessage[] }
+ getLatestState: () => { messages: Message[] }
state: {
ws?: WebSocket
fingerprintId?: string
userId?: string
agentTemplate?: AgentTemplate
localAgentTemplates?: Record
- messages?: CodebuffMessage[]
+ messages?: Message[]
agentState?: AgentState
}
-}): { result: Promise; state: {} } => {
+}): { result: Promise>; state: {} } => {
const {
previousToolCallFinished,
toolCall,
@@ -110,6 +114,7 @@ export const handleSpawnAgentInline = ((params: {
// Inherits parent's onResponseChunk
// writeToClient(chunk)
},
+ clearUserPromptMessagesAfterResponse: false,
})
// Update parent's message history with child's final state
@@ -127,7 +132,11 @@ export const handleSpawnAgentInline = ((params: {
}
return {
- result: previousToolCallFinished.then(triggerSpawnAgentInline),
+ result: (async () => {
+ await previousToolCallFinished
+ await triggerSpawnAgentInline()
+ return []
+ })(),
state: {},
}
-}) satisfies CodebuffToolHandlerFunction<'spawn_agent_inline'>
+}) satisfies CodebuffToolHandlerFunction
diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
index ecb3ba510..793e84207 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -6,7 +6,10 @@ import { getAgentTemplate } from '../../../templates/agent-registry'
import { logger } from '../../../util/logger'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+ AssistantMessage,
+ Message,
+} from '@codebuff/common/types/messages/codebuff-message'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type {
AgentState,
@@ -27,7 +30,7 @@ export interface BaseSpawnState {
userId?: string
agentTemplate?: AgentTemplate
localAgentTemplates?: Record
- messages?: CodebuffMessage[]
+ messages?: Message[]
agentState?: AgentState
}
@@ -35,7 +38,7 @@ export interface SpawnContext {
fileContext: ProjectFileContext
clientSessionId: string
userInputId: string
- getLatestState: () => { messages: CodebuffMessage[] }
+ getLatestState: () => { messages: Message[] }
}
/**
@@ -221,9 +224,7 @@ export function validateAgentInput(
/**
* Creates conversation history message for spawned agents
*/
-export function createConversationHistoryMessage(
- messages: CodebuffMessage[],
-): CodebuffMessage {
+export function createConversationHistoryMessage(messages: Message[]): Message {
// Filter out system messages from conversation history to avoid including parent's system prompt
const messagesWithoutSystem = messages.filter(
(message) => message.role !== 'system',
@@ -244,7 +245,7 @@ export function createConversationHistoryMessage(
export function createAgentState(
agentType: string,
parentAgentState: AgentState,
- messageHistory: CodebuffMessage[],
+ messageHistory: Message[],
): AgentState {
const agentId = generateCompactId()
@@ -302,6 +303,7 @@ export async function executeAgent({
clientSessionId,
onResponseChunk,
isOnlyChild = false,
+ clearUserPromptMessagesAfterResponse = true,
}: {
ws: WebSocket
userInputId: string
@@ -316,6 +318,7 @@ export async function executeAgent({
clientSessionId: string
onResponseChunk: (chunk: string | PrintModeEvent) => void
isOnlyChild?: boolean
+ clearUserPromptMessagesAfterResponse?: boolean
}) {
const width = 60
const fullAgentName = `${agentTemplate.displayName} (${agentTemplate.id})`
@@ -346,6 +349,7 @@ export async function executeAgent({
userId,
clientSessionId,
onResponseChunk,
+ clearUserPromptMessagesAfterResponse,
})
// Send agent end notification if this is the only child
@@ -371,42 +375,60 @@ export async function formatAgentResult(
result: { agentState: AgentState },
agentTemplate: AgentTemplate,
agentTypeStr: string,
-): Promise {
- const agentName = agentTemplate.displayName
- let report = ''
+): Promise<
+ {
+ agentType: string
+ agentName: string
+ } & (
+ | { errorMessage: string }
+ | { structuredOutput: Record | null }
+ | {
+ lastMessage: any
+ }
+ | {
+ allMessages: any[]
+ }
+ )
+> {
+ const agentInfo = {
+ agentType: agentTemplate.id,
+ agentName: agentTemplate.displayName,
+ }
if (agentTemplate.outputMode === 'structured_output') {
- report = JSON.stringify(result.agentState.output, null, 2)
- } else if (agentTemplate.outputMode === 'last_message') {
+ return {
+ ...agentInfo,
+ structuredOutput: result.agentState.output ?? null,
+ }
+ }
+ if (agentTemplate.outputMode === 'last_message') {
const { agentState } = result
const assistantMessages = agentState.messageHistory.filter(
- (message) => message.role === 'assistant',
+ (message): message is AssistantMessage => message.role === 'assistant',
)
const lastAssistantMessage = assistantMessages[assistantMessages.length - 1]
if (!lastAssistantMessage) {
- report = 'No response from agent'
- } else if (typeof lastAssistantMessage.content === 'string') {
- report = lastAssistantMessage.content
- } else {
- report = JSON.stringify(lastAssistantMessage.content, null, 2)
+ return {
+ ...agentInfo,
+ errorMessage: 'No response from agent',
+ }
+ }
+ return {
+ ...agentInfo,
+ lastMessage: lastAssistantMessage.content,
}
- } else if (agentTemplate.outputMode === 'all_messages') {
+ }
+ if (agentTemplate.outputMode === 'all_messages') {
const { agentState } = result
// Remove the first message, which includes the previous conversation history.
const agentMessages = agentState.messageHistory.slice(1)
- report = `Agent messages:\n\n${JSON.stringify(agentMessages, null, 2)}`
- } else {
- throw new Error(
- `Unknown output mode: ${'outputMode' in agentTemplate ? agentTemplate.outputMode : 'undefined'}`,
- )
+ return {
+ ...agentInfo,
+ allMessages: agentMessages,
+ }
}
-
- return `**${agentName}(${agentTypeStr}):**\n${report}`
-}
-
-/**
- * Formats error result for failed agent spawn
- */
-export function formatAgentError(agentTypeStr: string, error: any): string {
- return `**Agent (${agentTypeStr}):**\nError spawning agent: ${error}`
+ agentTemplate.outputMode satisfies never
+ throw new Error(
+ `Unknown output mode: ${'outputMode' in agentTemplate ? agentTemplate.outputMode : 'undefined'}`,
+ )
}
diff --git a/backend/src/tools/handlers/tool/spawn-agents-async.ts b/backend/src/tools/handlers/tool/spawn-agents-async.ts
index 95ed17c5e..1e67a2935 100644
--- a/backend/src/tools/handlers/tool/spawn-agents-async.ts
+++ b/backend/src/tools/handlers/tool/spawn-agents-async.ts
@@ -15,24 +15,28 @@ import { logger } from '../../../util/logger'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
import type { SendSubagentChunk } from './spawn-agents'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type { AgentState } from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { WebSocket } from 'ws'
+type ToolName = 'spawn_agents_async'
export const handleSpawnAgentsAsync = ((params: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'spawn_agents_async'>
+ toolCall: CodebuffToolCall
fileContext: ProjectFileContext
clientSessionId: string
userInputId: string
writeToClient: (chunk: string | PrintModeEvent) => void
- getLatestState: () => { messages: CodebuffMessage[] }
+ getLatestState: () => { messages: Message[] }
state: {
ws?: WebSocket
fingerprintId?: string
@@ -40,10 +44,10 @@ export const handleSpawnAgentsAsync = ((params: {
agentTemplate?: AgentTemplate
localAgentTemplates?: Record
sendSubagentChunk?: SendSubagentChunk
- messages?: CodebuffMessage[]
+ messages?: Message[]
agentState?: AgentState
}
-}): { result: Promise; state: {} } => {
+}): { result: Promise>; state: {} } => {
if (!ASYNC_AGENTS_ENABLED) {
return handleSpawnAgents({
...params,
@@ -81,13 +85,10 @@ export const handleSpawnAgentsAsync = ((params: {
)
}
- const triggerSpawnAgentsAsync = async () => {
- const results: Array<{
- agentType: string
- success: boolean
- agentId?: string
- error?: string
- }> = []
+ const triggerSpawnAgentsAsync: () => Promise<
+ CodebuffToolOutput[0]['value']
+ > = async () => {
+ const results: CodebuffToolOutput[0]['value'] = []
const conversationHistoryMessage = createConversationHistoryMessage(
getLatestState().messages,
@@ -104,7 +105,7 @@ export const handleSpawnAgentsAsync = ((params: {
validateAgentInput(agentTemplate, agentType, prompt, params)
- const subAgentMessages: CodebuffMessage[] = []
+ const subAgentMessages: Message[] = []
if (agentTemplate.includeMessageHistory) {
subAgentMessages.push(conversationHistoryMessage)
}
@@ -257,7 +258,7 @@ export const handleSpawnAgentsAsync = ((params: {
results.push({
agentType: agentTypeStr,
success: false,
- error: errorMessage,
+ errorMessage,
})
logger.error(
{ agentType: agentTypeStr, error },
@@ -267,23 +268,19 @@ export const handleSpawnAgentsAsync = ((params: {
}
}
- const successful = results.filter((r) => r.success)
-
- let result = `Agent spawn results (${successful.length}/${results.length} successful):\n`
-
- results.forEach(({ agentType, success, agentId, error }) => {
- if (success) {
- result += `✓ ${agentType}: spawned (${agentId})\n`
- } else {
- result += `✗ ${agentType}: failed - ${error}\n`
- }
- })
-
- return result.trim()
+ return results
}
return {
- result: previousToolCallFinished.then(triggerSpawnAgentsAsync),
+ result: (async () => {
+ await previousToolCallFinished
+ return [
+ {
+ type: 'json',
+ value: await triggerSpawnAgentsAsync(),
+ },
+ ]
+ })(),
state: {},
}
-}) satisfies CodebuffToolHandlerFunction<'spawn_agents_async'>
+}) satisfies CodebuffToolHandlerFunction
diff --git a/backend/src/tools/handlers/tool/spawn-agents.ts b/backend/src/tools/handlers/tool/spawn-agents.ts
index 34b616b2f..7dcb8752c 100644
--- a/backend/src/tools/handlers/tool/spawn-agents.ts
+++ b/backend/src/tools/handlers/tool/spawn-agents.ts
@@ -7,14 +7,16 @@ import {
logAgentSpawn,
executeAgent,
formatAgentResult,
- formatAgentError,
} from './spawn-agent-utils'
import { logger } from '../../../util/logger'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
import type { AgentState } from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
@@ -28,16 +30,17 @@ export type SendSubagentChunk = (data: {
prompt?: string
}) => void
+type ToolName = 'spawn_agents'
export const handleSpawnAgents = ((params: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'spawn_agents'>
+ toolCall: CodebuffToolCall
fileContext: ProjectFileContext
clientSessionId: string
userInputId: string
writeToClient: (chunk: string | PrintModeEvent) => void
- getLatestState: () => { messages: CodebuffMessage[] }
+ getLatestState: () => { messages: Message[] }
state: {
ws?: WebSocket
fingerprintId?: string
@@ -45,10 +48,10 @@ export const handleSpawnAgents = ((params: {
agentTemplate?: AgentTemplate
localAgentTemplates?: Record
sendSubagentChunk?: SendSubagentChunk
- messages?: CodebuffMessage[]
+ messages?: Message[]
agentState?: AgentState
}
-}): { result: Promise; state: {} } => {
+}): { result: Promise>; state: {} } => {
const {
previousToolCallFinished,
toolCall,
@@ -95,7 +98,7 @@ export const handleSpawnAgents = ((params: {
validateAgentInput(agentTemplate, agentType, prompt, params)
- const subAgentMessages: CodebuffMessage[] = []
+ const subAgentMessages: Message[] = []
if (agentTemplate.includeMessageHistory) {
subAgentMessages.push(conversationHistoryMessage)
}
@@ -172,7 +175,10 @@ export const handleSpawnAgents = ((params: {
agentTypeStr,
)
} else {
- return formatAgentError(agentTypeStr, result.reason)
+ return {
+ agentType: agentTypeStr,
+ errorMessage: `Error spawning agent: ${result.reason}`,
+ }
}
}),
)
@@ -219,11 +225,17 @@ export const handleSpawnAgents = ((params: {
})
return reports
- .map((report: string) => `${report}`)
- .join('\n')
}
return {
- result: previousToolCallFinished.then(triggerSpawnAgents),
+ result: (async () => {
+ await previousToolCallFinished
+ return [
+ {
+ type: 'json',
+ value: await triggerSpawnAgents(),
+ },
+ ]
+ })(),
state: {},
}
-}) satisfies CodebuffToolHandlerFunction<'spawn_agents'>
+}) satisfies CodebuffToolHandlerFunction
diff --git a/backend/src/tools/handlers/tool/str-replace.ts b/backend/src/tools/handlers/tool/str-replace.ts
index 141f9f70a..f89f6bf52 100644
--- a/backend/src/tools/handlers/tool/str-replace.ts
+++ b/backend/src/tools/handlers/tool/str-replace.ts
@@ -11,6 +11,7 @@ import type {
import type {
ClientToolCall,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
import type { WebSocket } from 'ws'
@@ -19,7 +20,7 @@ export const handleStrReplace = ((params: {
toolCall: CodebuffToolCall<'str_replace'>
requestClientToolCall: (
toolCall: ClientToolCall<'str_replace'>,
- ) => Promise
+ ) => Promise>
writeToClient: (chunk: string) => void
getLatestState: () => FileProcessingState
@@ -27,7 +28,7 @@ export const handleStrReplace = ((params: {
ws?: WebSocket
} & OptionalFileProcessingState
}): {
- result: Promise
+ result: Promise>
state: FileProcessingState
} => {
const {
diff --git a/backend/src/tools/handlers/tool/think-deeply.ts b/backend/src/tools/handlers/tool/think-deeply.ts
index bb8a3aece..8bdd8d9aa 100644
--- a/backend/src/tools/handlers/tool/think-deeply.ts
+++ b/backend/src/tools/handlers/tool/think-deeply.ts
@@ -1,12 +1,15 @@
import { logger } from '../../../util/logger'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
export const handleThinkDeeply = ((params: {
previousToolCallFinished: Promise
toolCall: CodebuffToolCall<'think_deeply'>
-}): { result: Promise; state: {} } => {
+}): { result: Promise>; state: {} } => {
const { previousToolCallFinished, toolCall } = params
const { thought } = toolCall.input
@@ -18,7 +21,7 @@ export const handleThinkDeeply = ((params: {
)
return {
- result: previousToolCallFinished.then(() => 'Deep thinking completed.'),
+ result: previousToolCallFinished.then(() => []),
state: {},
}
}) satisfies CodebuffToolHandlerFunction<'think_deeply'>
diff --git a/backend/src/tools/handlers/tool/update-subgoal.ts b/backend/src/tools/handlers/tool/update-subgoal.ts
index 3cbf612f7..bf39ff881 100644
--- a/backend/src/tools/handlers/tool/update-subgoal.ts
+++ b/backend/src/tools/handlers/tool/update-subgoal.ts
@@ -1,13 +1,17 @@
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
import type { Subgoal } from '@codebuff/common/types/session-state'
+type ToolName = 'update_subgoal'
export const handleUpdateSubgoal = ((params: {
previousToolCallFinished: Promise
- toolCall: CodebuffToolCall<'update_subgoal'>
+ toolCall: CodebuffToolCall
state: { agentContext?: Record }
}): {
- result: Promise
+ result: Promise>
state: { agentContext: Record }
} => {
const { previousToolCallFinished, toolCall, state } = params
@@ -36,7 +40,17 @@ export const handleUpdateSubgoal = ((params: {
}
messages.push('Successfully updated subgoal.')
return {
- result: previousToolCallFinished.then(() => messages.join('\n\n')),
+ result: (async () => {
+ await previousToolCallFinished
+ return [
+ {
+ type: 'json',
+ value: {
+ message: messages.join('\n\n'),
+ },
+ },
+ ]
+ })(),
state: { agentContext },
}
-}) satisfies CodebuffToolHandlerFunction<'update_subgoal'>
+}) satisfies CodebuffToolHandlerFunction
diff --git a/backend/src/tools/handlers/tool/web-search.ts b/backend/src/tools/handlers/tool/web-search.ts
index bd31c63a7..c2abb57a5 100644
--- a/backend/src/tools/handlers/tool/web-search.ts
+++ b/backend/src/tools/handlers/tool/web-search.ts
@@ -6,7 +6,10 @@ import { PROFIT_MARGIN } from '../../../llm-apis/message-cost-tracker'
import { logger } from '../../../util/logger'
import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+ CodebuffToolCall,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
export const handleWebSearch = ((params: {
previousToolCallFinished: Promise
@@ -21,7 +24,7 @@ export const handleWebSearch = ((params: {
fingerprintId?: string
repoId?: string
}
-}): { result: Promise; state: {} } => {
+}): { result: Promise>; state: {} } => {
const {
previousToolCallFinished,
toolCall,
@@ -51,93 +54,113 @@ export const handleWebSearch = ((params: {
repoId,
}
- const webSearchPromise: Promise = (async () => {
- try {
- const searchResult = await searchWeb(query, { depth })
- const searchDuration = Date.now() - searchStartTime
- const resultLength = searchResult?.length || 0
- const hasResults = Boolean(searchResult && searchResult.trim())
+ const webSearchPromise: Promise> =
+ (async () => {
+ try {
+ const searchResult = await searchWeb(query, { depth })
+ const searchDuration = Date.now() - searchStartTime
+ const resultLength = searchResult?.length || 0
+ const hasResults = Boolean(searchResult && searchResult.trim())
- // Charge credits for web search usage
- let creditResult = null
- if (userId) {
- const creditsToCharge = Math.round(
- (depth === 'deep' ? 5 : 1) * (1 + PROFIT_MARGIN),
- )
- const requestContext = getRequestContext()
- const repoUrl = requestContext?.processedRepoUrl
+ // Charge credits for web search usage
+ let creditResult = null
+ if (userId) {
+ const creditsToCharge = Math.round(
+ (depth === 'deep' ? 5 : 1) * (1 + PROFIT_MARGIN),
+ )
+ const requestContext = getRequestContext()
+ const repoUrl = requestContext?.processedRepoUrl
+
+ creditResult = await consumeCreditsWithFallback({
+ userId,
+ creditsToCharge,
+ repoUrl,
+ context: 'web search',
+ })
+
+ if (!creditResult.success) {
+ logger.error(
+ {
+ ...searchContext,
+ error: creditResult.error,
+ creditsToCharge,
+ searchDuration,
+ },
+ 'Failed to charge credits for web search',
+ )
+ }
+ }
- creditResult = await consumeCreditsWithFallback({
- userId,
- creditsToCharge,
- repoUrl,
- context: 'web search',
- })
+ logger.info(
+ {
+ ...searchContext,
+ searchDuration,
+ resultLength,
+ hasResults,
+ creditsCharged: creditResult?.success
+ ? depth === 'deep'
+ ? 5
+ : 1
+ : 0,
+ success: true,
+ },
+ 'Search completed',
+ )
- if (!creditResult.success) {
- logger.error(
+ if (searchResult) {
+ return [
+ {
+ type: 'json',
+ value: { result: searchResult },
+ },
+ ]
+ } else {
+ logger.warn(
{
...searchContext,
- error: creditResult.error,
- creditsToCharge,
searchDuration,
},
- 'Failed to charge credits for web search',
+ 'No results returned from search API',
)
+ return [
+ {
+ type: 'json',
+ value: {
+ errorMessage: `No search results found for "${query}". Try refining your search query or using different keywords.`,
+ },
+ },
+ ]
}
- }
-
- logger.info(
- {
- ...searchContext,
- searchDuration,
- resultLength,
- hasResults,
- creditsCharged: creditResult?.success
- ? depth === 'deep'
- ? 5
- : 1
- : 0,
- success: true,
- },
- 'Search completed',
- )
-
- if (searchResult) {
- return searchResult
- } else {
- logger.warn(
+ } catch (error) {
+ const searchDuration = Date.now() - searchStartTime
+ logger.error(
{
...searchContext,
+ error:
+ error instanceof Error
+ ? {
+ name: error.name,
+ message: error.message,
+ stack: error.stack,
+ }
+ : error,
searchDuration,
+ success: false,
},
- 'No results returned from search API',
+ 'Search failed with error',
)
- return `No search results found for "${query}". Try refining your search query or using different keywords.`
+ return [
+ {
+ type: 'json',
+ value: {
+ errorMessage: `Error performing web search for "${query}": ${
+ error instanceof Error ? error.message : 'Unknown error'
+ }`,
+ },
+ },
+ ]
}
- } catch (error) {
- const searchDuration = Date.now() - searchStartTime
- logger.error(
- {
- ...searchContext,
- error:
- error instanceof Error
- ? {
- name: error.name,
- message: error.message,
- stack: error.stack,
- }
- : error,
- searchDuration,
- success: false,
- },
- 'Search failed with error',
- )
- return `Error performing web search for "${query}": ${
- error instanceof Error ? error.message : 'Unknown error'
- }`
- }
- })()
+ })()
return {
result: (async () => {
diff --git a/backend/src/tools/handlers/tool/write-file.ts b/backend/src/tools/handlers/tool/write-file.ts
index 6cf5a0dfa..4b912a061 100644
--- a/backend/src/tools/handlers/tool/write-file.ts
+++ b/backend/src/tools/handlers/tool/write-file.ts
@@ -8,8 +8,9 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
import type {
ClientToolCall,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
import type { WebSocket } from 'ws'
type FileProcessingTools = 'write_file' | 'str_replace' | 'create_plan'
@@ -61,7 +62,19 @@ export function getFileProcessingValues(
return fileProcessingValues
}
-export const handleWriteFile = ((params: {
+export const handleWriteFile = (({
+ previousToolCallFinished,
+ toolCall,
+
+ clientSessionId,
+ userInputId,
+
+ requestClientToolCall,
+ writeToClient,
+
+ getLatestState,
+ state,
+}: {
previousToolCallFinished: Promise
toolCall: CodebuffToolCall<'write_file'>
@@ -70,7 +83,7 @@ export const handleWriteFile = ((params: {
requestClientToolCall: (
toolCall: ClientToolCall<'write_file'>,
- ) => Promise
+ ) => Promise>
writeToClient: (chunk: string) => void
getLatestState: () => FileProcessingState
@@ -80,25 +93,12 @@ export const handleWriteFile = ((params: {
userId?: string
fullResponse?: string
prompt?: string
- messages?: CodebuffMessage[]
+ messages?: Message[]
} & OptionalFileProcessingState
}): {
- result: Promise
+ result: Promise>
state: FileProcessingState
} => {
- const {
- previousToolCallFinished,
- toolCall,
-
- clientSessionId,
- userInputId,
-
- requestClientToolCall,
- writeToClient,
-
- getLatestState,
- state,
- } = params
const { path, instructions, content } = toolCall.input
const { ws, fingerprintId, userId, fullResponse, prompt } = state
if (!ws) {
@@ -168,14 +168,15 @@ export const handleWriteFile = ((params: {
fileProcessingPromises.push(newPromise)
return {
- result: previousToolCallFinished.then(async () => {
+ result: (async () => {
+ await previousToolCallFinished
return await postStreamProcessing<'write_file'>(
await newPromise,
getLatestState(),
writeToClient,
requestClientToolCall,
)
- }),
+ })(),
state: fileProcessingState,
}
}) satisfies CodebuffToolHandlerFunction<'write_file'>
@@ -184,8 +185,10 @@ export async function postStreamProcessing(
toolCall: FileProcessing,
fileProcessingState: FileProcessingState,
writeToClient: (chunk: string) => void,
- requestClientToolCall: (toolCall: ClientToolCall) => Promise,
-) {
+ requestClientToolCall: (
+ toolCall: ClientToolCall,
+ ) => Promise>,
+): Promise> {
const allFileProcessingResults = await Promise.all(
fileProcessingState.allPromises,
)
@@ -224,25 +227,41 @@ export async function postStreamProcessing(
const errors = fileProcessingState.fileChangeErrors.filter(
(result) => result.toolCallId === toolCall.toolCallId,
)
- toolCallResults.push(
- ...errors.map(({ path, error }) => `Error processing ${path}: ${error}`),
- )
+ if (errors.length > 0) {
+ if (errors.length > 1) {
+ throw new Error(
+ `Internal error: Unexpected number of matching errors for ${{ toolCall }}, found ${errors.length}, expected 1`,
+ )
+ }
+
+ const { path, error } = errors[0]
+ return [
+ {
+ type: 'json',
+ value: {
+ file: path,
+ errorMessage: error,
+ },
+ },
+ ]
+ }
const changes = fileProcessingState.fileChanges.filter(
(result) => result.toolCallId === toolCall.toolCallId,
)
- for (const { path, content, patch } of changes) {
- const clientToolCall: ClientToolCall = {
- toolCallId: toolCall.toolCallId,
- toolName: toolCall.tool,
- input: patch
- ? { type: 'patch' as const, path, content: patch }
- : { type: 'file' as const, path, content },
- } as ClientToolCall
- const clientResult = await requestClientToolCall(clientToolCall)
-
- toolCallResults.push(clientResult)
+ if (changes.length !== 1) {
+ throw new Error(
+ `Internal error: Unexpected number of matching changes for ${{ toolCall }}, found ${changes.length}, expected 1`,
+ )
}
- return toolCallResults.join('\n\n')
+ const { patch, content, path } = changes[0]
+ const clientToolCall: ClientToolCall = {
+ toolCallId: toolCall.toolCallId,
+ toolName: toolCall.tool,
+ input: patch
+ ? { type: 'patch' as const, path, content: patch }
+ : { type: 'file' as const, path, content },
+ } as ClientToolCall
+ return await requestClientToolCall(clientToolCall)
}
diff --git a/backend/src/tools/stream-parser.ts b/backend/src/tools/stream-parser.ts
index 1a0ddbd56..583298386 100644
--- a/backend/src/tools/stream-parser.ts
+++ b/backend/src/tools/stream-parser.ts
@@ -11,13 +11,10 @@ import type { CustomToolCall } from './tool-executor'
import type { AgentTemplate } from '../templates/types'
import type { ToolName } from '@codebuff/common/tools/constants'
import type { CodebuffToolCall } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
-import type {
- AgentState,
- Subgoal,
- ToolResult,
-} from '@codebuff/common/types/session-state'
+import type { AgentState, Subgoal } from '@codebuff/common/types/session-state'
import type { ProjectFileContext } from '@codebuff/common/util/file'
import type { ToolCallPart } from 'ai'
import type { WebSocket } from 'ws'
@@ -40,7 +37,7 @@ export async function processStreamWithTools(options: {
agentTemplate: AgentTemplate
localAgentTemplates: Record
fileContext: ProjectFileContext
- messages: CodebuffMessage[]
+ messages: Message[]
agentState: AgentState
agentContext: Record
onResponseChunk: (chunk: string | PrintModeEvent) => void
@@ -66,7 +63,7 @@ export async function processStreamWithTools(options: {
const messages = [...options.messages]
- const toolResults: ToolResult[] = []
+ const toolResults: ToolResultPart[] = []
const toolCalls: (CodebuffToolCall | CustomToolCall)[] = []
const { promise: streamDonePromise, resolve: resolveStreamDonePromise } =
Promise.withResolvers()
@@ -158,9 +155,10 @@ export async function processStreamWithTools(options: {
]),
(toolName, error) => {
toolResults.push({
+ type: 'tool-result',
toolName,
toolCallId: generateCompactId(),
- output: { type: 'text', value: error },
+ output: [{ type: 'json', value: { errorMessage: error } }],
})
},
onResponseChunk,
@@ -176,7 +174,7 @@ export async function processStreamWithTools(options: {
fullResponseChunks.push(chunk)
}
- state.messages = buildArray([
+ state.messages = buildArray([
...expireMessages(state.messages, 'agentStep'),
fullResponseChunks.length > 0 && {
role: 'assistant' as const,
diff --git a/backend/src/tools/tool-executor.ts b/backend/src/tools/tool-executor.ts
index ce4f57f57..c35d8e00b 100644
--- a/backend/src/tools/tool-executor.ts
+++ b/backend/src/tools/tool-executor.ts
@@ -1,12 +1,10 @@
import { endsAgentStepParam } from '@codebuff/common/tools/constants'
-import { renderToolResults } from '@codebuff/common/tools/utils'
import { generateCompactId } from '@codebuff/common/util/string'
import z from 'zod/v4'
import { convertJsonSchemaToZod } from 'zod-from-json-schema'
import { checkLiveUserInput } from '../live-user-inputs'
import { logger } from '../util/logger'
-import { asSystemMessage } from '../util/messages'
import { requestToolCall } from '../websockets/websocket-action'
import { codebuffToolDefs } from './definitions/list'
import { codebuffToolHandlers } from './handlers/list'
@@ -18,9 +16,14 @@ import type {
ClientToolCall,
ClientToolName,
CodebuffToolCall,
+ CodebuffToolOutput,
} from '@codebuff/common/tools/list'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+ ToolResultOutput,
+ ToolResultPart,
+} from '@codebuff/common/types/messages/content-part'
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
-import type { ToolResult } from '@codebuff/common/types/session-state'
import type {
customToolDefinitionsSchema,
ProjectFileContext,
@@ -110,7 +113,7 @@ export interface ExecuteToolCallParams {
toolName: T
input: Record
toolCalls: (CodebuffToolCall | CustomToolCall)[]
- toolResults: ToolResult[]
+ toolResults: ToolResultPart[]
previousToolCallFinished: Promise
ws: WebSocket
agentTemplate: AgentTemplate
@@ -153,12 +156,17 @@ export function executeToolCall({
)
if ('error' in toolCall) {
toolResults.push({
+ type: 'tool-result',
toolName,
toolCallId: toolCall.toolCallId,
- output: {
- type: 'text',
- value: toolCall.error,
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ errorMessage: toolCall.error,
+ },
+ },
+ ],
})
logger.debug(
{ toolCall, error: toolCall.error },
@@ -179,19 +187,24 @@ export function executeToolCall({
// Filter out restricted tools in ask mode unless exporting summary
if (!agentTemplate.toolNames.includes(toolCall.toolName)) {
toolResults.push({
+ type: 'tool-result',
toolName,
toolCallId: toolCall.toolCallId,
- output: {
- type: 'text',
- value: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ errorMessage: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
+ },
+ },
+ ],
})
return previousToolCallFinished
}
- const { result: toolResultPromise, state: stateUpdate } = (
- codebuffToolHandlers[toolName] as CodebuffToolHandlerFunction
- )({
+ // Cast to any to avoid type errors
+ const handler = codebuffToolHandlers[toolName] as any
+ const { result: toolResultPromise, state: stateUpdate } = handler({
previousToolCallFinished,
fileContext,
agentStepId,
@@ -212,17 +225,12 @@ export function executeToolCall({
clientToolCall.toolName,
clientToolCall.input,
)
- return (
- clientToolResult.error ??
- (clientToolResult.output?.type === 'text'
- ? clientToolResult.output.value
- : 'undefined')
- )
+ return clientToolResult.output as CodebuffToolOutput
},
toolCall,
getLatestState: () => state,
state,
- })
+ }) as ReturnType>
for (const [key, value] of Object.entries(stateUpdate ?? {})) {
if (key === 'agentState' && typeof value === 'object' && value !== null) {
@@ -234,13 +242,11 @@ export function executeToolCall({
}
return toolResultPromise.then((result) => {
- const toolResult = {
+ const toolResult: ToolResultPart = {
+ type: 'tool-result',
toolName,
toolCallId: toolCall.toolCallId,
- output: {
- type: 'text' as const,
- value: result as string,
- },
+ output: result,
}
logger.debug(
{ input, toolResult },
@@ -259,8 +265,8 @@ export function executeToolCall({
toolResults.push(toolResult)
state.messages.push({
- role: 'user' as const,
- content: asSystemMessage(renderToolResults([toolResult])),
+ role: 'tool' as const,
+ content: toolResult,
})
})
}
@@ -369,12 +375,17 @@ export function executeCustomToolCall({
)
if ('error' in toolCall) {
toolResults.push({
+ type: 'tool-result',
toolName,
toolCallId: toolCall.toolCallId,
- output: {
- type: 'text',
- value: toolCall.error,
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ errorMessage: toolCall.error,
+ },
+ },
+ ],
})
logger.debug(
{ toolCall, error: toolCall.error },
@@ -395,12 +406,17 @@ export function executeCustomToolCall({
// Filter out restricted tools in ask mode unless exporting summary
if (!(agentTemplate.toolNames as string[]).includes(toolCall.toolName)) {
toolResults.push({
+ type: 'tool-result',
toolName,
toolCallId: toolCall.toolCallId,
- output: {
- type: 'text',
- value: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
- },
+ output: [
+ {
+ type: 'json',
+ value: {
+ errorMessage: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
+ },
+ },
+ ],
})
return previousToolCallFinished
}
@@ -408,7 +424,7 @@ export function executeCustomToolCall({
return previousToolCallFinished
.then(async () => {
if (!checkLiveUserInput(userId, userInputId, clientSessionId)) {
- return ''
+ return null
}
const clientToolResult = await requestToolCall(
@@ -417,22 +433,18 @@ export function executeCustomToolCall({
toolCall.toolName,
toolCall.input,
)
- return (
- clientToolResult.error ??
- (clientToolResult.output?.type === 'text'
- ? clientToolResult.output.value
- : 'undefined')
- )
+ return clientToolResult.output satisfies ToolResultOutput[]
})
.then((result) => {
+ if (result === null) {
+ return
+ }
const toolResult = {
+ type: 'tool-result',
toolName,
toolCallId: toolCall.toolCallId,
- output: {
- type: 'text' as const,
- value: result as string,
- },
- }
+ output: result,
+ } satisfies ToolResultPart
logger.debug(
{ input, toolResult },
`${toolName} custom tool call & result (${toolResult.toolCallId})`,
@@ -450,8 +462,9 @@ export function executeCustomToolCall({
toolResults.push(toolResult)
state.messages.push({
- role: 'user' as const,
- content: asSystemMessage(renderToolResults([toolResult])),
- })
+ role: 'tool' as const,
+ content: toolResult,
+ } satisfies Message)
+ return
})
}
diff --git a/backend/src/util/__tests__/messages.test.ts b/backend/src/util/__tests__/messages.test.ts
index d417ed290..fb8c636e7 100644
--- a/backend/src/util/__tests__/messages.test.ts
+++ b/backend/src/util/__tests__/messages.test.ts
@@ -11,14 +11,14 @@ import {
import { trimMessagesToFitTokenLimit, messagesWithSystem } from '../messages'
import * as tokenCounter from '../token-counter'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
describe('messagesWithSystem', () => {
it('prepends system message to array', () => {
const messages = [
{ role: 'user', content: 'hello' },
{ role: 'assistant', content: 'hi' },
- ] as CodebuffMessage[]
+ ] as Message[]
const system = 'Be helpful'
const result = messagesWithSystem(messages, system)
@@ -58,53 +58,82 @@ describe('trimMessagesToFitTokenLimit', () => {
'This is a long message that would normally be shortened but since it has no tool calls it should be preserved completely intact no matter what',
},
{
- role: 'user',
- content: [
- // Terminal output 0 (oldest) - should be simplified
- {
- type: 'text',
- text: `
-run_terminal_command
-Terminal output 0${'.'.repeat(2000)}
-`,
- },
- // Terminal output 1 - should be preserved (shorter than '[Output omitted]')
- {
- type: 'text',
- text: `
-run_terminal_command
-Short output 1
-`,
- },
- ],
+ // Terminal output 0 (oldest) - should be simplified
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-0',
+ output: [
+ {
+ type: 'json',
+ value: `Terminal output 0${'.'.repeat(2000)}`,
+ },
+ ],
+ },
},
- // Terminal output 2 - should be simplified
{
- role: 'user',
- content: `
-run_terminal_command
-Terminal output 2${'.'.repeat(2000)}
-`,
+ // Terminal output 1 - should be preserved (shorter than '[Output omitted]')
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-1',
+ output: [
+ {
+ type: 'json',
+ value: `Short output 1`,
+ },
+ ],
+ },
},
- // Terminal output 3 - should be preserved (5th most recent)
{
- role: 'user',
- content: `
-run_terminal_command
-Terminal output 3
-`,
+ // Terminal output 2 - should be simplified
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-2',
+ output: [
+ {
+ type: 'json',
+ value: `Terminal output 2${'.'.repeat(2000)}`,
+ },
+ ],
+ },
+ },
+ {
+ // Terminal output 3 - should be preserved (5th most recent)
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-3',
+ output: [
+ {
+ type: 'json',
+ value: `Terminal output 3`,
+ },
+ ],
+ },
+ },
+ {
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-4',
+ output: [
+ {
+ type: 'json',
+ value: `Terminal output 4`,
+ },
+ ],
+ },
},
{
role: 'user',
content: [
- // Terminal output 4 - should be preserved (4th most recent)
- {
- type: 'text',
- text: `
-run_terminal_command
-Terminal output 4
-`,
- },
// Regular message - should never be shortened
{
type: 'image',
@@ -114,31 +143,52 @@ describe('trimMessagesToFitTokenLimit', () => {
data: 'xyz',
},
},
- // Terminal output 5 - should be preserved (3rd most recent)
- {
- type: 'text',
- text: `
-run_terminal_command
-Terminal output 5
-`,
- },
],
},
- // Terminal output 6 - should be preserved (2nd most recent)
{
- role: 'user',
- content: `
-run_terminal_command
-Terminal output 6
-`,
+ // Terminal output 5 - should be preserved (3rd most recent)
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-5',
+ output: [
+ {
+ type: 'json',
+ value: `Terminal output 5`,
+ },
+ ],
+ },
},
- // Terminal output 7 - should be preserved (most recent)
{
- role: 'user',
- content: `
-run_terminal_command
-Terminal output 7
-`,
+ // Terminal output 6 - should be preserved (2nd most recent)
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-6',
+ output: [
+ {
+ type: 'json',
+ value: `Terminal output 6`,
+ },
+ ],
+ },
+ },
+ {
+ // Terminal output 7 - should be preserved (most recent)
+ role: 'tool',
+ content: {
+ type: 'tool-result',
+ toolName: 'run_terminal_command',
+ toolCallId: 'test-id-7',
+ output: [
+ {
+ type: 'json',
+ value: `Terminal output 7`,
+ },
+ ],
+ },
},
// Regular message - should never be shortened
{
@@ -148,7 +198,7 @@ describe('trimMessagesToFitTokenLimit', () => {
text: 'Another long message that should never be shortened because it has no tool calls in it at all',
},
},
- ] as CodebuffMessage[]
+ ] as Message[]
it('handles all features working together correctly', () => {
const maxTotalTokens = 3000
@@ -245,7 +295,7 @@ describe('trimMessagesToFitTokenLimit', () => {
content: 'Message 5 - keep me too!',
keepDuringTruncation: true,
},
- ] as CodebuffMessage[]
+ ] as Message[]
const result = trimMessagesToFitTokenLimit(messages, 0, 1000)
@@ -275,7 +325,7 @@ describe('trimMessagesToFitTokenLimit', () => {
content: 'Short message 2',
keepDuringTruncation: true,
},
- ] as CodebuffMessage[]
+ ] as Message[]
const result = trimMessagesToFitTokenLimit(messages, 0, 10000)
@@ -291,7 +341,7 @@ describe('trimMessagesToFitTokenLimit', () => {
{ role: 'user', content: 'B'.repeat(1000) }, // Large message to be removed
{ role: 'user', content: 'C'.repeat(1000) }, // Large message to be removed
{ role: 'user', content: 'Keep this', keepDuringTruncation: true },
- ] as CodebuffMessage[]
+ ] as Message[]
const result = trimMessagesToFitTokenLimit(messages, 0, 1000)
@@ -321,7 +371,7 @@ describe('trimMessagesToFitTokenLimit', () => {
keepDuringTruncation: true,
},
{ role: 'user', content: 'C'.repeat(100) }, // Might be kept
- ] as CodebuffMessage[]
+ ] as Message[]
const result = trimMessagesToFitTokenLimit(messages, 0, 2000)
@@ -345,7 +395,7 @@ describe('trimMessagesToFitTokenLimit', () => {
{ role: 'user', content: 'B'.repeat(800) }, // Large message to force truncation
{ role: 'user', content: 'Keep 2', keepDuringTruncation: true },
{ role: 'user', content: 'C'.repeat(800) }, // Large message to force truncation
- ] as CodebuffMessage[]
+ ] as Message[]
const result = trimMessagesToFitTokenLimit(messages, 0, 500)
diff --git a/backend/src/util/__tests__/parse-tool-call-xml.test.ts b/backend/src/util/__tests__/parse-tool-call-xml.test.ts
index 258123646..e69de29bb 100644
--- a/backend/src/util/__tests__/parse-tool-call-xml.test.ts
+++ b/backend/src/util/__tests__/parse-tool-call-xml.test.ts
@@ -1,227 +0,0 @@
-import { describe, it, expect } from 'bun:test'
-
-import { parseToolCallXml } from '../parse-tool-call-xml'
-
-describe('parseToolCallXml', () => {
- it('should parse basic key-value pairs', () => {
- const xml = `value1value2`
- expect(parseToolCallXml(xml)).toEqual({
- key1: 'value1',
- key2: 'value2',
- })
- })
-
- it('should handle empty content', () => {
- const xml = `value2`
- expect(parseToolCallXml(xml)).toEqual({
- key1: '',
- key2: 'value2',
- })
- })
-
- it('should handle whitespace around values', () => {
- const xml = ` value1 \nvalue2\n`
- expect(parseToolCallXml(xml)).toEqual({
- key1: 'value1',
- key2: 'value2',
- })
- })
-
- it('should handle internal whitespace', () => {
- const xml = `value with spaces`
- expect(parseToolCallXml(xml)).toEqual({
- key1: 'value with spaces',
- })
- })
-
- it('should return an empty object for empty or whitespace-only input', () => {
- expect(parseToolCallXml('')).toEqual({})
- expect(parseToolCallXml(' ')).toEqual({})
- expect(parseToolCallXml('\n\t')).toEqual({})
- })
-
- it('should handle special XML characters within values', () => {
- const xml = `<value1>"value2's"&value3`
- expect(parseToolCallXml(xml)).toEqual({
- key1: '<value1>',
- key2: '"value2\'s"',
- key3: '&value3',
- })
- })
-
- it('should parse numbers as strings', () => {
- const xml = `12345.67-8`
- expect(parseToolCallXml(xml)).toEqual({
- key1: '123',
- key2: '45.67',
- key3: '-8',
- })
- })
-
- it('should parse booleans as strings', () => {
- const xml = `truefalse`
- expect(parseToolCallXml(xml)).toEqual({
- key1: 'true',
- key2: 'false',
- })
- })
-
- it('should parse nested range tags as raw string content', () => {
- const xml = `100120200220`
- expect(parseToolCallXml(xml)).toEqual({
- xRange: '100120',
- yRange: '200220',
- })
- })
-
- it('should parse mixed types as strings', () => {
- const xml = `hello99true`
- expect(parseToolCallXml(xml)).toEqual({
- text: 'hello',
- number: '99',
- bool: 'true',
- empty: '',
- })
- })
-
- it('should handle complex example with various types (all as strings)', () => {
- const xml = `
- click
- #submit-button
- 5000
- false
- 50.575.5
- 100150
- Submit the form
- `
- expect(parseToolCallXml(xml)).toEqual({
- action: 'click',
- selector: '#submit-button',
- timeout: '5000',
- force: 'false',
- xRange: '50.575.5',
- yRange: '100150',
- comment: 'Submit the form',
- })
- })
-
- it('should convert boolean values', () => {
- const xml = `
- true
- false
- `
- const result = parseToolCallXml(xml)
- expect(result).toEqual({
- waitForNavigation: 'true',
- headless: 'false',
- })
- })
-
- it('should convert numeric values', () => {
- const xml = `
- 50
- 80.5
- 1000
- `
- const result = parseToolCallXml(xml)
- expect(result).toEqual({
- delay: '50',
- quality: '80.5',
- timeout: '1000',
- })
- })
-
- it('should handle complex browser action example', () => {
- const xml = `
- start
- http://localhost:3000/test?param=value
- networkidle0
-
- maxRetries: 3,
- retryDelay: 1000,
- retryOnErrors: ['TimeoutError', 'TargetClosedError']
-
-
- types: ['error', 'warning'],
- minLevel: 2,
- categories: ['network', 'console']
-
- 15000
- true
- `
- const result = parseToolCallXml(xml)
- expect(result).toEqual({
- action: 'start',
- url: 'http://localhost:3000/test?param=value',
- waitUntil: 'networkidle0',
- retryOptions:
- "maxRetries: 3,\n retryDelay: 1000,\n retryOnErrors: ['TimeoutError', 'TargetClosedError']",
- logFilter:
- "types: ['error', 'warning'],\n minLevel: 2,\n categories: ['network', 'console']",
- timeout: '15000',
- headless: 'true',
- })
- })
-
- it('should handle multiline content with whitespace', () => {
- const xml = `
-
- #main-content
- .button-class
- [data-test="submit"]
-
-
- This is a
- multiline text
- with preserved whitespace
-
- `
- const result = parseToolCallXml(xml)
- expect(result).toEqual({
- selector:
- '#main-content\n .button-class\n [data-test="submit"]',
- text: 'This is a\n multiline text\n with preserved whitespace',
- })
- })
-
- it('should handle diagnostic step example', () => {
- const xml = `
- diagnose
-
- - Click login button
- - Wait for form
- - Fill credentials
- - Submit form
- - Verify redirect
-
- true
- 5
- 300000
- true
- `
- const result = parseToolCallXml(xml)
- expect(result).toEqual({
- action: 'diagnose',
- steps:
- '- Click login button\n - Wait for form\n - Fill credentials\n - Submit form\n - Verify redirect',
- automated: 'true',
- maxSteps: '5',
- sessionTimeoutMs: '300000',
- debug: 'true',
- })
- })
-
- it('should handle empty tags', () => {
- const xml = `
- stop
-
-
- `
- const result = parseToolCallXml(xml)
- expect(result).toEqual({
- action: 'stop',
- screenshot: '',
- debug: '',
- })
- })
-})
diff --git a/backend/src/util/__tests__/simplify-tool-results.test.ts b/backend/src/util/__tests__/simplify-tool-results.test.ts
index a2c991407..eedb1b749 100644
--- a/backend/src/util/__tests__/simplify-tool-results.test.ts
+++ b/backend/src/util/__tests__/simplify-tool-results.test.ts
@@ -1,379 +1,370 @@
-import { describe, expect, it } from 'bun:test'
+import {
+ afterEach,
+ beforeEach,
+ describe,
+ expect,
+ it,
+ mock,
+ spyOn,
+} from 'bun:test'
import {
simplifyReadFileResults,
- simplifyReadFileToolResult,
simplifyTerminalCommandResults,
- simplifyTerminalCommandToolResult,
} from '../simplify-tool-results'
+import * as logger from '../logger'
-describe('simplifyToolResultsInMessages', () => {
- it('should simplify read_files results while preserving others', () => {
- const messageContent = `
-
-read_files
-
-test1.txt
-content1
-None
-
-
-
-test2.txt
-content2
-None
-
-
-
-run_terminal_command
-ls -la output
-`
-
- const result = simplifyReadFileResults(messageContent)
- expect(result).toContain('Read the following files: test1.txt\ntest2.txt')
- expect(result).toContain('ls -la output') // Other tool results preserved
- })
+import type { CodebuffToolOutput } from '@codebuff/common/tools/list'
- it('should handle array message content format', () => {
- const messageContent = [
- {},
+describe('simplifyReadFileResults', () => {
+ it('should simplify read file results by omitting content', () => {
+ const input: CodebuffToolOutput<'read_files'> = [
{
- text: `
-
-read_files
-
-test.txt
-content
-None
-
-`,
+ type: 'json',
+ value: [
+ {
+ path: 'src/file1.ts',
+ content: 'const x = 1;\nconsole.log(x);',
+ referencedBy: { 'file2.ts': ['line 5'] },
+ },
+ {
+ path: 'src/file2.ts',
+ content:
+ 'import { x } from "./file1";\nfunction test() { return x; }',
+ },
+ ],
},
]
- const result = simplifyReadFileResults(messageContent)
- expect(result).toContain('Read the following files: test.txt')
- })
-
- it('should return original content if no tool results present', () => {
- const messageContent = 'No tool results here'
- const result = simplifyReadFileResults(messageContent)
- expect(result).toBe('No tool results here')
- })
+ const result = simplifyReadFileResults(input)
- it('should handle empty content', () => {
- const result = simplifyReadFileResults('')
- expect(result).toBe('')
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: [
+ {
+ path: 'src/file1.ts',
+ contentOmittedForLength: true,
+ },
+ {
+ path: 'src/file2.ts',
+ contentOmittedForLength: true,
+ },
+ ],
+ },
+ ])
})
- it('should handle array message content with no text property', () => {
- const messageContent = [{}, {}]
- const result = simplifyReadFileResults(messageContent)
- expect(result).toBe('')
- })
+ it('should handle empty file results', () => {
+ const input: CodebuffToolOutput<'read_files'> = [
+ {
+ type: 'json',
+ value: [],
+ },
+ ]
- it('should handle array message content with undefined text property', () => {
- const messageContent = [{}, { text: undefined }]
- const result = simplifyReadFileResults(messageContent)
- expect(result).toBe('')
- })
+ const result = simplifyReadFileResults(input)
- it('should handle multiple read_files results', () => {
- const messageContent = `
-
-read_files
-
-test1.txt
-content1
-None
-
-
-
-read_files
-
-test2.txt
-content2
-None
-
-`
-
- const result = simplifyReadFileResults(messageContent)
- expect(result).toContain('Read the following files: test1.txt')
- expect(result).toContain('Read the following files: test2.txt')
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: [],
+ },
+ ])
})
- it('should handle malformed read_files result', () => {
- const messageContent = `
-
-read_files
-malformed content without read_file tags
-`
-
- const result = simplifyReadFileResults(messageContent)
- expect(result).toContain('Read the following files: ')
- })
-})
+ it('should handle files with contentOmittedForLength already set', () => {
+ const input: CodebuffToolOutput<'read_files'> = [
+ {
+ type: 'json',
+ value: [
+ {
+ path: 'src/file1.ts',
+ contentOmittedForLength: true,
+ },
+ ],
+ },
+ ]
-describe('simplifyTerminalCommandResultsInMessages', () => {
- it('should simplify long terminal command output', () => {
- const messageContent = `
-
-run_terminal_command
-Very long terminal output that should be shortened
-`
+ const result = simplifyReadFileResults(input)
- const result = simplifyTerminalCommandResults(messageContent)
- expect(result).toContain('[Output omitted]')
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: [
+ {
+ path: 'src/file1.ts',
+ contentOmittedForLength: true,
+ },
+ ],
+ },
+ ])
})
- it('should preserve short terminal command output', () => {
- const shortOutput = 'Short output'
- const messageContent = `
-
-run_terminal_command
-${shortOutput}
-`
+ it('should not mutate the original input', () => {
+ const originalInput: CodebuffToolOutput<'read_files'> = [
+ {
+ type: 'json',
+ value: [
+ {
+ path: 'src/file1.ts',
+ content: 'const x = 1;',
+ },
+ ],
+ },
+ ]
+ const input = structuredClone(originalInput)
- const result = simplifyTerminalCommandResults(messageContent)
- expect(result).toContain(shortOutput)
- })
+ simplifyReadFileResults(input)
- it('should preserve other tool results', () => {
- const messageContent = `
-
-run_terminal_command
-Very long terminal output that should be shortened
-
-
-read_files
-
-test.txt
-content
-None
-
-`
-
- const result = simplifyTerminalCommandResults(messageContent)
- expect(result).toContain('[Output omitted]')
- expect(result).toContain(
- '\ntest.txt\ncontent\nNone\n',
- )
+ // Original input should be unchanged
+ expect(input).toEqual(originalInput)
})
+})
- it('should handle multiple terminal command results', () => {
- const messageContent = `
-
-run_terminal_command
-First long output that should be shortened
-
-
-run_terminal_command
-Second long output that should also be shortened
-`
-
- const result = simplifyTerminalCommandResults(messageContent)
- const matches = result.match(/\[Output omitted\]/g) || []
- expect(matches.length).toBe(2)
+describe('simplifyTerminalCommandResults', () => {
+ beforeEach(() => {
+ // Mock the logger.error function directly
+ spyOn(logger.logger, 'error').mockImplementation(() => {})
})
- it('should handle mixed short and long terminal outputs', () => {
- const messageContent = `
-
-run_terminal_command
-Very long terminal output that should be shortened
-
-
-run_terminal_command
-ok
-`
-
- const result = simplifyTerminalCommandResults(messageContent)
- expect(result).toContain('[Output omitted]')
- expect(result).toContain('ok')
+ afterEach(() => {
+ mock.restore()
})
- it('should handle malformed terminal command result', () => {
- const messageContent = `
-
-run_terminal_command
-
-`
+ it('should simplify terminal command results with stdout', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'npm test',
+ startingCwd: '/project',
+ message: 'Tests completed',
+ stderr: '',
+ stdout: 'Test suite passed\n✓ All tests passed',
+ exitCode: 0,
+ },
+ },
+ ]
- const result = simplifyTerminalCommandResults(messageContent)
- expect(result).toContain('')
- })
-})
+ const result = simplifyTerminalCommandResults(input)
-describe('simplifyReadFileToolResult', () => {
- it('should extract file paths from read_files result', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'read_files',
- output: {
- type: 'text' as const,
- value: `
-test1.txt
-content1
-None
-
-
-
-test2.txt
-content2
-None
-`,
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: {
+ command: 'npm test',
+ message: 'Tests completed',
+ stdoutOmittedForLength: true,
+ exitCode: 0,
+ },
},
- }
-
- const simplified = simplifyReadFileToolResult(toolResult)
- expect(simplified.toolCallId).toBe('1')
- expect(simplified.toolName).toBe('read_files')
- expect(simplified.output.value).toBe(
- 'Read the following files: test1.txt\ntest2.txt',
- )
+ ])
})
- it('should handle single file result', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'read_files',
- output: {
- type: 'text' as const,
- value:
- 'test.txtcontentNone',
+ it('should simplify terminal command results without message', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'ls -la',
+ stdout: 'file1.txt\nfile2.txt',
+ exitCode: 0,
+ },
},
- }
+ ]
- const simplified = simplifyReadFileToolResult(toolResult)
- expect(simplified.output.value).toBe('Read the following files: test.txt')
+ const result = simplifyTerminalCommandResults(input)
+
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: {
+ command: 'ls -la',
+ stdoutOmittedForLength: true,
+ exitCode: 0,
+ },
+ },
+ ])
})
- it('should handle empty read_files result', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'read_files',
- output: {
- type: 'text' as const,
- value: '',
+ it('should simplify terminal command results without exitCode', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'echo hello',
+ stdout: 'hello',
+ },
},
- }
+ ]
- const simplified = simplifyReadFileToolResult(toolResult)
- expect(simplified.output.value).toBe('Read the following files: ')
+ const result = simplifyTerminalCommandResults(input)
+
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: {
+ command: 'echo hello',
+ stdoutOmittedForLength: true,
+ },
+ },
+ ])
})
- it('should handle malformed read_file tags', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'read_files',
- output: {
- type: 'text' as const,
- value:
- 'no path attributeNone',
+ it('should handle background process results without simplification', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'npm start',
+ processId: 12345,
+ backgroundProcessStatus: 'running' as const,
+ },
},
- }
+ ]
+
+ const result = simplifyTerminalCommandResults(input)
- const simplified = simplifyReadFileToolResult(toolResult)
- expect(simplified.output.value).toBe('Read the following files: ')
+ expect(result).toEqual(input)
})
- it('should handle read_file tags with empty path', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'read_files',
- output: {
- type: 'text' as const,
- value:
- 'contentNone',
+ it('should handle error message results without simplification', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'invalid-command',
+ errorMessage: 'Command not found',
+ },
},
- }
+ ]
+
+ const result = simplifyTerminalCommandResults(input)
- const simplified = simplifyReadFileToolResult(toolResult)
- expect(simplified.output.value).toBe('Read the following files: ')
+ expect(result).toEqual(input)
})
-})
-describe('simplifyTerminalCommandResult', () => {
- it('should shorten long terminal output', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'run_terminal_command',
- output: {
- type: 'text' as const,
- value: 'Very long terminal output that should be shortened',
+ it('should handle results that already have stdoutOmittedForLength', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'npm test',
+ message: 'Tests completed',
+ stdoutOmittedForLength: true,
+ exitCode: 0,
+ },
},
- }
+ ]
+
+ const result = simplifyTerminalCommandResults(input)
- const simplified = simplifyTerminalCommandToolResult(toolResult)
- expect(simplified.toolCallId).toBe('1')
- expect(simplified.toolName).toBe('run_terminal_command')
- expect(simplified.output.value).toBe('[Output omitted]')
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: {
+ command: 'npm test',
+ message: 'Tests completed',
+ stdoutOmittedForLength: true,
+ exitCode: 0,
+ },
+ },
+ ])
})
- it('should preserve short terminal output', () => {
- const shortOutput = 'ok'
- const toolResult = {
- toolCallId: '1',
- toolName: 'run_terminal_command',
- output: { type: 'text' as const, value: shortOutput },
- }
+ it('should handle errors gracefully and return fallback result', () => {
+ // Create input that will cause an error during processing
+ const malformedInput = {
+ invalidStructure: true,
+ } as any
- const simplified = simplifyTerminalCommandToolResult(toolResult)
- expect(simplified.output.value).toBe(shortOutput)
- })
+ const result = simplifyTerminalCommandResults(malformedInput)
- it('should handle empty terminal output', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'run_terminal_command',
- output: {
- type: 'text' as const,
- value: '',
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: {
+ command: '',
+ stdoutOmittedForLength: true,
+ },
},
- }
+ ])
- const simplified = simplifyTerminalCommandToolResult(toolResult)
- expect(simplified.output.value).toBe('')
+ // Verify error was logged
+ expect(logger.logger.error).toHaveBeenCalled()
})
- it('should handle output exactly matching omitted message length', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'run_terminal_command',
- output: {
- type: 'text' as const,
- value: '[Output omitted]', // Same length as replacement
+ it('should not mutate the original input', () => {
+ const originalInput: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'npm test',
+ stdout: 'Test output',
+ exitCode: 0,
+ },
},
- }
+ ]
+ const input = structuredClone(originalInput)
+
+ simplifyTerminalCommandResults(input)
- const simplified = simplifyTerminalCommandToolResult(toolResult)
- expect(simplified.output.value).toBe('[Output omitted]')
+ // Original input should be unchanged
+ expect(input).toEqual(originalInput)
})
- it('should handle output one character longer than omitted message', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'run_terminal_command',
- output: {
- type: 'text' as const,
- value: '[Output omitted].', // One char longer than replacement
+ it('should handle terminal command with stderr', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'npm test',
+ stderr: 'Warning: deprecated package',
+ stdout: 'Tests passed',
+ exitCode: 0,
+ },
},
- }
+ ]
- const simplified = simplifyTerminalCommandToolResult(toolResult)
- expect(simplified.output.value).toBe('[Output omitted]')
+ const result = simplifyTerminalCommandResults(input)
+
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: {
+ command: 'npm test',
+ stdoutOmittedForLength: true,
+ exitCode: 0,
+ },
+ },
+ ])
})
- it('should handle output one character shorter than omitted message', () => {
- const toolResult = {
- toolCallId: '1',
- toolName: 'run_terminal_command',
- output: {
- type: 'text' as const,
- value: '[Output omit]', // One char shorter than replacement
+ it('should handle terminal command with startingCwd', () => {
+ const input: CodebuffToolOutput<'run_terminal_command'> = [
+ {
+ type: 'json',
+ value: {
+ command: 'pwd',
+ startingCwd: '/home/user/project',
+ stdout: '/home/user/project',
+ exitCode: 0,
+ },
},
- }
+ ]
+
+ const result = simplifyTerminalCommandResults(input)
- const simplified = simplifyTerminalCommandToolResult(toolResult)
- expect(simplified.output.value).toBe('[Output omit]')
+ expect(result).toEqual([
+ {
+ type: 'json',
+ value: {
+ command: 'pwd',
+ stdoutOmittedForLength: true,
+ exitCode: 0,
+ },
+ },
+ ])
})
})
diff --git a/backend/src/util/messages.ts b/backend/src/util/messages.ts
index 9511a2160..5f0687542 100644
--- a/backend/src/util/messages.ts
+++ b/backend/src/util/messages.ts
@@ -1,19 +1,28 @@
import { AssertionError } from 'assert'
import { buildArray } from '@codebuff/common/util/array'
+import { errorToObject } from '@codebuff/common/util/object'
import { closeXml } from '@codebuff/common/util/xml'
+import { cloneDeep, isEqual } from 'lodash'
import { logger } from './logger'
import { simplifyTerminalCommandResults } from './simplify-tool-results'
import { countTokensJson } from './token-counter'
import type { System } from '../llm-apis/claude'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+ CodebuffToolMessage,
+ CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type {
+ Message,
+ ToolMessage,
+} from '@codebuff/common/types/messages/codebuff-message'
export function messagesWithSystem(
- messages: CodebuffMessage[],
+ messages: Message[],
system: System,
-): CodebuffMessage[] {
+): Message[] {
return [
{
role: 'system',
@@ -53,9 +62,7 @@ export function isSystemMessage(str: string): boolean {
return str.startsWith('') && str.endsWith(closeXml('system'))
}
-export function castAssistantMessage(
- message: CodebuffMessage,
-): CodebuffMessage | null {
+export function castAssistantMessage(message: Message): Message | null {
if (message.role !== 'assistant') {
return message
}
@@ -87,25 +94,19 @@ export function castAssistantMessage(
// Number of terminal command outputs to keep in full form before simplifying
const numTerminalCommandsToKeep = 5
-/**
- * Helper function to simplify terminal command output while preserving some recent ones
- * @param text - Terminal output text to potentially simplify
- * @param numKept - Number of terminal outputs already kept in full form
- * @returns Object containing simplified result and updated count of kept outputs
- */
function simplifyTerminalHelper(
- text: string,
+ toolResult: CodebuffToolOutput<'run_terminal_command'>,
numKept: number,
-): { result: string; numKept: number } {
- const simplifiedText = simplifyTerminalCommandResults(text)
+): { result: CodebuffToolOutput<'run_terminal_command'>; numKept: number } {
+ const simplified = simplifyTerminalCommandResults(toolResult)
// Keep the full output for the N most recent commands
- if (numKept < numTerminalCommandsToKeep && simplifiedText !== text) {
- return { result: text, numKept: numKept + 1 }
+ if (numKept < numTerminalCommandsToKeep && !isEqual(simplified, toolResult)) {
+ return { result: toolResult, numKept: numKept + 1 }
}
return {
- result: simplifiedText,
+ result: simplified,
numKept,
}
}
@@ -115,7 +116,7 @@ const shortenedMessageTokenFactor = 0.5
const replacementMessage = {
role: 'user',
content: asSystemMessage('Previous message(s) omitted due to length'),
-} satisfies CodebuffMessage
+} satisfies Message
/**
* Trims messages from the beginning to fit within token limits while preserving
@@ -132,10 +133,10 @@ const replacementMessage = {
* @returns Trimmed array of messages that fits within token limit
*/
export function trimMessagesToFitTokenLimit(
- messages: CodebuffMessage[],
+ messages: Message[],
systemTokens: number,
maxTotalTokens: number = 190_000,
-): CodebuffMessage[] {
+): Message[] {
const maxMessageTokens = maxTotalTokens - systemTokens
// Check if we're already under the limit
@@ -145,75 +146,37 @@ export function trimMessagesToFitTokenLimit(
return messages
}
- const shortenedMessages: CodebuffMessage[] = []
+ const shortenedMessages: Message[] = []
let numKept = 0
// Process messages from newest to oldest
for (let i = messages.length - 1; i >= 0; i--) {
const m = messages[i]
- let message: CodebuffMessage
- if (m.role === 'tool' || m.role === 'system') {
- message = messages[i]
- } else if (m.role === 'user') {
- let newContent: typeof m.content
-
- // Handle string content (usually terminal output)
- if (typeof m.content === 'string') {
- const result = simplifyTerminalHelper(m.content, numKept)
- message = { role: m.role, content: result.result }
- numKept = result.numKept
- } else {
- // Handle array content (mixed content types)
- newContent = []
- // Process content parts from newest to oldest
- for (let j = m.content.length - 1; j >= 0; j--) {
- const messagePart = m.content[j]
- // Preserve non-text content (i.e. images)
- if (messagePart.type !== 'text') {
- newContent.push(messagePart)
- continue
- }
-
- const result = simplifyTerminalHelper(messagePart.text, numKept)
- newContent.push({ ...messagePart, text: result.result })
- numKept = result.numKept
- }
- newContent.reverse()
- message = { ...m, content: newContent }
+ if (m.role === 'system' || m.role === 'user' || m.role === 'assistant') {
+ shortenedMessages.push(m)
+ } else if (m.role === 'tool') {
+ if (m.content.toolName !== 'run_terminal_command') {
+ shortenedMessages.push(m)
+ continue
}
- } else if (m.role === 'assistant') {
- let newContent: typeof m.content
-
- // Handle string content (usually terminal output)
- if (typeof m.content === 'string') {
- const result = simplifyTerminalHelper(m.content, numKept)
- message = { role: m.role, content: result.result }
- numKept = result.numKept
- } else {
- // Handle array content (mixed content types)
- newContent = []
- // Process content parts from newest to oldest
- for (let j = m.content.length - 1; j >= 0; j--) {
- const messagePart = m.content[j]
- // Preserve non-text content (i.e. images)
- if (messagePart.type !== 'text') {
- newContent.push(messagePart)
- continue
- }
- const result = simplifyTerminalHelper(messagePart.text, numKept)
- newContent.push({ ...messagePart, text: result.result })
- numKept = result.numKept
- }
- newContent.reverse()
- message = { ...m, content: newContent }
- }
+ const terminalResultMessage = cloneDeep(
+ m,
+ ) as CodebuffToolMessage<'run_terminal_command'>
+
+ const result = simplifyTerminalHelper(
+ terminalResultMessage.content.output,
+ numKept,
+ )
+ terminalResultMessage.content.output = result.result
+ numKept = result.numKept
+
+ shortenedMessages.push(terminalResultMessage)
} else {
m satisfies never
- throw new AssertionError({ message: 'Not a valid role' })
+ const mAny = m as any
+ throw new AssertionError({ message: `Not a valid role: ${mAny.role}` })
}
-
- shortenedMessages.push(message)
}
shortenedMessages.reverse()
@@ -225,7 +188,7 @@ export function trimMessagesToFitTokenLimit(
(maxMessageTokens - requiredTokens) * (1 - shortenedMessageTokenFactor)
const placeholder = 'deleted'
- const filteredMessages: (CodebuffMessage | typeof placeholder)[] = []
+ const filteredMessages: (Message | typeof placeholder)[] = []
for (const message of shortenedMessages) {
if (removedTokens >= tokensToRemove || message.keepDuringTruncation) {
filteredMessages.push(message)
@@ -247,9 +210,9 @@ export function trimMessagesToFitTokenLimit(
}
export function getMessagesSubset(
- messages: CodebuffMessage[],
+ messages: Message[],
otherTokens: number,
-): CodebuffMessage[] {
+): Message[] {
const messagesSubset = trimMessagesToFitTokenLimit(messages, otherTokens)
// Remove cache_control from all messages
@@ -275,9 +238,9 @@ export function getMessagesSubset(
}
export function expireMessages(
- messages: CodebuffMessage[],
+ messages: Message[],
endOf: 'agentStep' | 'userPrompt',
-): CodebuffMessage[] {
+): Message[] {
return messages.filter((m) => {
// Keep messages with no timeToLive
if (m.timeToLive === undefined) return true
@@ -289,3 +252,77 @@ export function expireMessages(
return true
})
}
+
+export function getEditedFiles(messages: Message[]): string[] {
+ return buildArray(
+ messages
+ .filter(
+ (
+ m,
+ ): m is ToolMessage & {
+ content: { toolName: 'create_plan' | 'str_replace' | 'write_file' }
+ } => {
+ return (
+ m.role === 'tool' &&
+ (m.content.toolName === 'create_plan' ||
+ m.content.toolName === 'str_replace' ||
+ m.content.toolName === 'write_file')
+ )
+ },
+ )
+ .map((m) => {
+ try {
+ const fileInfo = (
+ m as CodebuffToolMessage<
+ 'create_plan' | 'str_replace' | 'write_file'
+ >
+ ).content.output[0].value
+ if ('errorMessage' in fileInfo) {
+ return null
+ }
+ return fileInfo.file
+ } catch (error) {
+ logger.error(
+ { error: errorToObject(error), m },
+ 'Error parsing file info',
+ )
+ return null
+ }
+ }),
+ )
+}
+
+export function getPreviouslyReadFiles(messages: Message[]): {
+ path: string
+ content: string
+ referencedBy?: Record
+}[] {
+ return buildArray(
+ messages
+ .filter(
+ (
+ m,
+ ): m is ToolMessage & {
+ content: { toolName: 'read_files' }
+ } => m.role === 'tool' && m.content.toolName === 'read_files',
+ )
+ .map((m) => {
+ try {
+ return (
+ m as CodebuffToolMessage<'read_files'>
+ ).content.output[0].value.map((file) => {
+ if ('contentOmittedForLength' in file) {
+ return undefined
+ }
+ return file
+ })
+ } catch (error) {
+ logger.error(
+ { error: errorToObject(error), m },
+ 'Error parsing read_files output from message',
+ )
+ return []
+ }
+ }),
+ )
+}
diff --git a/backend/src/util/parse-tool-call-xml.ts b/backend/src/util/parse-tool-call-xml.ts
index 1c8a109ab..ff4fc2f7b 100644
--- a/backend/src/util/parse-tool-call-xml.ts
+++ b/backend/src/util/parse-tool-call-xml.ts
@@ -1,61 +1,3 @@
-import { toContentString } from '@codebuff/common/util/messages'
-import { generateCompactId } from '@codebuff/common/util/string'
-import { closeXml } from '@codebuff/common/util/xml'
-
-import type { StringToolResultPart } from '@codebuff/common/tools/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
-
-/**
- * Parses XML content for a tool call into a structured object with only string values.
- * Example input:
- * click
- * #button
- * 5000
- */
-export function parseToolCallXml(xmlString: string): Record {
- if (!xmlString.trim()) return {}
-
- const result: Record = {}
- const tagPattern = /<(\w+)>([\s\S]*?)<\/\1>/g
- let match
-
- while ((match = tagPattern.exec(xmlString)) !== null) {
- const [_, key, rawValue] = match
-
- // Remove leading/trailing whitespace but preserve internal whitespace
- const value = rawValue.replace(/^\s+|\s+$/g, '')
-
- // Assign all values as strings
- result[key] = value
- }
-
- return result
-}
-
-export const parseToolResults = (xmlString: string): StringToolResultPart[] => {
- if (!xmlString.trim()) return []
-
- const results: StringToolResultPart[] = []
- const toolResultPattern = /([\s\S]*?)<\/tool_result>/g
- let match
-
- while ((match = toolResultPattern.exec(xmlString)) !== null) {
- const [_, toolResultContent] = match
- const toolMatch = /(.*?)<\/tool>/g.exec(toolResultContent)
- const resultMatch = /([\s\S]*?)<\/result>/g.exec(toolResultContent)
-
- if (toolMatch && resultMatch) {
- results.push({
- toolName: toolMatch[1],
- toolCallId: generateCompactId(),
- output: { type: 'text', value: resultMatch[1].trim() },
- })
- }
- }
-
- return results
-}
-
export interface TokenCallerMap {
[filePath: string]: {
[token: string]: string[] // Array of files that call this token
@@ -66,36 +8,11 @@ export function renderReadFilesResult(
files: { path: string; content: string }[],
tokenCallers: TokenCallerMap,
) {
- return files
- .map((file) => {
- const referencedBy =
- Object.entries(tokenCallers[file.path] ?? {})
- .filter(([_, callers]) => callers.length > 0)
- .map(([token, callers]) => `${token}: ${callers.join(', ')}`)
- .join('\n') || 'None'
- return `\n${file.path}${closeXml('path')}\n${file.content}${closeXml('content')}\n${referencedBy}${closeXml('referenced_by')}\n${closeXml('read_file')}`
- })
- .join('\n\n')
-}
-
-export function parseReadFilesResult(
- xmlString: string,
-): { path: string; content: string; referencedBy: string }[] {
- const files: { path: string; content: string; referencedBy: string }[] = []
- const filePattern =
- /\s*([^<>]+)<\/path>\s*([\s\S]*?)<\/content>\s*([\s\S]*?)<\/referenced_by>\s*<\/read_file>/g
- let match
-
- while ((match = filePattern.exec(xmlString)) !== null) {
- const [, filePath, content, referencedBy] = match
- if (filePath.trim()) {
- files.push({ path: filePath.trim(), content, referencedBy })
+ return files.map((file) => {
+ return {
+ path: file.path,
+ content: file.content,
+ referencedBy: tokenCallers[file.path] ?? {},
}
- }
-
- return files
-}
-
-export function isToolResult(message: CodebuffMessage): boolean {
- return toContentString(message).includes(' ToolResult,
-): string {
- const resultsStr =
- typeof messageContent === 'string'
- ? messageContent
- : ((messageContent[messageContent.length - 1] as any)?.text as string) ??
- ''
- if (!resultsStr.includes(' result.toolName === toolName,
- )
-
- if (targetResults.length === 0) {
- return resultsStr
- }
-
- // Keep non-target results unchanged
- const otherResults = toolResults.filter(
- (result) => result.toolName !== toolName,
- )
-
- // Create simplified results
- const simplifiedResults = targetResults.map(simplifyFn)
-
- // Combine both types of results
- return renderToolResults([...simplifiedResults, ...otherResults])
-}
-
-/**
- * Simplifies read_files tool results to show only file paths while preserving other tool results.
- * Useful for making tool result output more concise in message history.
- * @param messageContent - The message content containing tool results
- * @returns The message content with simplified read_files results showing only paths
- */
export function simplifyReadFileResults(
- messageContent: string | object[],
-): string {
- return simplifyToolResults(
- messageContent,
- 'read_files',
- simplifyReadFileToolResult,
- )
-}
-
-/**
- * Simplifies terminal command tool results to show a brief summary while preserving other tool results.
- * Useful for making tool result output more concise in message history.
- * @param messageContent - The message content containing tool results
- * @returns The message content with simplified terminal command results
- */
-export function simplifyTerminalCommandResults(
- messageContent: string | object[],
-): string {
- return simplifyToolResults(
- messageContent,
- 'run_terminal_command',
- simplifyTerminalCommandToolResult,
- )
-}
-
-/**
- * Simplifies a single read_files tool result by extracting just the file paths.
- * @param toolResult - The read_files tool result to simplify
- * @returns A new tool result with just the list of file paths that were read
- */
-export function simplifyReadFileToolResult(toolResult: ToolResult): ToolResult {
- const fileBlocks = parseReadFilesResult(toolResult.output.value)
- const filePaths = fileBlocks.map((block) => block.path)
- return {
- toolCallId: toolResult.toolCallId,
- toolName: 'read_files',
- output: {
- type: 'text',
- value: `Read the following files: ${filePaths.join('\n')}`,
+ messageContent: CodebuffToolOutput<'read_files'>,
+): CodebuffToolOutput<'read_files'> {
+ return [
+ {
+ type: 'json',
+ value: cloneDeep(messageContent[0]).value.map(({ path }) => {
+ return {
+ path,
+ contentOmittedForLength: true,
+ }
+ }),
},
- }
+ ]
}
-/**
- * Simplifies a single terminal command tool result by replacing output with a brief message.
- * @param toolResult - The terminal command tool result to simplify
- * @returns A new tool result with shortened output if the original was long
- */
-export function simplifyTerminalCommandToolResult(
- toolResult: ToolResult,
-): ToolResult {
- const shortenedResultCandidate = '[Output omitted]'
- return shortenedResultCandidate.length < toolResult.output.value.length
- ? {
- toolCallId: toolResult.toolCallId,
- toolName: 'run_terminal_command',
- output: {
- type: 'text',
- value: shortenedResultCandidate,
+export function simplifyTerminalCommandResults(
+ messageContent: CodebuffToolOutput<'run_terminal_command'>,
+): CodebuffToolOutput<'run_terminal_command'> {
+ try {
+ const clone = cloneDeep(messageContent)
+ const content = clone[0].value
+ if ('processId' in content || 'errorMessage' in content) {
+ return clone
+ }
+ const { command, message, exitCode } = content
+ return [
+ {
+ type: 'json',
+ value: {
+ command,
+ ...(message && { message }),
+ stdoutOmittedForLength: true,
+ ...(exitCode !== undefined && { exitCode }),
},
- }
- : toolResult
+ },
+ ]
+ } catch (error) {
+ logger.error(
+ { error: errorToObject(error), messageContent },
+ 'Error simplifying terminal command results',
+ )
+ return [
+ {
+ type: 'json',
+ value: {
+ command: '',
+ stdoutOmittedForLength: true,
+ },
+ },
+ ]
+ }
}
diff --git a/backend/src/websockets/websocket-action.ts b/backend/src/websockets/websocket-action.ts
index 0f3e41bd8..c9656fea5 100644
--- a/backend/src/websockets/websocket-action.ts
+++ b/backend/src/websockets/websocket-action.ts
@@ -30,6 +30,7 @@ import type {
ServerAction,
UsageResponse,
} from '@codebuff/common/actions'
+import type { ToolResultOutput } from '@codebuff/common/types/messages/content-part'
import type { ClientMessage } from '@codebuff/common/websockets/websocket-schema'
import type { WebSocket } from 'ws'
@@ -421,12 +422,7 @@ export async function requestToolCall(
toolName: string,
input: Record & { timeout_seconds?: number },
): Promise<{
- success: boolean
- output?: {
- type: 'text'
- value: string
- }
- error?: string
+ output: ToolResultOutput[]
}> {
return new Promise((resolve) => {
const requestId = generateCompactId()
@@ -443,8 +439,14 @@ export async function requestToolCall(
() => {
unsubscribe()
resolve({
- success: false,
- error: `Tool call '${toolName}' timed out after ${timeoutInSeconds}s`,
+ output: [
+ {
+ type: 'json',
+ value: {
+ errorMessage: `Tool call '${toolName}' timed out after ${timeoutInSeconds}s`,
+ },
+ },
+ ],
})
},
timeoutInSeconds * 1000 + 5000, // Convert to ms and add a small buffer
@@ -456,9 +458,7 @@ export async function requestToolCall(
clearTimeout(timeoutHandle)
unsubscribe()
resolve({
- success: action.success,
output: action.output,
- error: action.error,
})
}
})
diff --git a/bun.lock b/bun.lock
index c27260eeb..7a223a277 100644
--- a/bun.lock
+++ b/bun.lock
@@ -34,6 +34,9 @@
".agents": {
"name": "@codebuff/agents",
"version": "0.0.0",
+ "dependencies": {
+ "@codebuff/sdk": "workspace:*",
+ },
},
"backend": {
"name": "@codebuff/backend",
@@ -229,7 +232,7 @@
},
"sdk": {
"name": "@codebuff/sdk",
- "version": "0.1.17",
+ "version": "0.1.18",
"dependencies": {
"@vscode/tree-sitter-wasm": "0.1.4",
"ai": "^5.0.0",
diff --git a/common/src/actions.ts b/common/src/actions.ts
index e3a9f53a1..92c59032f 100644
--- a/common/src/actions.ts
+++ b/common/src/actions.ts
@@ -2,12 +2,12 @@ import { z } from 'zod/v4'
import { costModes } from './constants'
import { GrantTypeValues } from './types/grant'
-import { printModeEventSchema } from './types/print-mode'
import {
- SessionStateSchema,
- toolCallSchema,
- toolResultSchema,
-} from './types/session-state'
+ toolResultOutputSchema,
+ toolResultPartSchema,
+} from './types/messages/content-part'
+import { printModeEventSchema } from './types/print-mode'
+import { SessionStateSchema, toolCallSchema } from './types/session-state'
import { ProjectFileContextSchema } from './util/file'
export const FileChangeSchema = z.object({
@@ -29,7 +29,7 @@ export const CLIENT_ACTION_SCHEMA = z.discriminatedUnion('type', [
authToken: z.string().optional(),
costMode: z.enum(costModes).optional().default('normal'),
sessionState: SessionStateSchema,
- toolResults: z.array(toolResultSchema),
+ toolResults: z.array(toolResultPartSchema),
model: z.string().optional(),
repoUrl: z.string().optional(),
agentId: z.string().optional(),
@@ -49,14 +49,7 @@ export const CLIENT_ACTION_SCHEMA = z.discriminatedUnion('type', [
z.object({
type: z.literal('tool-call-response'),
requestId: z.string(),
- success: z.boolean(),
- output: z
- .object({
- type: z.literal('text'),
- value: z.string(),
- })
- .optional(), // Tool execution result
- error: z.string().optional(), // Error message if execution failed
+ output: toolResultOutputSchema.array(),
}),
z.object({
type: z.literal('cancel-user-input'),
@@ -111,7 +104,7 @@ export const PromptResponseSchema = z.object({
promptId: z.string(),
sessionState: SessionStateSchema,
toolCalls: z.array(toolCallSchema),
- toolResults: z.array(toolResultSchema),
+ toolResults: z.array(toolResultPartSchema),
})
export type PromptResponse = z.infer
diff --git a/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts b/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts
index 4e3e6e131..b11e63a48 100644
--- a/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts
+++ b/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts
@@ -54,6 +54,7 @@ const definition: AgentDefinition = {
content:
"I've analyzed the git diff and recent commit history. Now I'll read any relevant files to better understand the context of these changes.",
},
+ includeToolCall: false,
} satisfies ToolCall
// Step 3: Let AI generate a step to decide which files to read.
@@ -67,6 +68,7 @@ const definition: AgentDefinition = {
content:
"Now I'll analyze the changes and create a commit with a good commit message.",
},
+ includeToolCall: false,
} satisfies ToolCall
yield 'STEP_ALL'
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index cfdfb6633..fa71fef3f 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -14,6 +14,10 @@
* export default definition
*/
+import type { Message } from './codebuff-message'
+import type * as Tools from './tools'
+type ToolName = Tools.ToolName
+
// ============================================================================
// Agent Definition and Utility Types
// ============================================================================
@@ -201,25 +205,6 @@ export interface AgentState {
output: Record | undefined
}
-/**
- * Message in conversation history
- */
-export interface Message {
- role: 'user' | 'assistant'
- content:
- | string
- | Array<
- | {
- type: 'text'
- text: string
- }
- | {
- type: 'image'
- image: string
- }
- >
-}
-
/**
* Context provided to handleSteps generator function
*/
@@ -236,6 +221,7 @@ export type ToolCall = {
[K in T]: {
toolName: K
input: Tools.GetToolParams
+ includeToolCall?: boolean
}
}[T]
@@ -362,6 +348,4 @@ export type ModelName =
| 'z-ai/glm-4.5:nitro'
| (string & {})
-import type * as Tools from './tools'
export type { Tools }
-type ToolName = Tools.ToolName
diff --git a/common/src/templates/initial-agents-dir/types/codebuff-message.ts b/common/src/templates/initial-agents-dir/types/codebuff-message.ts
new file mode 100644
index 000000000..97b9fdc1a
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/codebuff-message.ts
@@ -0,0 +1,77 @@
+import z from 'zod/v4'
+
+import {
+ filePartSchema,
+ imagePartSchema,
+ reasoningPartSchema,
+ textPartSchema,
+ toolCallPartSchema,
+ toolResultPartSchema,
+} from './content-part'
+import { providerMetadataSchema } from './provider-metadata'
+
+const auxiliaryDataSchema = z.object({
+ providerOptions: providerMetadataSchema.optional(),
+ timeToLive: z
+ .union([z.literal('agentStep'), z.literal('userPrompt')])
+ .optional(),
+ keepDuringTruncation: z.boolean().optional(),
+})
+
+export const systemMessageSchema = z
+ .object({
+ role: z.literal('system'),
+ content: z.string(),
+ })
+ .and(auxiliaryDataSchema)
+export type SystemMessage = z.infer
+
+export const userMessageSchema = z
+ .object({
+ role: z.literal('user'),
+ content: z.union([
+ z.string(),
+ z.union([textPartSchema, imagePartSchema, filePartSchema]).array(),
+ ]),
+ })
+ .and(auxiliaryDataSchema)
+export type UserMessage = z.infer
+
+export const assistantMessageSchema = z
+ .object({
+ role: z.literal('assistant'),
+ content: z.union([
+ z.string(),
+ z
+ .union([textPartSchema, reasoningPartSchema, toolCallPartSchema])
+ .array(),
+ ]),
+ })
+ .and(auxiliaryDataSchema)
+export type AssistantMessage = z.infer
+
+export const toolMessageSchema = z
+ .object({
+ role: z.literal('tool'),
+ content: toolResultPartSchema,
+ })
+ .and(auxiliaryDataSchema)
+export type ToolMessage = z.infer
+
+export const messageSchema = z
+ .union([
+ systemMessageSchema,
+ userMessageSchema,
+ assistantMessageSchema,
+ toolMessageSchema,
+ ])
+ .and(
+ z.object({
+ providerOptions: providerMetadataSchema.optional(),
+ timeToLive: z
+ .union([z.literal('agentStep'), z.literal('userPrompt')])
+ .optional(),
+ keepDuringTruncation: z.boolean().optional(),
+ }),
+ )
+export type Message = z.infer
diff --git a/common/src/templates/initial-agents-dir/types/content-part.ts b/common/src/templates/initial-agents-dir/types/content-part.ts
new file mode 100644
index 000000000..474ce335e
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/content-part.ts
@@ -0,0 +1,68 @@
+import z from 'zod/v4'
+
+import { dataContentSchema } from './data-content'
+import { jsonValueSchema } from './json'
+import { providerMetadataSchema } from './provider-metadata'
+
+export const textPartSchema = z.object({
+ type: z.literal('text'),
+ text: z.string(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type TextPart = z.infer
+
+export const imagePartSchema = z.object({
+ type: z.literal('image'),
+ image: z.union([dataContentSchema, z.instanceof(URL)]),
+ mediaType: z.string().optional(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type ImagePart = z.infer
+
+export const filePartSchema = z.object({
+ type: z.literal('file'),
+ data: z.union([dataContentSchema, z.instanceof(URL)]),
+ filename: z.string().optional(),
+ mediaType: z.string(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type FilePart = z.infer
+
+export const reasoningPartSchema = z.object({
+ type: z.literal('reasoning'),
+ text: z.string(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type ReasoningPart = z.infer
+
+export const toolCallPartSchema = z.object({
+ type: z.literal('tool-call'),
+ toolCallId: z.string(),
+ toolName: z.string(),
+ input: z.record(z.string(), z.unknown()),
+ providerOptions: providerMetadataSchema.optional(),
+ providerExecuted: z.boolean().optional(),
+})
+export type ToolCallPart = z.infer
+
+export const toolResultOutputSchema = z.discriminatedUnion('type', [
+ z.object({
+ type: z.literal('json'),
+ value: jsonValueSchema,
+ }),
+ z.object({
+ type: z.literal('media'),
+ data: z.string(),
+ mediaType: z.string(),
+ }),
+])
+export type ToolResultOutput = z.infer
+
+export const toolResultPartSchema = z.object({
+ type: z.literal('tool-result'),
+ toolCallId: z.string(),
+ toolName: z.string(),
+ output: toolResultOutputSchema.array(),
+ providerOptions: providerMetadataSchema.optional(),
+})
+export type ToolResultPart = z.infer
diff --git a/common/src/templates/initial-agents-dir/types/data-content.ts b/common/src/templates/initial-agents-dir/types/data-content.ts
new file mode 100644
index 000000000..eb5c2e1ff
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/data-content.ts
@@ -0,0 +1,14 @@
+import z from 'zod/v4'
+
+export const dataContentSchema = z.union([
+ z.string(),
+ z.instanceof(Uint8Array),
+ z.instanceof(ArrayBuffer),
+ z.custom(
+ // Buffer might not be available in some environments such as CloudFlare:
+ (value: unknown): value is Buffer =>
+ globalThis.Buffer?.isBuffer(value) ?? false,
+ { message: 'Must be a Buffer' },
+ ),
+])
+export type DataContent = z.infer
diff --git a/common/src/templates/initial-agents-dir/types/json.ts b/common/src/templates/initial-agents-dir/types/json.ts
new file mode 100644
index 000000000..167f8d051
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/json.ts
@@ -0,0 +1,29 @@
+import z from 'zod/v4'
+
+export type JSONValue =
+ | null
+ | string
+ | number
+ | boolean
+ | JSONObject
+ | JSONArray
+export const jsonValueSchema: z.ZodType = z.lazy(() =>
+ z.union([
+ z.null(),
+ z.string(),
+ z.number(),
+ z.boolean(),
+ jsonObjectSchema,
+ jsonArraySchema,
+ ]),
+)
+
+export const jsonObjectSchema: z.ZodType = z.lazy(() =>
+ z.record(z.string(), jsonValueSchema),
+)
+export type JSONObject = { [key: string]: JSONValue }
+
+export const jsonArraySchema: z.ZodType = z.lazy(() =>
+ z.array(jsonValueSchema),
+)
+export type JSONArray = JSONValue[]
diff --git a/common/src/templates/initial-agents-dir/types/provider-metadata.ts b/common/src/templates/initial-agents-dir/types/provider-metadata.ts
new file mode 100644
index 000000000..5749359fe
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/provider-metadata.ts
@@ -0,0 +1,10 @@
+import z from 'zod/v4'
+
+import { jsonValueSchema } from './json'
+
+export const providerMetadataSchema = z.record(
+ z.string(),
+ z.record(z.string(), jsonValueSchema),
+)
+
+export type ProviderMetadata = z.infer
diff --git a/common/src/templates/initial-agents-dir/types/tools.ts b/common/src/templates/initial-agents-dir/types/tools.ts
index 18b20f563..9acee92bd 100644
--- a/common/src/templates/initial-agents-dir/types/tools.ts
+++ b/common/src/templates/initial-agents-dir/types/tools.ts
@@ -1,3 +1,5 @@
+import type { Message } from './codebuff-message'
+
/**
* Union type of all available tool names
*/
@@ -118,15 +120,7 @@ export interface RunTerminalCommandParams {
* Set the conversation history to the provided messages.
*/
export interface SetMessagesParams {
- messages: {
- role: 'user' | 'assistant'
- content:
- | string
- | {
- type: 'text'
- text: string
- }[]
- }[]
+ messages: Message[]
}
/**
diff --git a/common/src/tools/compile-tool-definitions.ts b/common/src/tools/compile-tool-definitions.ts
index 70cd8294a..33b304533 100644
--- a/common/src/tools/compile-tool-definitions.ts
+++ b/common/src/tools/compile-tool-definitions.ts
@@ -1,7 +1,7 @@
import z from 'zod/v4'
import { publishedTools } from './constants'
-import { llmToolCallSchema } from './list'
+import { $toolParams } from './list'
/**
* Compiles all tool definitions into a single TypeScript definition file content.
@@ -9,7 +9,7 @@ import { llmToolCallSchema } from './list'
*/
export function compileToolDefinitions(): string {
const toolEntries = publishedTools.map(
- (toolName) => [toolName, llmToolCallSchema[toolName]] as const,
+ (toolName) => [toolName, $toolParams[toolName]] as const,
)
const toolInterfaces = toolEntries
diff --git a/common/src/tools/constants.ts b/common/src/tools/constants.ts
index 35b594e78..de954a5f6 100644
--- a/common/src/tools/constants.ts
+++ b/common/src/tools/constants.ts
@@ -1,4 +1,4 @@
-import type { ToolResultPart } from 'ai'
+import type { ToolResultOutput } from '../types/messages/content-part'
import type z from 'zod/v4'
export const toolNameParam = 'cb_tool_name'
@@ -64,12 +64,9 @@ export const publishedTools = [
export type ToolName = (typeof toolNames)[number]
-export type ToolParams = {
+export type $ToolParams = {
toolName: T
endsAgentStep: boolean
parameters: z.ZodType
-}
-
-export type StringToolResultPart = Omit & {
- output: { type: 'text' }
+ outputs: z.ZodType
}
diff --git a/common/src/tools/list.ts b/common/src/tools/list.ts
index 3eaba76f1..8c705396e 100644
--- a/common/src/tools/list.ts
+++ b/common/src/tools/list.ts
@@ -23,10 +23,14 @@ import { updateSubgoalParams } from './params/tool/update-subgoal'
import { webSearchParams } from './params/tool/web-search'
import { writeFileParams } from './params/tool/write-file'
-import type { ToolName, ToolParams } from './constants'
-import type { ToolCallPart } from 'ai'
+import type { ToolName, $ToolParams } from './constants'
+import type { ToolMessage } from '../types/messages/codebuff-message'
+import type {
+ ToolCallPart,
+ ToolResultPart,
+} from '../types/messages/content-part'
-export const llmToolCallSchema = {
+export const $toolParams = {
add_message: addMessageParams,
add_subgoal: addSubgoalParams,
browser_logs: browserLogsParams,
@@ -49,27 +53,47 @@ export const llmToolCallSchema = {
web_search: webSearchParams,
write_file: writeFileParams,
} satisfies {
- [K in ToolName]: ToolParams
+ [K in ToolName]: $ToolParams
}
// Tool call from LLM
export type CodebuffToolCall = {
[K in ToolName]: {
toolName: K
- input: z.infer<(typeof llmToolCallSchema)[K]['parameters']>
+ input: z.infer<(typeof $toolParams)[K]['parameters']>
} & Omit
}[T]
+export type CodebuffToolOutput = {
+ [K in ToolName]: z.infer<(typeof $toolParams)[K]['outputs']>
+}[T]
+export type CodebuffToolResult = {
+ [K in ToolName]: {
+ toolName: K
+ output: CodebuffToolOutput