From 59f5509a6e16b503d1aa657345963fa3d1aa15df Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Wed, 27 Aug 2025 12:23:14 -0700
Subject: [PATCH 01/18] modify CodebuffMessage

---
 common/src/types/json.ts                      |  8 +++----
 common/src/types/messages/codebuff-message.ts | 10 ++-------
 common/src/types/messages/content-part.ts     | 21 +++++++------------
 common/src/types/session-state.ts             | 10 ---------
 4 files changed, 13 insertions(+), 36 deletions(-)
diff --git a/common/src/types/json.ts b/common/src/types/json.ts
index 191393b94..167f8d051 100644
--- a/common/src/types/json.ts
+++ b/common/src/types/json.ts
@@ -13,17 +13,17 @@ export const jsonValueSchema: z.ZodType<JSONValue> = z.lazy(() =>
     z.string(),
     z.number(),
     z.boolean(),
-    JSONObjectSchema,
-    JSONArraySchema,
+    jsonObjectSchema,
+    jsonArraySchema,
   ]),
 )
 
-export const JSONObjectSchema: z.ZodType<JSONObject> = z.lazy(() =>
+export const jsonObjectSchema: z.ZodType<JSONObject> = z.lazy(() =>
   z.record(z.string(), jsonValueSchema),
 )
 export type JSONObject = { [key: string]: JSONValue }
 
-export const JSONArraySchema: z.ZodType<JSONArray> = z.lazy(() =>
+export const jsonArraySchema: z.ZodType<JSONArray> = z.lazy(() =>
   z.array(jsonValueSchema),
 )
 export type JSONArray = JSONValue[]
diff --git a/common/src/types/messages/codebuff-message.ts b/common/src/types/messages/codebuff-message.ts
index ca74fcf37..e8a109ff6 100644
--- a/common/src/types/messages/codebuff-message.ts
+++ b/common/src/types/messages/codebuff-message.ts
@@ -43,13 +43,7 @@ export const assistantCodebuffMessageSchema = z
     content: z.union([
       z.string(),
       z
-        .union([
-          textPartSchema,
-          filePartSchema,
-          reasoningPartSchema,
-          toolCallPartSchema,
-          toolResultPartSchema,
-        ])
+        .union([textPartSchema, reasoningPartSchema, toolCallPartSchema])
         .array(),
     ]),
   })
@@ -61,7 +55,7 @@ export type AssistantCodebuffMessage = z.infer<
 export const toolCodebuffMessageSchema = z
   .object({
     role: z.literal('tool'),
-    content: toolResultPartSchema.array(),
+    content: toolResultPartSchema,
   })
   .and(auxiliaryDataSchema)
 export type ToolCodebuffMessage = z.infer<typeof toolCodebuffMessageSchema>
diff --git a/common/src/types/messages/content-part.ts b/common/src/types/messages/content-part.ts
index b098af7b7..ff01b1f0c 100644
--- a/common/src/types/messages/content-part.ts
+++ b/common/src/types/messages/content-part.ts
@@ -48,19 +48,12 @@ export type ToolCallPart = z.infer<typeof toolCallPartSchema>
 export const toolResultOutputSchema = z.discriminatedUnion('type', [
   z.object({
     type: z.literal('json'),
-    value: z
-      .discriminatedUnion('type', [
-        z.object({
-          type: z.literal('json'),
-          value: jsonValueSchema,
-        }),
-        z.object({
-          type: z.literal('media'),
-          data: z.string(),
-          mediaType: z.string(),
-        }),
-      ])
-      .array(),
+    value: jsonValueSchema,
+  }),
+  z.object({
+    type: z.literal('media'),
+    data: z.string(),
+    mediaType: z.string(),
   }),
 ])
 export type ToolResultOutput = z.infer<typeof toolResultOutputSchema>
@@ -69,7 +62,7 @@ export const toolResultPartSchema = z.object({
   type: z.literal('tool-result'),
   toolCallId: z.string(),
   toolName: z.string(),
-  output: toolResultOutputSchema,
+  output: toolResultOutputSchema.array(),
   providerOptions: providerMetadataSchema.optional(),
 })
 export type ToolResultPart = z.infer<typeof toolResultPartSchema>
diff --git a/common/src/types/session-state.ts b/common/src/types/session-state.ts
index d879ed043..f6cf0fbef 100644
--- a/common/src/types/session-state.ts
+++ b/common/src/types/session-state.ts
@@ -14,16 +14,6 @@ export const toolCallSchema = z.object({
 })
 export type ToolCall = z.infer<typeof toolCallSchema>
 
-export const toolResultSchema = z.object({
-  toolName: z.string(),
-  toolCallId: z.string(),
-  output: z.object({
-    type: z.literal('text'),
-    value: z.string(),
-  }),
-})
-export type ToolResult = z.infer<typeof toolResultSchema>
-
 export const subgoalSchema = z.object({
   objective: z.string().optional(),
   status: z

From 933c5b558c9c2a2734cc4e18e5327d7cce7ef7d4 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Wed, 27 Aug 2025 13:53:05 -0700
Subject: [PATCH 02/18] add output types to tools

---
 common/src/actions.ts                         | 23 +++++----------
 common/src/tools/compile-tool-definitions.ts  |  4 +--
 common/src/tools/constants.ts                 |  9 ++----
 common/src/tools/list.ts                      | 28 ++++++++++++------
 common/src/tools/params/tool/add-message.ts   |  5 ++--
 common/src/tools/params/tool/add-subgoal.ts   | 12 ++++++--
 common/src/tools/params/tool/browser-logs.ts  | 12 ++++++--
 common/src/tools/params/tool/code-search.ts   | 20 +++++++++++--
 common/src/tools/params/tool/create-plan.ts   | 12 ++++++--
 common/src/tools/params/tool/end-turn.ts      |  5 ++--
 common/src/tools/params/tool/find-files.ts    | 12 ++++++--
 common/src/tools/params/tool/read-docs.ts     | 12 ++++++--
 common/src/tools/params/tool/read-files.ts    | 16 ++++++++--
 .../params/tool/run-file-change-hooks.ts      | 16 ++++++++--
 .../tools/params/tool/run-terminal-command.ts | 18 ++++++++++--
 common/src/tools/params/tool/set-messages.ts  | 14 ++++-----
 common/src/tools/params/tool/set-output.ts    | 12 ++++++--
 .../tools/params/tool/spawn-agent-inline.ts   |  5 ++--
 .../tools/params/tool/spawn-agents-async.ts   | 23 +++++++++++++--
 common/src/tools/params/tool/spawn-agents.ts  | 16 ++++++++--
 common/src/tools/params/tool/str-replace.ts   | 21 ++++++++++++--
 common/src/tools/params/tool/think-deeply.ts  |  5 ++--
 .../src/tools/params/tool/update-subgoal.ts   | 12 ++++++--
 common/src/tools/params/tool/web-search.ts    | 17 +++++++++--
 common/src/tools/params/tool/write-file.ts    | 12 ++++++--
 common/src/tools/utils.ts                     | 29 ++++---------------
 26 files changed, 267 insertions(+), 103 deletions(-)

diff --git a/common/src/actions.ts b/common/src/actions.ts
index e3a9f53a1..92c59032f 100644
--- a/common/src/actions.ts
+++ b/common/src/actions.ts
@@ -2,12 +2,12 @@ import { z } from 'zod/v4'
 
 import { costModes } from './constants'
 import { GrantTypeValues } from './types/grant'
-import { printModeEventSchema } from './types/print-mode'
 import {
-  SessionStateSchema,
-  toolCallSchema,
-  toolResultSchema,
-} from './types/session-state'
+  toolResultOutputSchema,
+  toolResultPartSchema,
+} from './types/messages/content-part'
+import { printModeEventSchema } from './types/print-mode'
+import { SessionStateSchema, toolCallSchema } from './types/session-state'
 import { ProjectFileContextSchema } from './util/file'
 
 export const FileChangeSchema = z.object({
@@ -29,7 +29,7 @@ export const CLIENT_ACTION_SCHEMA = z.discriminatedUnion('type', [
     authToken: z.string().optional(),
     costMode: z.enum(costModes).optional().default('normal'),
     sessionState: SessionStateSchema,
-    toolResults: z.array(toolResultSchema),
+    toolResults: z.array(toolResultPartSchema),
     model: z.string().optional(),
     repoUrl: z.string().optional(),
     agentId: z.string().optional(),
@@ -49,14 +49,7 @@ export const CLIENT_ACTION_SCHEMA = z.discriminatedUnion('type', [
   z.object({
     type: z.literal('tool-call-response'),
     requestId: z.string(),
-    success: z.boolean(),
-    output: z
-      .object({
-        type: z.literal('text'),
-        value: z.string(),
-      })
-      .optional(), // Tool execution result
-    error: z.string().optional(), // Error message if execution failed
+    output: toolResultOutputSchema.array(),
   }),
   z.object({
     type: z.literal('cancel-user-input'),
@@ -111,7 +104,7 @@ export const PromptResponseSchema = z.object({
   promptId: z.string(),
   sessionState: SessionStateSchema,
   toolCalls: z.array(toolCallSchema),
-  toolResults: z.array(toolResultSchema),
+  toolResults: z.array(toolResultPartSchema),
 })
 export type PromptResponse = z.infer<typeof PromptResponseSchema>
 
diff --git a/common/src/tools/compile-tool-definitions.ts b/common/src/tools/compile-tool-definitions.ts
index 70cd8294a..33b304533 100644
--- a/common/src/tools/compile-tool-definitions.ts
+++ b/common/src/tools/compile-tool-definitions.ts
@@ -1,7 +1,7 @@
 import z from 'zod/v4'
 
 import { publishedTools } from './constants'
-import { llmToolCallSchema } from './list'
+import { $toolParams } from './list'
 
 /**
  * Compiles all tool definitions into a single TypeScript definition file content.
@@ -9,7 +9,7 @@ import { llmToolCallSchema } from './list'
  */
 export function compileToolDefinitions(): string {
   const toolEntries = publishedTools.map(
-    (toolName) => [toolName, llmToolCallSchema[toolName]] as const,
+    (toolName) => [toolName, $toolParams[toolName]] as const,
   )
 
   const toolInterfaces = toolEntries
diff --git a/common/src/tools/constants.ts b/common/src/tools/constants.ts
index 35b594e78..de954a5f6 100644
--- a/common/src/tools/constants.ts
+++ b/common/src/tools/constants.ts
@@ -1,4 +1,4 @@
-import type { ToolResultPart } from 'ai'
+import type { ToolResultOutput } from '../types/messages/content-part'
 import type z from 'zod/v4'
 
 export const toolNameParam = 'cb_tool_name'
@@ -64,12 +64,9 @@ export const publishedTools = [
 
 export type ToolName = (typeof toolNames)[number]
 
-export type ToolParams<T extends ToolName = ToolName> = {
+export type $ToolParams<T extends ToolName = ToolName> = {
   toolName: T
   endsAgentStep: boolean
   parameters: z.ZodType
-}
-
-export type StringToolResultPart = Omit<ToolResultPart, 'type'> & {
-  output: { type: 'text' }
+  outputs: z.ZodType<ToolResultOutput[]>
 }
diff --git a/common/src/tools/list.ts b/common/src/tools/list.ts
index 3eaba76f1..42a4f3890 100644
--- a/common/src/tools/list.ts
+++ b/common/src/tools/list.ts
@@ -23,10 +23,13 @@ import { updateSubgoalParams } from './params/tool/update-subgoal'
 import { webSearchParams } from './params/tool/web-search'
 import { writeFileParams } from './params/tool/write-file'
 
-import type { ToolName, ToolParams } from './constants'
-import type { ToolCallPart } from 'ai'
+import type { ToolName, $ToolParams } from './constants'
+import type {
+  ToolCallPart,
+  ToolResultPart,
+} from '../types/messages/content-part'
 
-export const llmToolCallSchema = {
+export const $toolParams = {
   add_message: addMessageParams,
   add_subgoal: addSubgoalParams,
   browser_logs: browserLogsParams,
@@ -49,27 +52,34 @@ export const llmToolCallSchema = {
   web_search: webSearchParams,
   write_file: writeFileParams,
 } satisfies {
-  [K in ToolName]: ToolParams<K>
+  [K in ToolName]: $ToolParams<K>
 }
 
 // Tool call from LLM
 export type CodebuffToolCall<T extends ToolName = ToolName> = {
   [K in ToolName]: {
     toolName: K
-    input: z.infer<(typeof llmToolCallSchema)[K]['parameters']>
+    input: z.infer<(typeof $toolParams)[K]['parameters']>
   } & Omit<ToolCallPart, 'type'>
 }[T]
 
+export type CodebuffToolResult<T extends ToolName = ToolName> = {
+  [K in ToolName]: {
+    toolName: K
+    output: z.infer<(typeof $toolParams)[K]['outputs']>
+  } & Omit<ToolResultPart, 'type'>
+}[T]
+
 // Tool call to send to client
 export type ClientToolName = (typeof clientToolNames)[number]
 const clientToolCallSchema = z.discriminatedUnion('toolName', [
   z.object({
     toolName: z.literal('browser_logs'),
-    input: llmToolCallSchema.browser_logs.parameters,
+    input: $toolParams.browser_logs.parameters,
   }),
   z.object({
     toolName: z.literal('code_search'),
-    input: llmToolCallSchema.code_search.parameters,
+    input: $toolParams.code_search.parameters,
   }),
   z.object({
     toolName: z.literal('create_plan'),
@@ -77,11 +87,11 @@ const clientToolCallSchema = z.discriminatedUnion('toolName', [
   }),
   z.object({
     toolName: z.literal('run_file_change_hooks'),
-    input: llmToolCallSchema.run_file_change_hooks.parameters,
+    input: $toolParams.run_file_change_hooks.parameters,
   }),
   z.object({
     toolName: z.literal('run_terminal_command'),
-    input: llmToolCallSchema.run_terminal_command.parameters.and(
+    input: $toolParams.run_terminal_command.parameters.and(
       z.object({ mode: z.enum(['assistant', 'user']) }),
     ),
   }),
diff --git a/common/src/tools/params/tool/add-message.ts b/common/src/tools/params/tool/add-message.ts
index abdddbdd5..93eae3e6d 100644
--- a/common/src/tools/params/tool/add-message.ts
+++ b/common/src/tools/params/tool/add-message.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'add_message'
 const endsAgentStep = true
@@ -15,4 +15,5 @@ export const addMessageParams = {
     .describe(
       `Add a new message to the conversation history. To be used for complex requests that can't be solved in a single step, as you may forget what happened!`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/add-subgoal.ts b/common/src/tools/params/tool/add-subgoal.ts
index a44537dbd..837966077 100644
--- a/common/src/tools/params/tool/add-subgoal.ts
+++ b/common/src/tools/params/tool/add-subgoal.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'add_subgoal'
 const endsAgentStep = false
@@ -33,4 +33,12 @@ export const addSubgoalParams = {
     .describe(
       `Add a new subgoal for tracking progress. To be used for complex requests that can't be solved in a single step, as you may forget what happened!`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z.object({
+        message: z.string(),
+      }),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/browser-logs.ts b/common/src/tools/params/tool/browser-logs.ts
index 8dbb5ff5d..c0e3f35f1 100644
--- a/common/src/tools/params/tool/browser-logs.ts
+++ b/common/src/tools/params/tool/browser-logs.ts
@@ -1,6 +1,8 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import { BrowserResponseSchema } from '../../../browser-actions'
+
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'browser_logs'
 const endsAgentStep = true
@@ -21,4 +23,10 @@ export const browserLogsParams = {
       .optional()
       .describe("When to consider navigation successful. Defaults to 'load'."),
   }),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: BrowserResponseSchema,
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/code-search.ts b/common/src/tools/params/tool/code-search.ts
index f8dd4f17a..9b1c93e57 100644
--- a/common/src/tools/params/tool/code-search.ts
+++ b/common/src/tools/params/tool/code-search.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'code_search'
 const endsAgentStep = true
@@ -29,4 +29,20 @@ export const codeSearchParams = {
     .describe(
       `Search for string patterns in the project's files. This tool uses ripgrep (rg), a fast line-oriented search tool. Use this tool only when read_files is not sufficient to find the files you need.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z.union([
+        z.object({
+          stdout: z.string(),
+          stderr: z.string().optional(),
+          exitCode: z.number().optional(),
+          message: z.string(),
+        }),
+        z.object({
+          errorMessage: z.string(),
+        }),
+      ]),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/create-plan.ts b/common/src/tools/params/tool/create-plan.ts
index ffacb50a1..eb05159be 100644
--- a/common/src/tools/params/tool/create-plan.ts
+++ b/common/src/tools/params/tool/create-plan.ts
@@ -1,6 +1,8 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import { updateFileResultSchema } from './str-replace'
+
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'create_plan'
 const endsAgentStep = false
@@ -21,4 +23,10 @@ export const createPlanParams = {
         .describe(`A detailed plan to solve the user's request.`),
     })
     .describe(`Generate a detailed markdown plan for complex tasks.`),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: updateFileResultSchema,
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/end-turn.ts b/common/src/tools/params/tool/end-turn.ts
index 167200778..fc13c3295 100644
--- a/common/src/tools/params/tool/end-turn.ts
+++ b/common/src/tools/params/tool/end-turn.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'end_turn'
 const endsAgentStep = true
@@ -12,4 +12,5 @@ export const endTurnParams = {
     .describe(
       `End your turn, regardless of any new tool results that might be coming. This will allow the user to type another prompt.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/find-files.ts b/common/src/tools/params/tool/find-files.ts
index 74412460a..f04fd2682 100644
--- a/common/src/tools/params/tool/find-files.ts
+++ b/common/src/tools/params/tool/find-files.ts
@@ -1,6 +1,8 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import { fileContentsSchema } from './read-files'
+
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'find_files'
 const endsAgentStep = true
@@ -19,4 +21,10 @@ export const findFilesParams = {
     .describe(
       `Find several files related to a brief natural language description of the files or the name of a function or class you are looking for.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: fileContentsSchema.array(),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/read-docs.ts b/common/src/tools/params/tool/read-docs.ts
index fff3c1bfd..0893025bd 100644
--- a/common/src/tools/params/tool/read-docs.ts
+++ b/common/src/tools/params/tool/read-docs.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'read_docs'
 const endsAgentStep = true
@@ -31,4 +31,12 @@ export const readDocsParams = {
     .describe(
       `Fetch up-to-date documentation for libraries and frameworks using Context7 API.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z.object({
+        documentation: z.string(),
+      }),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/read-files.ts b/common/src/tools/params/tool/read-files.ts
index 9ee0d733d..679875c3a 100644
--- a/common/src/tools/params/tool/read-files.ts
+++ b/common/src/tools/params/tool/read-files.ts
@@ -1,6 +1,12 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
+
+export const fileContentsSchema = z.object({
+  path: z.string(),
+  content: z.string(),
+  referencedBy: z.record(z.string(), z.string().array()).optional(),
+})
 
 const toolName = 'read_files'
 const endsAgentStep = true
@@ -23,4 +29,10 @@ export const readFilesParams = {
     .describe(
       `Read the multiple files from disk and return their contents. Use this tool to read as many files as would be helpful to answer the user's request.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: fileContentsSchema.array(),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/run-file-change-hooks.ts b/common/src/tools/params/tool/run-file-change-hooks.ts
index 19060bc68..9a76e970b 100644
--- a/common/src/tools/params/tool/run-file-change-hooks.ts
+++ b/common/src/tools/params/tool/run-file-change-hooks.ts
@@ -1,6 +1,8 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import { terminalCommandOutputSchema } from './run-terminal-command'
+
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'run_file_change_hooks'
 const endsAgentStep = true
@@ -14,4 +16,14 @@ export const runFileChangeHooksParams = {
         `List of file paths that were changed and should trigger file change hooks`,
       ),
   }),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: terminalCommandOutputSchema.and(
+        z.object({
+          hookName: z.string(),
+        }),
+      ),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/run-terminal-command.ts b/common/src/tools/params/tool/run-terminal-command.ts
index 1d6c89657..fbacc0591 100644
--- a/common/src/tools/params/tool/run-terminal-command.ts
+++ b/common/src/tools/params/tool/run-terminal-command.ts
@@ -1,6 +1,14 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
+
+export const terminalCommandOutputSchema = z.object({
+  command: z.string(),
+  startingCwd: z.string().optional(),
+  message: z.string(),
+  stdout: z.string(),
+  exitCode: z.number().optional(),
+})
 
 const toolName = 'run_terminal_command'
 const endsAgentStep = true
@@ -38,4 +46,10 @@ export const runTerminalCommandParams = {
     .describe(
       `Execute a CLI command from the **project root** (different from the user's cwd).`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: terminalCommandOutputSchema,
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/set-messages.ts b/common/src/tools/params/tool/set-messages.ts
index 8b5c19c49..c7f461b08 100644
--- a/common/src/tools/params/tool/set-messages.ts
+++ b/common/src/tools/params/tool/set-messages.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'set_messages'
 const endsAgentStep = true
@@ -16,15 +16,15 @@ export const setMessagesParams = {
             role: z.enum(['user', 'assistant']),
             content: z.string().or(
               z.array(
-                z
-                  .object({
-                    type: z.enum(['text']),
-                    text: z.string(),
-                  })
+                z.object({
+                  type: z.enum(['text']),
+                  text: z.string(),
+                }),
               ),
             ),
           }),
       ),
     })
     .describe(`Set the conversation history to the provided messages.`),
-} satisfies ToolParams
+  outputs: z.tuple([]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/set-output.ts b/common/src/tools/params/tool/set-output.ts
index 5a9c317c0..ff76844d7 100644
--- a/common/src/tools/params/tool/set-output.ts
+++ b/common/src/tools/params/tool/set-output.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'set_output'
 const endsAgentStep = false
@@ -12,4 +12,12 @@ export const setOutputParams = {
     .describe(
       'JSON object to set as the agent output. This completely replaces any previous output. If the agent was spawned, this value will be passed back to its parent. If the agent has an outputSchema defined, the output will be validated against it.',
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z.object({
+        message: z.string(),
+      }),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/spawn-agent-inline.ts b/common/src/tools/params/tool/spawn-agent-inline.ts
index 75e7ea469..4a22dc0b5 100644
--- a/common/src/tools/params/tool/spawn-agent-inline.ts
+++ b/common/src/tools/params/tool/spawn-agent-inline.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'spawn_agent_inline'
 const endsAgentStep = true
@@ -19,4 +19,5 @@ export const spawnAgentInlineParams = {
     .describe(
       `Spawn a single agent that runs within the current message history.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/spawn-agents-async.ts b/common/src/tools/params/tool/spawn-agents-async.ts
index 355e6b815..c2c7feaa7 100644
--- a/common/src/tools/params/tool/spawn-agents-async.ts
+++ b/common/src/tools/params/tool/spawn-agents-async.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'spawn_agents_async'
 const endsAgentStep = false
@@ -19,4 +19,23 @@ export const spawnAgentsAsyncParams = {
       })
       .array(),
   }),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z
+        .discriminatedUnion('success', [
+          z.object({
+            agentType: z.string(),
+            success: z.literal(true),
+            agentId: z.string(),
+          }),
+          z.object({
+            agentType: z.string(),
+            success: z.literal(false),
+            errorMessage: z.string(),
+          }),
+        ])
+        .array(),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/spawn-agents.ts b/common/src/tools/params/tool/spawn-agents.ts
index c96321d00..f2f1ee334 100644
--- a/common/src/tools/params/tool/spawn-agents.ts
+++ b/common/src/tools/params/tool/spawn-agents.ts
@@ -1,6 +1,7 @@
+import { jsonObjectSchema } from 'src/types/json'
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'spawn_agents'
 const endsAgentStep = true
@@ -23,4 +24,15 @@ export const spawnAgentsParams = {
     .describe(
       `Spawn multiple agents and send a prompt and/or parameters to each of them. These agents will run in parallel. Note that that means they will run independently. If you need to run agents sequentially, use spawn_agents with one agent at a time instead.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z
+        .object({
+          agent: z.string(),
+        })
+        .and(jsonObjectSchema)
+        .array(),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/str-replace.ts b/common/src/tools/params/tool/str-replace.ts
index 7da125ddf..913485269 100644
--- a/common/src/tools/params/tool/str-replace.ts
+++ b/common/src/tools/params/tool/str-replace.ts
@@ -1,6 +1,17 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
+
+export const updateFileResultSchema = z.union([
+  z.object({
+    file: z.string(),
+    message: z.string(),
+    unifiedDiff: z.string(),
+  }),
+  z.object({
+    errorMessage: z.string(),
+  }),
+])
 
 const toolName = 'str_replace'
 const endsAgentStep = false
@@ -42,4 +53,10 @@ export const strReplaceParams = {
         .describe('Array of replacements to make.'),
     })
     .describe(`Replace strings in a file with new strings.`),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: updateFileResultSchema,
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/think-deeply.ts b/common/src/tools/params/tool/think-deeply.ts
index 80e6b5286..275dfe05d 100644
--- a/common/src/tools/params/tool/think-deeply.ts
+++ b/common/src/tools/params/tool/think-deeply.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'think_deeply'
 const endsAgentStep = false
@@ -19,4 +19,5 @@ export const thinkDeeplyParams = {
     .describe(
       `Deeply consider complex tasks by brainstorming approaches and tradeoffs step-by-step.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/update-subgoal.ts b/common/src/tools/params/tool/update-subgoal.ts
index 877b774db..dc19235ad 100644
--- a/common/src/tools/params/tool/update-subgoal.ts
+++ b/common/src/tools/params/tool/update-subgoal.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'update_subgoal'
 const endsAgentStep = false
@@ -28,4 +28,12 @@ export const updateSubgoalParams = {
     .describe(
       `Update a subgoal in the context given the id, and optionally the status or plan, or a new log to append. Feel free to update any combination of the status, plan, or log in one invocation.`,
     ),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z.object({
+        message: z.string(),
+      }),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/web-search.ts b/common/src/tools/params/tool/web-search.ts
index 37754dcd2..65f8e787c 100644
--- a/common/src/tools/params/tool/web-search.ts
+++ b/common/src/tools/params/tool/web-search.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'web_search'
 const endsAgentStep = true
@@ -22,4 +22,17 @@ export const webSearchParams = {
         ),
     })
     .describe(`Search the web for current information using Linkup API.`),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: z.union([
+        z.object({
+          result: z.string(),
+        }),
+        z.object({
+          errorMessage: z.string(),
+        }),
+      ]),
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/params/tool/write-file.ts b/common/src/tools/params/tool/write-file.ts
index d69d5e413..4afd64fff 100644
--- a/common/src/tools/params/tool/write-file.ts
+++ b/common/src/tools/params/tool/write-file.ts
@@ -1,6 +1,8 @@
 import z from 'zod/v4'
 
-import type { ToolParams } from '../../constants'
+import { updateFileResultSchema } from './str-replace'
+
+import type { $ToolParams } from '../../constants'
 
 const toolName = 'write_file'
 const endsAgentStep = false
@@ -19,4 +21,10 @@ export const writeFileParams = {
       content: z.string().describe(`Edit snippet to apply to the file.`),
     })
     .describe(`Create or edit a file with the given content.`),
-} satisfies ToolParams
+  outputs: z.tuple([
+    z.object({
+      type: z.literal('json'),
+      value: updateFileResultSchema,
+    }),
+  ]),
+} satisfies $ToolParams
diff --git a/common/src/tools/utils.ts b/common/src/tools/utils.ts
index bba2395a1..e787592c2 100644
--- a/common/src/tools/utils.ts
+++ b/common/src/tools/utils.ts
@@ -4,23 +4,21 @@ import {
   startToolTag,
   toolNameParam,
 } from './constants'
-import { llmToolCallSchema } from './list'
-import { closeXml } from '../util/xml'
+import { $toolParams } from './list'
 
-import type { StringToolResultPart, ToolName } from './constants'
+import type { ToolName } from './constants'
 import type z from 'zod/v4'
 
 export function getToolCallString<T extends ToolName | (string & {})>(
   toolName: T,
   params: T extends ToolName
-    ? z.input<(typeof llmToolCallSchema)[T]['parameters']>
+    ? z.input<(typeof $toolParams)[T]['parameters']>
     : Record<string, any>,
   ...endsAgentStep: T extends ToolName ? [] : [boolean]
 ): string {
   const endsAgentStepValue =
-    toolName in llmToolCallSchema
-      ? llmToolCallSchema[toolName as keyof typeof llmToolCallSchema]
-          .endsAgentStep
+    toolName in $toolParams
+      ? $toolParams[toolName as keyof typeof $toolParams].endsAgentStep
       : endsAgentStep[0] ?? false
   const obj: Record<string, any> = {
     [toolNameParam]: toolName,
@@ -31,20 +29,3 @@ export function getToolCallString<T extends ToolName | (string & {})>(
   }
   return [startToolTag, JSON.stringify(obj, null, 2), endToolTag].join('')
 }
-
-export function renderToolResults(toolResults: StringToolResultPart[]): string {
-  if (toolResults.length === 0) {
-    return ''
-  }
-
-  return `
-${toolResults
-  .map(
-    (result) => `<tool_result>
-<tool>${result.toolName}${closeXml('tool')}
-<result>${result.output.value}${closeXml('result')}
-${closeXml('tool_result')}`,
-  )
-  .join('\n\n')}
-`.trim()
-}

From 7cc8f64cb0bd1bbf99d05a3a14abf568180c9f57 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Wed, 27 Aug 2025 15:19:45 -0700
Subject: [PATCH 03/18] add types to tool handlers

---
 backend/src/tools/definitions/list.ts         |   6 +-
 .../tools/handlers/handler-function-type.ts   |   8 +-
 .../src/tools/handlers/tool/add-message.ts    |  15 +-
 .../src/tools/handlers/tool/add-subgoal.ts    |  19 +-
 .../src/tools/handlers/tool/browser-logs.ts   |  12 +-
 .../src/tools/handlers/tool/code-search.ts    |  12 +-
 .../src/tools/handlers/tool/create-plan.ts    |  10 +-
 backend/src/tools/handlers/tool/end-turn.ts   |  19 +-
 backend/src/tools/handlers/tool/find-files.ts |  44 ++++-
 backend/src/tools/handlers/tool/read-docs.ts  |  16 +-
 backend/src/tools/handlers/tool/read-files.ts |  19 +-
 .../handlers/tool/run-file-change-hooks.ts    |  17 +-
 .../handlers/tool/run-terminal-command.ts     |  29 +--
 .../src/tools/handlers/tool/set-messages.ts   |  14 +-
 backend/src/tools/handlers/tool/set-output.ts |  24 ++-
 .../tools/handlers/tool/spawn-agent-inline.ts |  18 +-
 .../tools/handlers/tool/spawn-agent-utils.ts  |  76 +++++---
 .../tools/handlers/tool/spawn-agents-async.ts |  49 +++--
 .../src/tools/handlers/tool/spawn-agents.ts   |  30 ++-
 .../src/tools/handlers/tool/str-replace.ts    |   5 +-
 .../src/tools/handlers/tool/think-deeply.ts   |   9 +-
 .../src/tools/handlers/tool/update-subgoal.ts |  24 ++-
 backend/src/tools/handlers/tool/web-search.ts | 171 ++++++++++--------
 backend/src/tools/handlers/tool/write-file.ts |  89 +++++----
 common/src/tools/list.ts                      |   5 +-
 common/src/tools/params/tool/find-files.ts    |   7 +-
 common/src/tools/params/tool/str-replace.ts   |   1 +
 27 files changed, 484 insertions(+), 264 deletions(-)

diff --git a/backend/src/tools/definitions/list.ts b/backend/src/tools/definitions/list.ts
index 33cc4f53c..b4f21d93b 100644
--- a/backend/src/tools/definitions/list.ts
+++ b/backend/src/tools/definitions/list.ts
@@ -1,4 +1,4 @@
-import { llmToolCallSchema } from '@codebuff/common/tools/list'
+import { $toolParams } from '@codebuff/common/tools/list'
 
 import { addMessageTool } from './tool/add-message'
 import { addSubgoalTool } from './tool/add-subgoal'
@@ -53,7 +53,7 @@ const toolDescriptions = {
 }
 
 export type ToolDefinition<T extends ToolName = ToolName> = {
-  [K in ToolName]: (typeof toolDescriptions)[K] & (typeof llmToolCallSchema)[K]
+  [K in ToolName]: (typeof toolDescriptions)[K] & (typeof $toolParams)[K]
 }[T]
 
 export const codebuffToolDefs = Object.fromEntries(
@@ -61,7 +61,7 @@ export const codebuffToolDefs = Object.fromEntries(
     toolName,
     {
       ...toolDescriptions[toolName as ToolName],
-      ...llmToolCallSchema[toolName as ToolName],
+      ...$toolParams[toolName as ToolName],
     } satisfies ToolDefinition,
   ]),
 ) as { [K in ToolName]: ToolDefinition<K> } satisfies ToolSet
diff --git a/backend/src/tools/handlers/handler-function-type.ts b/backend/src/tools/handlers/handler-function-type.ts
index 7ce02ce53..ebcc6d824 100644
--- a/backend/src/tools/handlers/handler-function-type.ts
+++ b/backend/src/tools/handlers/handler-function-type.ts
@@ -3,8 +3,10 @@ import type {
   ClientToolCall,
   ClientToolName,
   CodebuffToolCall,
+  CodebuffToolOutput,
+  CodebuffToolResult,
 } from '@codebuff/common/tools/list'
-import { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 
 type PresentOrAbsent<K extends PropertyKey, V> =
@@ -31,9 +33,9 @@ export type CodebuffToolHandlerFunction<T extends ToolName = ToolName> = (
     'requestClientToolCall',
     (
       toolCall: ClientToolCall<T extends ClientToolName ? T : never>,
-    ) => Promise<string>
+    ) => Promise<CodebuffToolOutput<T extends ClientToolName ? T : never>>
   >,
 ) => {
-  result: Promise<string | undefined>
+  result: Promise<CodebuffToolResult<T>['output']>
   state?: Record<string, any>
 }
diff --git a/backend/src/tools/handlers/tool/add-message.ts b/backend/src/tools/handlers/tool/add-message.ts
index fa1072b79..f30514484 100644
--- a/backend/src/tools/handlers/tool/add-message.ts
+++ b/backend/src/tools/handlers/tool/add-message.ts
@@ -1,5 +1,8 @@
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 
 export const handleAddMessage = (({
@@ -11,14 +14,16 @@ export const handleAddMessage = (({
   toolCall: CodebuffToolCall<'add_message'>
   getLatestState: () => { messages: CodebuffMessage[] }
 }): {
-  result: Promise<undefined>
+  result: Promise<CodebuffToolOutput<'add_message'>>
   state: {}
 } => {
   return {
-    result: previousToolCallFinished.then(() => {
+    result: (async () => {
+      await previousToolCallFinished
+
       getLatestState().messages.push(toolCall.input)
-      return undefined
-    }),
+      return []
+    })(),
     state: {},
   }
 }) satisfies CodebuffToolHandlerFunction<'add_message'>
diff --git a/backend/src/tools/handlers/tool/add-subgoal.ts b/backend/src/tools/handlers/tool/add-subgoal.ts
index 3c9c60b33..fec5d87cc 100644
--- a/backend/src/tools/handlers/tool/add-subgoal.ts
+++ b/backend/src/tools/handlers/tool/add-subgoal.ts
@@ -1,7 +1,10 @@
 import { buildArray } from '@codebuff/common/util/array'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { Subgoal } from '@codebuff/common/types/session-state'
 
 export const handleAddSubgoal = ((params: {
@@ -9,7 +12,7 @@ export const handleAddSubgoal = ((params: {
   toolCall: CodebuffToolCall<'add_subgoal'>
   state: { agentContext?: Record<string, Subgoal> }
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<'add_subgoal'>>
   state: { agentContext: Record<string, Subgoal> }
 } => {
   const { previousToolCallFinished, toolCall, state } = params
@@ -23,7 +26,17 @@ export const handleAddSubgoal = ((params: {
   }
 
   return {
-    result: previousToolCallFinished.then(() => 'Successfully added subgoal'),
+    result: (async () => {
+      await previousToolCallFinished
+      return [
+        {
+          type: 'json',
+          value: {
+            message: 'Successfully added subgoal',
+          },
+        },
+      ]
+    })(),
     state: { agentContext },
   }
 }) satisfies CodebuffToolHandlerFunction<'add_subgoal'>
diff --git a/backend/src/tools/handlers/tool/browser-logs.ts b/backend/src/tools/handlers/tool/browser-logs.ts
index 7d1234dc3..dc2f460d4 100644
--- a/backend/src/tools/handlers/tool/browser-logs.ts
+++ b/backend/src/tools/handlers/tool/browser-logs.ts
@@ -2,6 +2,7 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
 import type {
   ClientToolCall,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 
 export const handleBrowserLogs = ((params: {
@@ -9,14 +10,15 @@ export const handleBrowserLogs = ((params: {
   toolCall: CodebuffToolCall<'browser_logs'>
   requestClientToolCall: (
     toolCall: ClientToolCall<'browser_logs'>,
-  ) => Promise<string>
-}): { result: Promise<string>; state: {} } => {
+  ) => Promise<CodebuffToolOutput<'browser_logs'>>
+}): { result: Promise<CodebuffToolOutput<'browser_logs'>>; state: {} } => {
   const { previousToolCallFinished, toolCall, requestClientToolCall } = params
 
   return {
-    result: previousToolCallFinished.then(() =>
-      requestClientToolCall(toolCall),
-    ),
+    result: (async () => {
+      await previousToolCallFinished
+      return await requestClientToolCall(toolCall)
+    })(),
     state: {},
   }
 }) satisfies CodebuffToolHandlerFunction<'browser_logs'>
diff --git a/backend/src/tools/handlers/tool/code-search.ts b/backend/src/tools/handlers/tool/code-search.ts
index 3221985b7..fb05802cf 100644
--- a/backend/src/tools/handlers/tool/code-search.ts
+++ b/backend/src/tools/handlers/tool/code-search.ts
@@ -2,6 +2,7 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
 import type {
   ClientToolCall,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 
 export const handleCodeSearch = ((params: {
@@ -9,14 +10,15 @@ export const handleCodeSearch = ((params: {
   toolCall: CodebuffToolCall<'code_search'>
   requestClientToolCall: (
     toolCall: ClientToolCall<'code_search'>,
-  ) => Promise<string>
-}): { result: Promise<string>; state: {} } => {
+  ) => Promise<CodebuffToolOutput<'code_search'>>
+}): { result: Promise<CodebuffToolOutput<'code_search'>>; state: {} } => {
   const { previousToolCallFinished, toolCall, requestClientToolCall } = params
 
   return {
-    result: previousToolCallFinished.then(() =>
-      requestClientToolCall(toolCall),
-    ),
+    result: (async () => {
+      await previousToolCallFinished
+      return await requestClientToolCall(toolCall)
+    })(),
     state: {},
   }
 }) satisfies CodebuffToolHandlerFunction<'code_search'>
diff --git a/backend/src/tools/handlers/tool/create-plan.ts b/backend/src/tools/handlers/tool/create-plan.ts
index d242b5a7c..9363c6f80 100644
--- a/backend/src/tools/handlers/tool/create-plan.ts
+++ b/backend/src/tools/handlers/tool/create-plan.ts
@@ -12,6 +12,7 @@ import type {
 import type {
   ClientToolCall,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 
 export const handleCreatePlan = ((params: {
@@ -19,7 +20,7 @@ export const handleCreatePlan = ((params: {
   toolCall: CodebuffToolCall<'create_plan'>
   requestClientToolCall: (
     toolCall: ClientToolCall<'create_plan'>,
-  ) => Promise<string>
+  ) => Promise<CodebuffToolOutput<'create_plan'>>
   writeToClient: (chunk: string) => void
 
   getLatestState: () => FileProcessingState
@@ -32,7 +33,7 @@ export const handleCreatePlan = ((params: {
     repoId?: string
   } & OptionalFileProcessingState
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<'create_plan'>>
   state: FileProcessingState
 } => {
   const {
@@ -86,14 +87,15 @@ export const handleCreatePlan = ((params: {
   fileProcessingState.allPromises.push(Promise.resolve(change))
 
   return {
-    result: previousToolCallFinished.then(async () => {
+    result: (async () => {
+      await previousToolCallFinished
       return await postStreamProcessing<'create_plan'>(
         change,
         getLatestState(),
         writeToClient,
         requestClientToolCall,
       )
-    }),
+    })(),
     state: fileProcessingState,
   }
 }) satisfies CodebuffToolHandlerFunction<'create_plan'>
diff --git a/backend/src/tools/handlers/tool/end-turn.ts b/backend/src/tools/handlers/tool/end-turn.ts
index 80be2fe4e..a04dbceab 100644
--- a/backend/src/tools/handlers/tool/end-turn.ts
+++ b/backend/src/tools/handlers/tool/end-turn.ts
@@ -1,9 +1,20 @@
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 
-export const handleEndTurn = ((params: {
+export const handleEndTurn = (({
+  previousToolCallFinished,
+}: {
   previousToolCallFinished: Promise<any>
   toolCall: CodebuffToolCall<'end_turn'>
-}): { result: Promise<string>; state: {} } => {
-  return { result: params.previousToolCallFinished.then(() => ''), state: {} }
+}): { result: Promise<CodebuffToolOutput<'end_turn'>>; state: {} } => {
+  return {
+    result: (async () => {
+      await previousToolCallFinished
+      return []
+    })(),
+    state: {},
+  }
 }) satisfies CodebuffToolHandlerFunction<'end_turn'>
diff --git a/backend/src/tools/handlers/tool/find-files.ts b/backend/src/tools/handlers/tool/find-files.ts
index a4fb94b8f..554544ba9 100644
--- a/backend/src/tools/handlers/tool/find-files.ts
+++ b/backend/src/tools/handlers/tool/find-files.ts
@@ -14,7 +14,10 @@ import { requestFiles } from '../../../websockets/websocket-action'
 import type { TextBlock } from '../../../llm-apis/claude'
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
 import type { GetExpandedFileContextForTrainingBlobTrace } from '@codebuff/bigquery'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
@@ -39,7 +42,7 @@ export const handleFindFiles = ((params: {
     repoId?: string
     messages?: CodebuffMessage[]
   }
-}): { result: Promise<string>; state: {} } => {
+}): { result: Promise<CodebuffToolOutput<'find_files'>>; state: {} } => {
   const {
     previousToolCallFinished,
     toolCall,
@@ -73,7 +76,9 @@ export const handleFindFiles = ((params: {
     userId,
   })
 
-  const triggerFindFiles = async () => {
+  const triggerFindFiles: () => Promise<
+    CodebuffToolOutput<'find_files'>
+  > = async () => {
     const requestedFiles = await requestRelevantFiles(
       { messages, system },
       fileContext,
@@ -119,16 +124,41 @@ export const handleFindFiles = ((params: {
       }
 
       if (addedFiles.length > 0) {
-        return renderReadFilesResult(addedFiles, fileContext.tokenCallers ?? {})
+        return [
+          {
+            type: 'json',
+            value: renderReadFilesResult(
+              addedFiles,
+              fileContext.tokenCallers ?? {},
+            ),
+          },
+        ]
       }
-      return `No new relevant files found for prompt: ${prompt}`
+      return [
+        {
+          type: 'json',
+          value: {
+            message: `No new relevant files found for prompt: ${prompt}`,
+          },
+        },
+      ]
     } else {
-      return `No relevant files found for prompt: ${prompt}`
+      return [
+        {
+          type: 'json',
+          value: {
+            message: `No relevant files found for prompt: ${prompt}`,
+          },
+        },
+      ]
     }
   }
 
   return {
-    result: previousToolCallFinished.then(triggerFindFiles),
+    result: (async () => {
+      await previousToolCallFinished
+      return await triggerFindFiles()
+    })(),
     state: {},
   }
 }) satisfies CodebuffToolHandlerFunction<'find_files'>
diff --git a/backend/src/tools/handlers/tool/read-docs.ts b/backend/src/tools/handlers/tool/read-docs.ts
index e27f34ab8..cf2b9eae2 100644
--- a/backend/src/tools/handlers/tool/read-docs.ts
+++ b/backend/src/tools/handlers/tool/read-docs.ts
@@ -2,7 +2,10 @@ import { fetchContext7LibraryDocumentation } from '../../../llm-apis/context7-ap
 import { logger } from '../../../util/logger'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 
 export const handleReadDocs = (({
   previousToolCallFinished,
@@ -25,7 +28,7 @@ export const handleReadDocs = (({
     repoId?: string
   }
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<'read_docs'>>
   state: {}
 } => {
   const { libraryTitle, topic, max_tokens } = toolCall.input
@@ -121,7 +124,14 @@ export const handleReadDocs = (({
   return {
     result: (async () => {
       await previousToolCallFinished
-      return await documentationPromise
+      return [
+        {
+          type: 'json',
+          value: {
+            documentation: await documentationPromise,
+          },
+        },
+      ]
     })(),
     state: {},
   }
diff --git a/backend/src/tools/handlers/tool/read-files.ts b/backend/src/tools/handlers/tool/read-files.ts
index 89bd8666a..b2e681897 100644
--- a/backend/src/tools/handlers/tool/read-files.ts
+++ b/backend/src/tools/handlers/tool/read-files.ts
@@ -2,14 +2,18 @@ import { getFileReadingUpdates } from '../../../get-file-reading-updates'
 import { renderReadFilesResult } from '../../../util/parse-tool-call-xml'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
 
+type ToolName = 'read_files'
 export const handleReadFiles = ((params: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'read_files'>
+  toolCall: CodebuffToolCall<ToolName>
 
   agentStepId: string
   clientSessionId: string
@@ -24,7 +28,7 @@ export const handleReadFiles = ((params: {
     messages?: CodebuffMessage[]
   }
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<ToolName>>
   state: {}
 } => {
   const {
@@ -77,8 +81,13 @@ export const handleReadFiles = ((params: {
   return {
     result: (async () => {
       await previousToolCallFinished
-      return await readFilesResultsPromise
+      return [
+        {
+          type: 'json',
+          value: await readFilesResultsPromise,
+        },
+      ]
     })(),
     state: {},
   }
-}) satisfies CodebuffToolHandlerFunction<'read_files'>
+}) satisfies CodebuffToolHandlerFunction<ToolName>
diff --git a/backend/src/tools/handlers/tool/run-file-change-hooks.ts b/backend/src/tools/handlers/tool/run-file-change-hooks.ts
index 28e00bb3b..e0a20e895 100644
--- a/backend/src/tools/handlers/tool/run-file-change-hooks.ts
+++ b/backend/src/tools/handlers/tool/run-file-change-hooks.ts
@@ -2,21 +2,24 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
 import type {
   ClientToolCall,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 
+type ToolName = 'run_file_change_hooks'
 export const handleRunFileChangeHooks = ((params: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'run_file_change_hooks'>
+  toolCall: CodebuffToolCall<ToolName>
   requestClientToolCall: (
-    toolCall: ClientToolCall<'run_file_change_hooks'>,
-  ) => Promise<string>
-}): { result: Promise<string>; state: {} } => {
+    toolCall: ClientToolCall<ToolName>,
+  ) => Promise<CodebuffToolOutput<ToolName>>
+}): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
   const { previousToolCallFinished, toolCall, requestClientToolCall } = params
 
   return {
-    result: previousToolCallFinished.then(() =>
-      requestClientToolCall(toolCall),
-    ),
+    result: (async () => {
+      await previousToolCallFinished
+      return await requestClientToolCall(toolCall)
+    })(),
     state: {},
   }
 }) satisfies CodebuffToolHandlerFunction<'run_file_change_hooks'>
diff --git a/backend/src/tools/handlers/tool/run-terminal-command.ts b/backend/src/tools/handlers/tool/run-terminal-command.ts
index 4d094539f..6cc2e1ada 100644
--- a/backend/src/tools/handlers/tool/run-terminal-command.ts
+++ b/backend/src/tools/handlers/tool/run-terminal-command.ts
@@ -2,18 +2,22 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
 import type {
   ClientToolCall,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 
-export const handleRunTerminalCommand = ((params: {
+type ToolName = 'run_terminal_command'
+export const handleRunTerminalCommand = (({
+  previousToolCallFinished,
+  toolCall,
+  requestClientToolCall,
+}: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'run_terminal_command'>
+  toolCall: CodebuffToolCall<ToolName>
   requestClientToolCall: (
-    toolCall: ClientToolCall<'run_terminal_command'>,
-  ) => Promise<string>
-}): { result: Promise<string>; state: {} } => {
-  const { previousToolCallFinished, toolCall, requestClientToolCall } = params
-
-  const clientToolCall: ClientToolCall<'run_terminal_command'> = {
+    toolCall: ClientToolCall<ToolName>,
+  ) => Promise<CodebuffToolOutput<ToolName>>
+}): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
+  const clientToolCall: ClientToolCall<ToolName> = {
     toolName: 'run_terminal_command',
     toolCallId: toolCall.toolCallId,
     input: {
@@ -25,9 +29,10 @@ export const handleRunTerminalCommand = ((params: {
     },
   }
   return {
-    result: previousToolCallFinished.then(() =>
-      requestClientToolCall(clientToolCall),
-    ),
+    result: (async () => {
+      await previousToolCallFinished
+      return await requestClientToolCall(clientToolCall)
+    })(),
     state: {},
   }
-}) satisfies CodebuffToolHandlerFunction<'run_terminal_command'>
+}) satisfies CodebuffToolHandlerFunction<ToolName>
diff --git a/backend/src/tools/handlers/tool/set-messages.ts b/backend/src/tools/handlers/tool/set-messages.ts
index 8b84fea07..d6e1eeec8 100644
--- a/backend/src/tools/handlers/tool/set-messages.ts
+++ b/backend/src/tools/handlers/tool/set-messages.ts
@@ -1,5 +1,8 @@
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 
 export const handleSetMessages = (({
@@ -11,14 +14,15 @@ export const handleSetMessages = (({
   toolCall: CodebuffToolCall<'set_messages'>
   getLatestState: () => { messages: CodebuffMessage[] }
 }): {
-  result: Promise<undefined>
+  result: Promise<CodebuffToolOutput<'set_messages'>>
   state: {}
 } => {
   return {
-    result: previousToolCallFinished.then(() => {
+    result: (async () => {
+      await previousToolCallFinished
       getLatestState().messages = toolCall.input.messages
-      return undefined
-    }),
+      return []
+    })(),
     state: {},
   }
 }) satisfies CodebuffToolHandlerFunction<'set_messages'>
diff --git a/backend/src/tools/handlers/tool/set-output.ts b/backend/src/tools/handlers/tool/set-output.ts
index 45366e594..1b3331b78 100644
--- a/backend/src/tools/handlers/tool/set-output.ts
+++ b/backend/src/tools/handlers/tool/set-output.ts
@@ -2,21 +2,25 @@ import { getAgentTemplate } from '../../../templates/agent-registry'
 import { logger } from '../../../util/logger'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
 import type { AgentState } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 
+type ToolName = 'set_output'
 export const handleSetOutput = ((params: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'set_output'>
+  toolCall: CodebuffToolCall<ToolName>
   fileContext: ProjectFileContext
   state: {
     agentState?: AgentState
     localAgentTemplates?: Record<string, AgentTemplate>
   }
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<ToolName>>
   state: { agentState: AgentState }
 } => {
   const { previousToolCallFinished, toolCall, state } = params
@@ -69,7 +73,17 @@ export const handleSetOutput = ((params: {
   }
 
   return {
-    result: previousToolCallFinished.then(triggerSetOutput),
+    result: (async () => {
+      await previousToolCallFinished
+      return [
+        {
+          type: 'json',
+          value: {
+            message: await triggerSetOutput(),
+          },
+        },
+      ]
+    })(),
     state: { agentState: agentState },
   }
-}) satisfies CodebuffToolHandlerFunction<'set_output'>
+}) satisfies CodebuffToolHandlerFunction<ToolName>
diff --git a/backend/src/tools/handlers/tool/spawn-agent-inline.ts b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
index 77acd59d8..2a4cc7ac4 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-inline.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
@@ -10,7 +10,10 @@ import {
 } from './spawn-agent-utils'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
@@ -18,9 +21,10 @@ import type { AgentState } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
 
+type ToolName = 'spawn_agent_inline'
 export const handleSpawnAgentInline = ((params: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'spawn_agent_inline'>
+  toolCall: CodebuffToolCall<ToolName>
   fileContext: ProjectFileContext
   clientSessionId: string
   userInputId: string
@@ -36,7 +40,7 @@ export const handleSpawnAgentInline = ((params: {
     messages?: CodebuffMessage[]
     agentState?: AgentState
   }
-}): { result: Promise<undefined>; state: {} } => {
+}): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
   const {
     previousToolCallFinished,
     toolCall,
@@ -127,7 +131,11 @@ export const handleSpawnAgentInline = ((params: {
   }
 
   return {
-    result: previousToolCallFinished.then(triggerSpawnAgentInline),
+    result: (async () => {
+      await previousToolCallFinished
+      await triggerSpawnAgentInline()
+      return []
+    })(),
     state: {},
   }
-}) satisfies CodebuffToolHandlerFunction<'spawn_agent_inline'>
+}) satisfies CodebuffToolHandlerFunction<ToolName>
diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
index ecb3ba510..615b9fd5c 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -6,7 +6,10 @@ import { getAgentTemplate } from '../../../templates/agent-registry'
 import { logger } from '../../../util/logger'
 
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+  AssistantCodebuffMessage,
+  CodebuffMessage,
+} from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
   AgentState,
@@ -371,42 +374,61 @@ export async function formatAgentResult(
   result: { agentState: AgentState },
   agentTemplate: AgentTemplate,
   agentTypeStr: string,
-): Promise<string> {
-  const agentName = agentTemplate.displayName
-  let report = ''
+): Promise<
+  {
+    agentType: string
+    agentName: string
+  } & (
+    | { errorMessage: string }
+    | { structuredOutput: Record<string, any> | undefined }
+    | {
+        lastMessage: AssistantCodebuffMessage['content']
+      }
+    | {
+        allMessages: CodebuffMessage[]
+      }
+  )
+> {
+  const agentInfo = {
+    agentType: agentTemplate.id,
+    agentName: agentTemplate.displayName,
+  }
 
   if (agentTemplate.outputMode === 'structured_output') {
-    report = JSON.stringify(result.agentState.output, null, 2)
-  } else if (agentTemplate.outputMode === 'last_message') {
+    return {
+      ...agentInfo,
+      structuredOutput: result.agentState.output,
+    }
+  }
+  if (agentTemplate.outputMode === 'last_message') {
     const { agentState } = result
     const assistantMessages = agentState.messageHistory.filter(
-      (message) => message.role === 'assistant',
+      (message): message is AssistantCodebuffMessage =>
+        message.role === 'assistant',
     )
     const lastAssistantMessage = assistantMessages[assistantMessages.length - 1]
     if (!lastAssistantMessage) {
-      report = 'No response from agent'
-    } else if (typeof lastAssistantMessage.content === 'string') {
-      report = lastAssistantMessage.content
-    } else {
-      report = JSON.stringify(lastAssistantMessage.content, null, 2)
+      return {
+        ...agentInfo,
+        errorMessage: 'No response from agent',
+      }
+    }
+    return {
+      ...agentInfo,
+      lastMessage: lastAssistantMessage.content,
     }
-  } else if (agentTemplate.outputMode === 'all_messages') {
+  }
+  if (agentTemplate.outputMode === 'all_messages') {
     const { agentState } = result
     // Remove the first message, which includes the previous conversation history.
     const agentMessages = agentState.messageHistory.slice(1)
-    report = `Agent messages:\n\n${JSON.stringify(agentMessages, null, 2)}`
-  } else {
-    throw new Error(
-      `Unknown output mode: ${'outputMode' in agentTemplate ? agentTemplate.outputMode : 'undefined'}`,
-    )
+    return {
+      ...agentInfo,
+      allMessages: agentMessages,
+    }
   }
-
-  return `**${agentName}(${agentTypeStr}):**\n${report}`
-}
-
-/**
- * Formats error result for failed agent spawn
- */
-export function formatAgentError(agentTypeStr: string, error: any): string {
-  return `**Agent (${agentTypeStr}):**\nError spawning agent: ${error}`
+  agentTemplate.outputMode satisfies never
+  throw new Error(
+    `Unknown output mode: ${'outputMode' in agentTemplate ? agentTemplate.outputMode : 'undefined'}`,
+  )
 }
diff --git a/backend/src/tools/handlers/tool/spawn-agents-async.ts b/backend/src/tools/handlers/tool/spawn-agents-async.ts
index 95ed17c5e..b06d73775 100644
--- a/backend/src/tools/handlers/tool/spawn-agents-async.ts
+++ b/backend/src/tools/handlers/tool/spawn-agents-async.ts
@@ -15,7 +15,10 @@ import { logger } from '../../../util/logger'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
 import type { SendSubagentChunk } from './spawn-agents'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
@@ -23,9 +26,10 @@ import type { AgentState } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
 
+type ToolName = 'spawn_agents_async'
 export const handleSpawnAgentsAsync = ((params: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'spawn_agents_async'>
+  toolCall: CodebuffToolCall<ToolName>
 
   fileContext: ProjectFileContext
   clientSessionId: string
@@ -43,7 +47,7 @@ export const handleSpawnAgentsAsync = ((params: {
     messages?: CodebuffMessage[]
     agentState?: AgentState
   }
-}): { result: Promise<string>; state: {} } => {
+}): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
   if (!ASYNC_AGENTS_ENABLED) {
     return handleSpawnAgents({
       ...params,
@@ -81,13 +85,10 @@ export const handleSpawnAgentsAsync = ((params: {
     )
   }
 
-  const triggerSpawnAgentsAsync = async () => {
-    const results: Array<{
-      agentType: string
-      success: boolean
-      agentId?: string
-      error?: string
-    }> = []
+  const triggerSpawnAgentsAsync: () => Promise<
+    CodebuffToolOutput<ToolName>[0]['value']
+  > = async () => {
+    const results: CodebuffToolOutput<ToolName>[0]['value'] = []
 
     const conversationHistoryMessage = createConversationHistoryMessage(
       getLatestState().messages,
@@ -257,7 +258,7 @@ export const handleSpawnAgentsAsync = ((params: {
         results.push({
           agentType: agentTypeStr,
           success: false,
-          error: errorMessage,
+          errorMessage,
         })
         logger.error(
           { agentType: agentTypeStr, error },
@@ -267,23 +268,19 @@ export const handleSpawnAgentsAsync = ((params: {
       }
     }
 
-    const successful = results.filter((r) => r.success)
-
-    let result = `Agent spawn results (${successful.length}/${results.length} successful):\n`
-
-    results.forEach(({ agentType, success, agentId, error }) => {
-      if (success) {
-        result += `✓ ${agentType}: spawned (${agentId})\n`
-      } else {
-        result += `✗ ${agentType}: failed - ${error}\n`
-      }
-    })
-
-    return result.trim()
+    return results
   }
 
   return {
-    result: previousToolCallFinished.then(triggerSpawnAgentsAsync),
+    result: (async () => {
+      await previousToolCallFinished
+      return [
+        {
+          type: 'json',
+          value: await triggerSpawnAgentsAsync(),
+        },
+      ]
+    })(),
     state: {},
   }
-}) satisfies CodebuffToolHandlerFunction<'spawn_agents_async'>
+}) satisfies CodebuffToolHandlerFunction<ToolName>
diff --git a/backend/src/tools/handlers/tool/spawn-agents.ts b/backend/src/tools/handlers/tool/spawn-agents.ts
index 34b616b2f..437da4f76 100644
--- a/backend/src/tools/handlers/tool/spawn-agents.ts
+++ b/backend/src/tools/handlers/tool/spawn-agents.ts
@@ -7,12 +7,14 @@ import {
   logAgentSpawn,
   executeAgent,
   formatAgentResult,
-  formatAgentError,
 } from './spawn-agent-utils'
 import { logger } from '../../../util/logger'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
@@ -28,9 +30,10 @@ export type SendSubagentChunk = (data: {
   prompt?: string
 }) => void
 
+type ToolName = 'spawn_agents'
 export const handleSpawnAgents = ((params: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'spawn_agents'>
+  toolCall: CodebuffToolCall<ToolName>
 
   fileContext: ProjectFileContext
   clientSessionId: string
@@ -48,7 +51,7 @@ export const handleSpawnAgents = ((params: {
     messages?: CodebuffMessage[]
     agentState?: AgentState
   }
-}): { result: Promise<string>; state: {} } => {
+}): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
   const {
     previousToolCallFinished,
     toolCall,
@@ -172,7 +175,10 @@ export const handleSpawnAgents = ((params: {
             agentTypeStr,
           )
         } else {
-          return formatAgentError(agentTypeStr, result.reason)
+          return {
+            agentType: agentTypeStr,
+            errorMessage: result.reason,
+          }
         }
       }),
     )
@@ -219,11 +225,17 @@ export const handleSpawnAgents = ((params: {
     })
 
     return reports
-      .map((report: string) => `<agent_report>${report}</agent_report>`)
-      .join('\n')
   }
   return {
-    result: previousToolCallFinished.then(triggerSpawnAgents),
+    result: (async () => {
+      await previousToolCallFinished
+      return [
+        {
+          type: 'json',
+          value: await triggerSpawnAgents(),
+        },
+      ]
+    })(),
     state: {},
   }
-}) satisfies CodebuffToolHandlerFunction<'spawn_agents'>
+}) satisfies CodebuffToolHandlerFunction<ToolName>
diff --git a/backend/src/tools/handlers/tool/str-replace.ts b/backend/src/tools/handlers/tool/str-replace.ts
index 141f9f70a..f89f6bf52 100644
--- a/backend/src/tools/handlers/tool/str-replace.ts
+++ b/backend/src/tools/handlers/tool/str-replace.ts
@@ -11,6 +11,7 @@ import type {
 import type {
   ClientToolCall,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 import type { WebSocket } from 'ws'
 
@@ -19,7 +20,7 @@ export const handleStrReplace = ((params: {
   toolCall: CodebuffToolCall<'str_replace'>
   requestClientToolCall: (
     toolCall: ClientToolCall<'str_replace'>,
-  ) => Promise<string>
+  ) => Promise<CodebuffToolOutput<'str_replace'>>
   writeToClient: (chunk: string) => void
 
   getLatestState: () => FileProcessingState
@@ -27,7 +28,7 @@ export const handleStrReplace = ((params: {
     ws?: WebSocket
   } & OptionalFileProcessingState
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<'str_replace'>>
   state: FileProcessingState
 } => {
   const {
diff --git a/backend/src/tools/handlers/tool/think-deeply.ts b/backend/src/tools/handlers/tool/think-deeply.ts
index bb8a3aece..8bdd8d9aa 100644
--- a/backend/src/tools/handlers/tool/think-deeply.ts
+++ b/backend/src/tools/handlers/tool/think-deeply.ts
@@ -1,12 +1,15 @@
 import { logger } from '../../../util/logger'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 
 export const handleThinkDeeply = ((params: {
   previousToolCallFinished: Promise<any>
   toolCall: CodebuffToolCall<'think_deeply'>
-}): { result: Promise<string>; state: {} } => {
+}): { result: Promise<CodebuffToolOutput<'think_deeply'>>; state: {} } => {
   const { previousToolCallFinished, toolCall } = params
   const { thought } = toolCall.input
 
@@ -18,7 +21,7 @@ export const handleThinkDeeply = ((params: {
   )
 
   return {
-    result: previousToolCallFinished.then(() => 'Deep thinking completed.'),
+    result: previousToolCallFinished.then(() => []),
     state: {},
   }
 }) satisfies CodebuffToolHandlerFunction<'think_deeply'>
diff --git a/backend/src/tools/handlers/tool/update-subgoal.ts b/backend/src/tools/handlers/tool/update-subgoal.ts
index 3cbf612f7..bf39ff881 100644
--- a/backend/src/tools/handlers/tool/update-subgoal.ts
+++ b/backend/src/tools/handlers/tool/update-subgoal.ts
@@ -1,13 +1,17 @@
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 import type { Subgoal } from '@codebuff/common/types/session-state'
 
+type ToolName = 'update_subgoal'
 export const handleUpdateSubgoal = ((params: {
   previousToolCallFinished: Promise<void>
-  toolCall: CodebuffToolCall<'update_subgoal'>
+  toolCall: CodebuffToolCall<ToolName>
   state: { agentContext?: Record<string, Subgoal> }
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<ToolName>>
   state: { agentContext: Record<string, Subgoal> }
 } => {
   const { previousToolCallFinished, toolCall, state } = params
@@ -36,7 +40,17 @@ export const handleUpdateSubgoal = ((params: {
   }
   messages.push('Successfully updated subgoal.')
   return {
-    result: previousToolCallFinished.then(() => messages.join('\n\n')),
+    result: (async () => {
+      await previousToolCallFinished
+      return [
+        {
+          type: 'json',
+          value: {
+            message: messages.join('\n\n'),
+          },
+        },
+      ]
+    })(),
     state: { agentContext },
   }
-}) satisfies CodebuffToolHandlerFunction<'update_subgoal'>
+}) satisfies CodebuffToolHandlerFunction<ToolName>
diff --git a/backend/src/tools/handlers/tool/web-search.ts b/backend/src/tools/handlers/tool/web-search.ts
index bd31c63a7..c2abb57a5 100644
--- a/backend/src/tools/handlers/tool/web-search.ts
+++ b/backend/src/tools/handlers/tool/web-search.ts
@@ -6,7 +6,10 @@ import { PROFIT_MARGIN } from '../../../llm-apis/message-cost-tracker'
 import { logger } from '../../../util/logger'
 
 import type { CodebuffToolHandlerFunction } from '../handler-function-type'
-import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type {
+  CodebuffToolCall,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
 
 export const handleWebSearch = ((params: {
   previousToolCallFinished: Promise<void>
@@ -21,7 +24,7 @@ export const handleWebSearch = ((params: {
     fingerprintId?: string
     repoId?: string
   }
-}): { result: Promise<string>; state: {} } => {
+}): { result: Promise<CodebuffToolOutput<'web_search'>>; state: {} } => {
   const {
     previousToolCallFinished,
     toolCall,
@@ -51,93 +54,113 @@ export const handleWebSearch = ((params: {
     repoId,
   }
 
-  const webSearchPromise: Promise<string> = (async () => {
-    try {
-      const searchResult = await searchWeb(query, { depth })
-      const searchDuration = Date.now() - searchStartTime
-      const resultLength = searchResult?.length || 0
-      const hasResults = Boolean(searchResult && searchResult.trim())
+  const webSearchPromise: Promise<CodebuffToolOutput<'web_search'>> =
+    (async () => {
+      try {
+        const searchResult = await searchWeb(query, { depth })
+        const searchDuration = Date.now() - searchStartTime
+        const resultLength = searchResult?.length || 0
+        const hasResults = Boolean(searchResult && searchResult.trim())
 
-      // Charge credits for web search usage
-      let creditResult = null
-      if (userId) {
-        const creditsToCharge = Math.round(
-          (depth === 'deep' ? 5 : 1) * (1 + PROFIT_MARGIN),
-        )
-        const requestContext = getRequestContext()
-        const repoUrl = requestContext?.processedRepoUrl
+        // Charge credits for web search usage
+        let creditResult = null
+        if (userId) {
+          const creditsToCharge = Math.round(
+            (depth === 'deep' ? 5 : 1) * (1 + PROFIT_MARGIN),
+          )
+          const requestContext = getRequestContext()
+          const repoUrl = requestContext?.processedRepoUrl
+
+          creditResult = await consumeCreditsWithFallback({
+            userId,
+            creditsToCharge,
+            repoUrl,
+            context: 'web search',
+          })
+
+          if (!creditResult.success) {
+            logger.error(
+              {
+                ...searchContext,
+                error: creditResult.error,
+                creditsToCharge,
+                searchDuration,
+              },
+              'Failed to charge credits for web search',
+            )
+          }
+        }
 
-        creditResult = await consumeCreditsWithFallback({
-          userId,
-          creditsToCharge,
-          repoUrl,
-          context: 'web search',
-        })
+        logger.info(
+          {
+            ...searchContext,
+            searchDuration,
+            resultLength,
+            hasResults,
+            creditsCharged: creditResult?.success
+              ? depth === 'deep'
+                ? 5
+                : 1
+              : 0,
+            success: true,
+          },
+          'Search completed',
+        )
 
-        if (!creditResult.success) {
-          logger.error(
+        if (searchResult) {
+          return [
+            {
+              type: 'json',
+              value: { result: searchResult },
+            },
+          ]
+        } else {
+          logger.warn(
             {
               ...searchContext,
-              error: creditResult.error,
-              creditsToCharge,
               searchDuration,
             },
-            'Failed to charge credits for web search',
+            'No results returned from search API',
           )
+          return [
+            {
+              type: 'json',
+              value: {
+                errorMessage: `No search results found for "${query}". Try refining your search query or using different keywords.`,
+              },
+            },
+          ]
         }
-      }
-
-      logger.info(
-        {
-          ...searchContext,
-          searchDuration,
-          resultLength,
-          hasResults,
-          creditsCharged: creditResult?.success
-            ? depth === 'deep'
-              ? 5
-              : 1
-            : 0,
-          success: true,
-        },
-        'Search completed',
-      )
-
-      if (searchResult) {
-        return searchResult
-      } else {
-        logger.warn(
+      } catch (error) {
+        const searchDuration = Date.now() - searchStartTime
+        logger.error(
           {
             ...searchContext,
+            error:
+              error instanceof Error
+                ? {
+                    name: error.name,
+                    message: error.message,
+                    stack: error.stack,
+                  }
+                : error,
             searchDuration,
+            success: false,
           },
-          'No results returned from search API',
+          'Search failed with error',
         )
-        return `No search results found for "${query}". Try refining your search query or using different keywords.`
+        return [
+          {
+            type: 'json',
+            value: {
+              errorMessage: `Error performing web search for "${query}": ${
+                error instanceof Error ? error.message : 'Unknown error'
+              }`,
+            },
+          },
+        ]
       }
-    } catch (error) {
-      const searchDuration = Date.now() - searchStartTime
-      logger.error(
-        {
-          ...searchContext,
-          error:
-            error instanceof Error
-              ? {
-                  name: error.name,
-                  message: error.message,
-                  stack: error.stack,
-                }
-              : error,
-          searchDuration,
-          success: false,
-        },
-        'Search failed with error',
-      )
-      return `Error performing web search for "${query}": ${
-        error instanceof Error ? error.message : 'Unknown error'
-      }`
-    }
-  })()
+    })()
 
   return {
     result: (async () => {
diff --git a/backend/src/tools/handlers/tool/write-file.ts b/backend/src/tools/handlers/tool/write-file.ts
index 6cf5a0dfa..8b4fbe5ab 100644
--- a/backend/src/tools/handlers/tool/write-file.ts
+++ b/backend/src/tools/handlers/tool/write-file.ts
@@ -8,6 +8,7 @@ import type { CodebuffToolHandlerFunction } from '../handler-function-type'
 import type {
   ClientToolCall,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
 import type { WebSocket } from 'ws'
@@ -61,7 +62,19 @@ export function getFileProcessingValues(
   return fileProcessingValues
 }
 
-export const handleWriteFile = ((params: {
+export const handleWriteFile = (({
+  previousToolCallFinished,
+  toolCall,
+
+  clientSessionId,
+  userInputId,
+
+  requestClientToolCall,
+  writeToClient,
+
+  getLatestState,
+  state,
+}: {
   previousToolCallFinished: Promise<void>
   toolCall: CodebuffToolCall<'write_file'>
 
@@ -70,7 +83,7 @@ export const handleWriteFile = ((params: {
 
   requestClientToolCall: (
     toolCall: ClientToolCall<'write_file'>,
-  ) => Promise<string>
+  ) => Promise<CodebuffToolOutput<'write_file'>>
   writeToClient: (chunk: string) => void
 
   getLatestState: () => FileProcessingState
@@ -83,22 +96,9 @@ export const handleWriteFile = ((params: {
     messages?: CodebuffMessage[]
   } & OptionalFileProcessingState
 }): {
-  result: Promise<string>
+  result: Promise<CodebuffToolOutput<'write_file'>>
   state: FileProcessingState
 } => {
-  const {
-    previousToolCallFinished,
-    toolCall,
-
-    clientSessionId,
-    userInputId,
-
-    requestClientToolCall,
-    writeToClient,
-
-    getLatestState,
-    state,
-  } = params
   const { path, instructions, content } = toolCall.input
   const { ws, fingerprintId, userId, fullResponse, prompt } = state
   if (!ws) {
@@ -168,14 +168,15 @@ export const handleWriteFile = ((params: {
   fileProcessingPromises.push(newPromise)
 
   return {
-    result: previousToolCallFinished.then(async () => {
+    result: (async () => {
+      await previousToolCallFinished
       return await postStreamProcessing<'write_file'>(
         await newPromise,
         getLatestState(),
         writeToClient,
         requestClientToolCall,
       )
-    }),
+    })(),
     state: fileProcessingState,
   }
 }) satisfies CodebuffToolHandlerFunction<'write_file'>
@@ -184,8 +185,10 @@ export async function postStreamProcessing<T extends FileProcessingTools>(
   toolCall: FileProcessing<T>,
   fileProcessingState: FileProcessingState,
   writeToClient: (chunk: string) => void,
-  requestClientToolCall: (toolCall: ClientToolCall<T>) => Promise<string>,
-) {
+  requestClientToolCall: (
+    toolCall: ClientToolCall<T>,
+  ) => Promise<CodebuffToolOutput<T>>,
+): Promise<CodebuffToolOutput<T>> {
   const allFileProcessingResults = await Promise.all(
     fileProcessingState.allPromises,
   )
@@ -224,25 +227,41 @@ export async function postStreamProcessing<T extends FileProcessingTools>(
   const errors = fileProcessingState.fileChangeErrors.filter(
     (result) => result.toolCallId === toolCall.toolCallId,
   )
-  toolCallResults.push(
-    ...errors.map(({ path, error }) => `Error processing ${path}: ${error}`),
-  )
+  if (errors.length > 0) {
+    if (errors.length > 1) {
+      throw new Error(
+        `Internal error: Unexpected number of matching errors for ${{ toolCall }}, found ${errors.length}, expected 1`,
+      )
+    }
+
+    const { path, error } = errors[0]
+    return [
+      {
+        type: 'json',
+        value: {
+          file: path,
+          errorMessage: error,
+        },
+      },
+    ]
+  }
 
   const changes = fileProcessingState.fileChanges.filter(
     (result) => result.toolCallId === toolCall.toolCallId,
   )
-  for (const { path, content, patch } of changes) {
-    const clientToolCall: ClientToolCall<T> = {
-      toolCallId: toolCall.toolCallId,
-      toolName: toolCall.tool,
-      input: patch
-        ? { type: 'patch' as const, path, content: patch }
-        : { type: 'file' as const, path, content },
-    } as ClientToolCall<T>
-    const clientResult = await requestClientToolCall(clientToolCall)
-
-    toolCallResults.push(clientResult)
+  if (changes.length !== 1) {
+    throw new Error(
+      `Internal error: Unexpected number of matching changes for ${{ toolCall }}, found ${changes.length}, expected 1`,
+    )
   }
 
-  return toolCallResults.join('\n\n')
+  const { patch, content, path } = changes[0]
+  const clientToolCall: ClientToolCall<T> = {
+    toolCallId: toolCall.toolCallId,
+    toolName: toolCall.tool,
+    input: patch
+      ? { type: 'patch' as const, path, content: patch }
+      : { type: 'file' as const, path, content },
+  } as ClientToolCall<T>
+  return await requestClientToolCall(clientToolCall)
 }
diff --git a/common/src/tools/list.ts b/common/src/tools/list.ts
index 42a4f3890..fca91e656 100644
--- a/common/src/tools/list.ts
+++ b/common/src/tools/list.ts
@@ -63,10 +63,13 @@ export type CodebuffToolCall<T extends ToolName = ToolName> = {
   } & Omit<ToolCallPart, 'type'>
 }[T]
 
+export type CodebuffToolOutput<T extends ToolName = ToolName> = {
+  [K in ToolName]: z.infer<(typeof $toolParams)[K]['outputs']>
+}[T]
 export type CodebuffToolResult<T extends ToolName = ToolName> = {
   [K in ToolName]: {
     toolName: K
-    output: z.infer<(typeof $toolParams)[K]['outputs']>
+    output: CodebuffToolOutput<K>
   } & Omit<ToolResultPart, 'type'>
 }[T]
 
diff --git a/common/src/tools/params/tool/find-files.ts b/common/src/tools/params/tool/find-files.ts
index f04fd2682..0b091f98b 100644
--- a/common/src/tools/params/tool/find-files.ts
+++ b/common/src/tools/params/tool/find-files.ts
@@ -24,7 +24,12 @@ export const findFilesParams = {
   outputs: z.tuple([
     z.object({
       type: z.literal('json'),
-      value: fileContentsSchema.array(),
+      value: z.union([
+        fileContentsSchema.array(),
+        z.object({
+          message: z.string(),
+        }),
+      ]),
     }),
   ]),
 } satisfies $ToolParams
diff --git a/common/src/tools/params/tool/str-replace.ts b/common/src/tools/params/tool/str-replace.ts
index 913485269..e890efd6d 100644
--- a/common/src/tools/params/tool/str-replace.ts
+++ b/common/src/tools/params/tool/str-replace.ts
@@ -9,6 +9,7 @@ export const updateFileResultSchema = z.union([
     unifiedDiff: z.string(),
   }),
   z.object({
+    file: z.string(),
     errorMessage: z.string(),
   }),
 ])

From febb31add5d8dbf4e5801d5edfdb175cdf16d2cb Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Wed, 27 Aug 2025 18:18:53 -0700
Subject: [PATCH 04/18] rename CodebuffMessage to Message

---
 .../__tests__/request-files-prompt.test.ts    |   6 +-
 .../spawn-agents-message-history.test.ts      |  10 +-
 backend/src/admin/relabelRuns.ts              |  17 +-
 backend/src/fast-rewrite.ts                   |  44 +-
 .../src/find-files/request-files-prompt.ts    |  10 +-
 backend/src/get-file-reading-updates.ts       |  36 +-
 backend/src/llm-apis/gemini-with-fallbacks.ts |   4 +-
 backend/src/llm-apis/message-cost-tracker.ts  |   6 +-
 backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts  |  43 +-
 backend/src/process-file-block.ts             |   4 +-
 backend/src/prompt-agent-stream.ts            |   4 +-
 backend/src/run-agent-step.ts                 |  63 ++-
 backend/src/templates/strings.ts              |  43 +-
 .../src/tools/handlers/tool/add-message.ts    |   4 +-
 backend/src/tools/handlers/tool/find-files.ts |   6 +-
 backend/src/tools/handlers/tool/read-files.ts |   4 +-
 .../src/tools/handlers/tool/set-messages.ts   |   4 +-
 .../tools/handlers/tool/spawn-agent-inline.ts |   6 +-
 .../tools/handlers/tool/spawn-agent-utils.ts  |  21 +-
 .../tools/handlers/tool/spawn-agents-async.ts |   8 +-
 .../src/tools/handlers/tool/spawn-agents.ts   |   8 +-
 backend/src/tools/handlers/tool/write-file.ts |   4 +-
 backend/src/tools/stream-parser.ts            |  18 +-
 backend/src/util/__tests__/messages.test.ts   |  16 +-
 backend/src/util/messages.ts                  | 188 ++++----
 backend/src/util/parse-tool-call-xml.ts       |  95 +---
 backend/src/util/simplify-tool-results.ts     | 140 ++----
 backend/src/websockets/websocket-action.ts    |  20 +-
 common/src/tools/list.ts                      |  13 +-
 common/src/tools/params/tool/read-files.ts    |  16 +-
 .../params/tool/run-file-change-hooks.ts      |  17 +-
 .../tools/params/tool/run-terminal-command.ts |  34 +-
 common/src/types/messages/codebuff-message.ts |  30 +-
 common/src/types/session-state.ts             |   8 +-
 common/src/util/messages.ts                   |  58 +--
 npm-app/src/background-process-manager.ts     |  94 ++--
 npm-app/src/chat-storage.ts                   |   6 +-
 npm-app/src/client.ts                         |  46 +-
 npm-app/src/dev-process-manager.ts            |   6 +-
 npm-app/src/json-config/hooks.ts              |  29 +-
 npm-app/src/terminal/background.ts            |  46 +-
 npm-app/src/terminal/run-command.ts           | 121 +++--
 npm-app/src/tool-handlers.ts                  | 435 +++++++++---------
 .../background-process-manager.test.ts        |  28 +-
 sdk/src/client.ts                             | 120 ++---
 sdk/src/custom-tool.ts                        |  25 +-
 sdk/src/tools/change-file.ts                  |  41 +-
 sdk/src/tools/run-terminal-command.ts         |  17 +-
 sdk/src/websocket-client.ts                   |   8 +-
 49 files changed, 967 insertions(+), 1063 deletions(-)

diff --git a/backend/src/__tests__/request-files-prompt.test.ts b/backend/src/__tests__/request-files-prompt.test.ts
index 9afc1e92a..eb496fb6a 100644
--- a/backend/src/__tests__/request-files-prompt.test.ts
+++ b/backend/src/__tests__/request-files-prompt.test.ts
@@ -12,7 +12,7 @@ import * as OriginalRequestFilesPromptModule from '../find-files/request-files-p
 import * as geminiWithFallbacksModule from '../llm-apis/gemini-with-fallbacks'
 
 import type { CostMode } from '@codebuff/common/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { Mock } from 'bun:test'
 
@@ -52,9 +52,7 @@ bunMockFn.module('@codebuff/bigquery', () => ({
 }))
 
 describe('requestRelevantFiles', () => {
-  const mockMessages: CodebuffMessage[] = [
-    { role: 'user', content: 'test prompt' },
-  ]
+  const mockMessages: Message[] = [{ role: 'user', content: 'test prompt' }]
   const mockSystem = 'test system'
   const mockFileContext: ProjectFileContext = {
     projectRoot: '/test/project',
diff --git a/backend/src/__tests__/spawn-agents-message-history.test.ts b/backend/src/__tests__/spawn-agents-message-history.test.ts
index a01c97320..5c3f464ec 100644
--- a/backend/src/__tests__/spawn-agents-message-history.test.ts
+++ b/backend/src/__tests__/spawn-agents-message-history.test.ts
@@ -17,7 +17,7 @@ import * as loggerModule from '../util/logger'
 
 import type { CodebuffToolCall } from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { WebSocket } from 'ws'
 
 describe('Spawn Agents Message History', () => {
@@ -102,7 +102,7 @@ describe('Spawn Agents Message History', () => {
     const toolCall = createSpawnToolCall('child-agent')
 
     // Create mock messages including system message
-    const mockMessages: CodebuffMessage[] = [
+    const mockMessages: Message[] = [
       {
         role: 'system',
         content: 'This is the parent system prompt that should be excluded',
@@ -183,7 +183,7 @@ describe('Spawn Agents Message History', () => {
     const sessionState = getInitialSessionState(mockFileContext)
     const toolCall = createSpawnToolCall('child-agent')
 
-    const mockMessages: CodebuffMessage[] = [
+    const mockMessages: Message[] = [
       { role: 'system', content: 'System prompt' },
       { role: 'user', content: 'Hello' },
       { role: 'assistant', content: 'Hi there!' },
@@ -222,7 +222,7 @@ describe('Spawn Agents Message History', () => {
     const sessionState = getInitialSessionState(mockFileContext)
     const toolCall = createSpawnToolCall('child-agent')
 
-    const mockMessages: CodebuffMessage[] = [] // Empty message history
+    const mockMessages: Message[] = [] // Empty message history
 
     const { result } = handleSpawnAgents({
       previousToolCallFinished: Promise.resolve(),
@@ -259,7 +259,7 @@ describe('Spawn Agents Message History', () => {
     const sessionState = getInitialSessionState(mockFileContext)
     const toolCall = createSpawnToolCall('child-agent')
 
-    const mockMessages: CodebuffMessage[] = [
+    const mockMessages: Message[] = [
       { role: 'system', content: 'System prompt 1' },
       { role: 'system', content: 'System prompt 2' },
     ]
diff --git a/backend/src/admin/relabelRuns.ts b/backend/src/admin/relabelRuns.ts
index eb85e8182..73a58c42f 100644
--- a/backend/src/admin/relabelRuns.ts
+++ b/backend/src/admin/relabelRuns.ts
@@ -13,11 +13,9 @@ import { generateCompactId } from '@codebuff/common/util/string'
 import { closeXml } from '@codebuff/common/util/xml'
 
 import { rerank } from '../llm-apis/relace-api'
-import {
-  promptAiSdk,
-  transformMessages,
-} from '../llm-apis/vercel-ai-sdk/ai-sdk'
+import { promptAiSdk } from '../llm-apis/vercel-ai-sdk/ai-sdk'
 import { logger } from '../util/logger'
+import { messagesWithSystem } from '../util/messages'
 
 import type { System } from '../llm-apis/claude'
 import type {
@@ -27,7 +25,7 @@ import type {
   GetRelevantFilesTrace,
   Relabel,
 } from '@codebuff/bigquery'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { Request, Response } from 'express'
 
 // --- GET Handler Logic ---
@@ -171,8 +169,8 @@ export async function relabelForUserHandler(req: Request, res: Response) {
             const system = payload.system
 
             output = await promptAiSdk({
-              messages: transformMessages(
-                messages as CodebuffMessage[],
+              messages: messagesWithSystem(
+                messages as Message[],
                 system as System,
               ),
               model: model,
@@ -397,10 +395,7 @@ export async function relabelWithClaudeWithFullFileContext(
   }
 
   const output = await promptAiSdk({
-    messages: transformMessages(
-      trace.payload.messages as CodebuffMessage[],
-      system,
-    ),
+    messages: messagesWithSystem(trace.payload.messages as Message[], system),
     model: model as any, // Model type is string here for flexibility
     clientSessionId: 'relabel-trace-api',
     fingerprintId: 'relabel-trace-api',
diff --git a/backend/src/fast-rewrite.ts b/backend/src/fast-rewrite.ts
index 5fc6f6148..1d7537ce8 100644
--- a/backend/src/fast-rewrite.ts
+++ b/backend/src/fast-rewrite.ts
@@ -1,9 +1,6 @@
 import { geminiModels, openaiModels } from '@codebuff/common/constants'
 import { buildArray } from '@codebuff/common/util/array'
-import {
-  parseFileBlocks,
-  parseMarkdownCodeBlock,
-} from '@codebuff/common/util/file'
+import { parseMarkdownCodeBlock } from '@codebuff/common/util/file'
 import { generateCompactId, hasLazyEdit } from '@codebuff/common/util/string'
 
 import { promptFlashWithFallbacks } from './llm-apis/gemini-with-fallbacks'
@@ -11,7 +8,11 @@ import { promptRelaceAI } from './llm-apis/relace-api'
 import { promptAiSdk } from './llm-apis/vercel-ai-sdk/ai-sdk'
 import { logger } from './util/logger'
 
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
+import type {
+  Message,
+  ToolMessage,
+} from '@codebuff/common/types/messages/codebuff-message'
 
 export async function fastRewrite(
   initialContent: string,
@@ -135,23 +136,34 @@ export const shouldAddFilePlaceholders = async (
   filePath: string,
   oldContent: string,
   rewrittenNewContent: string,
-  messageHistory: CodebuffMessage[],
+  messageHistory: Message[],
   fullResponse: string,
   userId: string | undefined,
   clientSessionId: string,
   fingerprintId: string,
   userInputId: string,
 ) => {
-  const fileBlocks = parseFileBlocks(
-    messageHistory
-      .map((message) =>
-        typeof message.content === 'string'
-          ? message.content
-          : message.content.map((c) => ('text' in c ? c.text : '')).join('\n'),
-      )
-      .join('\n') + fullResponse,
-  )
-  const fileWasPreviouslyEdited = Object.keys(fileBlocks).includes(filePath)
+  const fileWasPreviouslyEdited = messageHistory
+    .filter(
+      (
+        m,
+      ): m is ToolMessage & {
+        content: { toolName: 'create_plan' | 'str_replace' | 'write_file' }
+      } => {
+        return (
+          m.role === 'tool' &&
+          (m.content.toolName === 'create_plan' ||
+            m.content.toolName === 'str_replace' ||
+            m.content.toolName === 'write_file')
+        )
+      },
+    )
+    .some((m) => {
+      const message = m as CodebuffToolMessage<
+        'create_plan' | 'str_replace' | 'write_file'
+      >
+      return message.content.output[0].value.file === filePath
+    })
   if (!fileWasPreviouslyEdited) {
     // If Claude hasn't edited this file before, it's almost certainly not a local-only change.
     // Usually, it's only when Claude is editing a function for a second or third time that
diff --git a/backend/src/find-files/request-files-prompt.ts b/backend/src/find-files/request-files-prompt.ts
index 3cdbb766d..905592fb9 100644
--- a/backend/src/find-files/request-files-prompt.ts
+++ b/backend/src/find-files/request-files-prompt.ts
@@ -29,7 +29,7 @@ import type {
   GetExpandedFileContextForTrainingTrace,
   GetRelevantFilesTrace,
 } from '@codebuff/bigquery'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 
 const NUMBER_OF_EXAMPLE_FILES = 100
@@ -123,7 +123,7 @@ export async function requestRelevantFiles(
     messages,
     system,
   }: {
-    messages: CodebuffMessage[]
+    messages: Message[]
     system: string | Array<TextBlock>
   },
   fileContext: ProjectFileContext,
@@ -221,7 +221,7 @@ export async function requestRelevantFilesForTraining(
     messages,
     system,
   }: {
-    messages: CodebuffMessage[]
+    messages: Message[]
     system: string | Array<TextBlock>
   },
   fileContext: ProjectFileContext,
@@ -302,7 +302,7 @@ async function getRelevantFiles(
     messages,
     system,
   }: {
-    messages: CodebuffMessage[]
+    messages: Message[]
     system: string | Array<TextBlock>
   },
   userPrompt: string,
@@ -385,7 +385,7 @@ async function getRelevantFilesForTraining(
     messages,
     system,
   }: {
-    messages: CodebuffMessage[]
+    messages: Message[]
     system: string | Array<TextBlock>
   },
   userPrompt: string,
diff --git a/backend/src/get-file-reading-updates.ts b/backend/src/get-file-reading-updates.ts
index 1ea30e387..41dec21ad 100644
--- a/backend/src/get-file-reading-updates.ts
+++ b/backend/src/get-file-reading-updates.ts
@@ -1,19 +1,13 @@
 import { HIDDEN_FILE_READ_STATUS } from '@codebuff/common/constants'
-import { parseFileBlocks } from '@codebuff/common/util/file'
-import { toContentString } from '@codebuff/common/util/messages'
 import { countTokens } from 'gpt-tokenizer'
 import { uniq, difference } from 'lodash'
 
 import { logger } from './util/logger'
-import {
-  isToolResult,
-  parseToolResults,
-  parseReadFilesResult,
-} from './util/parse-tool-call-xml'
+import { getEditedFiles, getPreviouslyReadFiles } from './util/messages'
 import { countTokensJson } from './util/token-counter'
 import { requestFiles } from './websockets/websocket-action'
 
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
 
@@ -39,7 +33,7 @@ const getInitialFiles = (fileContext: ProjectFileContext) => {
 
 export async function getFileReadingUpdates(
   ws: WebSocket,
-  messages: CodebuffMessage[],
+  messages: Message[],
   fileContext: ProjectFileContext,
   options: {
     requestedFiles?: string[]
@@ -50,27 +44,25 @@ export async function getFileReadingUpdates(
     userId: string | undefined
     repoId: string | undefined
   },
-) {
+): Promise<{
+  addedFiles: {
+    path: string
+    content: string
+  }[]
+  updatedFilePaths: string[]
+  printedPaths: string[]
+  clearReadFileToolResults: boolean
+}> {
   const FILE_TOKEN_BUDGET = 100_000
 
-  const toolResults = messages
-    .filter(isToolResult)
-    .flatMap((content) => parseToolResults(toContentString(content)))
-  const previousFileList = toolResults
-    .filter(({ toolName }) => toolName === 'read_files')
-    .flatMap(({ output }) => parseReadFilesResult(output.value))
+  const previousFileList = getPreviouslyReadFiles(messages)
 
   const previousFiles = Object.fromEntries(
     previousFileList.map(({ path, content }) => [path, content]),
   )
   const previousFilePaths = uniq(Object.keys(previousFiles))
 
-  const editedFilePaths = messages
-    .filter(({ role }) => role === 'assistant')
-    .map(toContentString)
-    .filter((content) => content.includes('<write_file'))
-    .flatMap((content) => Object.keys(parseFileBlocks(content)))
-    .filter((path) => path !== undefined)
+  const editedFilePaths = getEditedFiles(messages)
 
   const requestedFiles = options.requestedFiles ?? []
 
diff --git a/backend/src/llm-apis/gemini-with-fallbacks.ts b/backend/src/llm-apis/gemini-with-fallbacks.ts
index 30f529df9..4f76ffd6e 100644
--- a/backend/src/llm-apis/gemini-with-fallbacks.ts
+++ b/backend/src/llm-apis/gemini-with-fallbacks.ts
@@ -8,7 +8,7 @@ import type {
   FinetunedVertexModel,
   GeminiModel,
 } from '@codebuff/common/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 /**
  * Prompts a Gemini model with fallback logic.
@@ -36,7 +36,7 @@ import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-m
  * @throws If all API calls (primary and fallbacks) fail.
  */
 export async function promptFlashWithFallbacks(
-  messages: CodebuffMessage[],
+  messages: Message[],
   options: {
     clientSessionId: string
     fingerprintId: string
diff --git a/backend/src/llm-apis/message-cost-tracker.ts b/backend/src/llm-apis/message-cost-tracker.ts
index e7253bde0..fbf468ef1 100644
--- a/backend/src/llm-apis/message-cost-tracker.ts
+++ b/backend/src/llm-apis/message-cost-tracker.ts
@@ -22,7 +22,7 @@ import { SWITCHBOARD } from '../websockets/server'
 import { sendAction } from '../websockets/websocket-action'
 
 import type { ClientState } from '../websockets/switchboard'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 export const PROFIT_MARGIN = 0.055
 
@@ -332,7 +332,7 @@ type InsertMessageParams = {
   fingerprintId: string
   userInputId: string
   model: string
-  request: CodebuffMessage[]
+  request: Message[]
   response: string
   inputTokens: number
   outputTokens: number
@@ -533,7 +533,7 @@ export const saveMessage = async (value: {
   fingerprintId: string
   userInputId: string
   model: string
-  request: CodebuffMessage[]
+  request: Message[]
   response: string
   inputTokens: number
   outputTokens: number
diff --git a/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts b/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts
index 9f40519eb..51979d467 100644
--- a/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts
+++ b/backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts
@@ -23,13 +23,12 @@ import { saveMessage } from '../message-cost-tracker'
 import { openRouterLanguageModel } from '../openrouter'
 import { vertexFinetuned } from './vertex-finetuned'
 
-import type { System } from '../claude'
 import type {
   GeminiModel,
   Model,
   OpenAIModel,
 } from '@codebuff/common/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type {
   OpenRouterProviderOptions,
   OpenRouterUsageAccounting,
@@ -64,7 +63,7 @@ const modelToAiSDKModel = (model: Model): LanguageModel => {
 // eg: [{model: "gemini-2.0-flash-001"}, {model: "vertex/gemini-2.0-flash-001"}, {model: "claude-3-5-haiku", retries: 3}]
 export const promptAiSdkStream = async function* (
   options: {
-    messages: CodebuffMessage[]
+    messages: Message[]
     clientSessionId: string
     fingerprintId: string
     model: Model
@@ -76,7 +75,7 @@ export const promptAiSdkStream = async function* (
     maxRetries?: number
     onCostCalculated?: (credits: number) => Promise<void>
     includeCacheControl?: boolean
-  } & Omit<Parameters<typeof streamText>[0], 'model'>,
+  } & Omit<Parameters<typeof streamText>[0], 'model' | 'messages'>,
 ) {
   if (
     !checkLiveUserInput(
@@ -225,7 +224,7 @@ export const promptAiSdkStream = async function* (
 // TODO: figure out a nice way to unify stream & non-stream versions maybe?
 export const promptAiSdk = async function (
   options: {
-    messages: CodebuffMessage[]
+    messages: Message[]
     clientSessionId: string
     fingerprintId: string
     userInputId: string
@@ -235,7 +234,7 @@ export const promptAiSdk = async function (
     agentId?: string
     onCostCalculated?: (credits: number) => Promise<void>
     includeCacheControl?: boolean
-  } & Omit<Parameters<typeof generateText>[0], 'model'>,
+  } & Omit<Parameters<typeof generateText>[0], 'model' | 'messages'>,
 ): Promise<string> {
   if (
     !checkLiveUserInput(
@@ -295,7 +294,7 @@ export const promptAiSdk = async function (
 
 // Copied over exactly from promptAiSdk but with a schema
 export const promptAiSdkStructured = async function <T>(options: {
-  messages: CodebuffMessage[]
+  messages: Message[]
   schema: z.ZodType<T>
   clientSessionId: string
   fingerprintId: string
@@ -369,33 +368,3 @@ export const promptAiSdkStructured = async function <T>(options: {
 
   return content
 }
-
-// TODO: temporary - ideally we move to using CodebuffMessage[] directly
-// and don't need this transform!!
-export function transformMessages(
-  messages: CodebuffMessage[],
-  system?: System,
-): CodebuffMessage[] {
-  const codebuffMessages: CodebuffMessage[] = []
-
-  if (system) {
-    codebuffMessages.push({
-      role: 'system',
-      content:
-        typeof system === 'string'
-          ? system
-          : system.map((block) => block.text).join('\n\n'),
-    })
-  }
-
-  return buildArray<CodebuffMessage>([
-    system && {
-      role: 'system',
-      content:
-        typeof system === 'string'
-          ? system
-          : system.map((block) => block.text).join('\n\n'),
-    },
-    messages,
-  ])
-}
diff --git a/backend/src/process-file-block.ts b/backend/src/process-file-block.ts
index 736fb4d92..a77551c79 100644
--- a/backend/src/process-file-block.ts
+++ b/backend/src/process-file-block.ts
@@ -12,14 +12,14 @@ import { promptAiSdk } from './llm-apis/vercel-ai-sdk/ai-sdk'
 import { logger } from './util/logger'
 import { countTokens } from './util/token-counter'
 
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 export async function processFileBlock(
   path: string,
   instructions: string | undefined,
   initialContentPromise: Promise<string | null>,
   newContent: string,
-  messages: CodebuffMessage[],
+  messages: Message[],
   fullResponse: string,
   lastUserPrompt: string | undefined,
   clientSessionId: string,
diff --git a/backend/src/prompt-agent-stream.ts b/backend/src/prompt-agent-stream.ts
index f12243bd5..cb923d9de 100644
--- a/backend/src/prompt-agent-stream.ts
+++ b/backend/src/prompt-agent-stream.ts
@@ -4,7 +4,7 @@ import { promptAiSdkStream } from './llm-apis/vercel-ai-sdk/ai-sdk'
 import { globalStopSequence } from './tools/constants'
 
 import type { AgentTemplate } from './templates/types'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { OpenRouterProviderOptions } from '@codebuff/internal/openrouter-ai-sdk'
 
 export const getAgentStreamFromTemplate = (params: {
@@ -35,7 +35,7 @@ export const getAgentStreamFromTemplate = (params: {
 
   const { model } = template
 
-  const getStream = (messages: CodebuffMessage[]) => {
+  const getStream = (messages: Message[]) => {
     const options: Parameters<typeof promptAiSdkStream>[0] = {
       messages,
       model,
diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index 56d779bf4..a4116c176 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -6,7 +6,6 @@ import {
 } from '@codebuff/common/constants'
 import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
 import { TOOLS_WHICH_WONT_FORCE_NEXT_STEP } from '@codebuff/common/tools/constants'
-import { renderToolResults } from '@codebuff/common/tools/utils'
 import { buildArray } from '@codebuff/common/util/array'
 import { generateCompactId } from '@codebuff/common/util/string'
 
@@ -29,19 +28,20 @@ import {
   getMessagesSubset,
   isSystemInstruction,
 } from './util/messages'
-import { isToolResult, renderReadFilesResult } from './util/parse-tool-call-xml'
+import { renderReadFilesResult } from './util/parse-tool-call-xml'
 import { simplifyReadFileResults } from './util/simplify-tool-results'
 import { countTokensJson } from './util/token-counter'
 import { getRequestContext } from './websockets/request-context'
 
 import type { AgentResponseTrace } from '@codebuff/bigquery'
+import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
   AgentTemplateType,
   AgentState,
-  ToolResult,
 } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
@@ -149,8 +149,12 @@ export const runAgentStep = async (
   if (clearReadFileToolResults) {
     // Update message history.
     for (const message of messageHistory) {
-      if (isToolResult(message)) {
-        message.content = simplifyReadFileResults(message.content)
+      if (
+        message.role === 'tool' &&
+        message.content.toolName === 'read_files'
+      ) {
+        const m = message as CodebuffToolMessage<'read_files'>
+        m.content.output = simplifyReadFileResults(m.content.output)
       }
     }
 
@@ -162,7 +166,7 @@ export const runAgentStep = async (
     })
   }
 
-  const toolResults: ToolResult[] = []
+  const toolResults: ToolResultPart[] = []
 
   const updatedFiles = addedFiles.filter((f) =>
     updatedFilePaths.includes(f.path),
@@ -170,14 +174,21 @@ export const runAgentStep = async (
 
   if (updatedFiles.length > 0) {
     toolResults.push({
+      type: 'tool-result',
       toolName: 'file_updates',
       toolCallId: generateCompactId(),
-      output: {
-        type: 'text',
-        value:
-          `These are the updates made to the files since the last response (either by you or by the user). These are the most recent versions of these files. You MUST be considerate of the user's changes:\n` +
-          renderReadFilesResult(updatedFiles, fileContext.tokenCallers ?? {}),
-      },
+      output: [
+        {
+          type: 'json',
+          value: {
+            message: `These are the updates made to the files since the last response (either by you or by the user). These are the most recent versions of these files. You MUST be considerate of the user's changes.`,
+            files: renderReadFilesResult(
+              updatedFiles,
+              fileContext.tokenCallers ?? {},
+            ),
+          },
+        },
+      ],
     })
   }
 
@@ -217,13 +228,15 @@ export const runAgentStep = async (
     localAgentTemplates,
   )
 
-  const agentMessagesUntruncated = buildArray<CodebuffMessage>(
+  const agentMessagesUntruncated = buildArray<Message>(
     ...expireMessages(messageHistory, 'agentStep'),
 
-    toolResults.length > 0 && {
-      role: 'user' as const,
-      content: asSystemMessage(renderToolResults(toolResults)),
-    },
+    toolResults.map((result) => {
+      return {
+        role: 'tool',
+        content: result,
+      }
+    }),
 
     stepPrompt && {
       role: 'user' as const,
@@ -451,7 +464,7 @@ export const loopAgentSteps = async (
     params: Record<string, any> | undefined
     fingerprintId: string
     fileContext: ProjectFileContext
-    toolResults: ToolResult[]
+    toolResults: ToolResultPart[]
     localAgentTemplates: Record<string, AgentTemplate>
 
     userId: string | undefined
@@ -479,16 +492,18 @@ export const loopAgentSteps = async (
     : undefined
 
   // Build the initial message history with user prompt and instructions
-  const initialMessages = buildArray<CodebuffMessage>(
+  const initialMessages = buildArray<Message>(
     ...agentState.messageHistory.map((m) => ({
       ...m,
       keepDuringTruncation: false,
     })),
 
-    toolResults.length > 0 && {
-      role: 'user' as const,
-      content: asSystemMessage(renderToolResults(toolResults)),
-    },
+    toolResults.map((result) => {
+      return {
+        role: 'tool' as const,
+        content: result,
+      }
+    }),
 
     hasPrompt && [
       {
diff --git a/backend/src/templates/strings.ts b/backend/src/templates/strings.ts
index 8035b8299..76f6f8a2e 100644
--- a/backend/src/templates/strings.ts
+++ b/backend/src/templates/strings.ts
@@ -1,6 +1,5 @@
 import { CodebuffConfigSchema } from '@codebuff/common/json-config/constants'
-import { renderToolResults } from '@codebuff/common/tools/utils'
-import { escapeString, generateCompactId } from '@codebuff/common/util/string'
+import { escapeString } from '@codebuff/common/util/string'
 import { schemaToJsonStr } from '@codebuff/common/util/zod-schema'
 import { z } from 'zod/v4'
 
@@ -74,27 +73,25 @@ export async function formatPrompt(
     [PLACEHOLDER.USER_CWD]: fileContext.cwd,
     [PLACEHOLDER.USER_INPUT_PROMPT]: escapeString(lastUserInput ?? ''),
     [PLACEHOLDER.INITIAL_AGENT_PROMPT]: escapeString(intitialAgentPrompt ?? ''),
-    [PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS]: renderToolResults(
-      Object.entries({
-        ...Object.fromEntries(
-          Object.entries(fileContext.knowledgeFiles)
-            .filter(([path]) =>
-              [
-                'knowledge.md',
-                'CLAUDE.md',
-                'codebuff.json',
-                'codebuff.jsonc',
-              ].includes(path),
-            )
-            .map(([path, content]) => [path, content.trim()]),
-        ),
-        ...fileContext.userKnowledgeFiles,
-      }).map(([path, content]) => ({
-        toolName: 'read_files',
-        toolCallId: generateCompactId(),
-        output: { type: 'text', value: JSON.stringify({ path, content }) },
-      })),
-    ),
+    [PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS]: Object.entries({
+      ...Object.fromEntries(
+        Object.entries(fileContext.knowledgeFiles)
+          .filter(([path]) =>
+            [
+              'knowledge.md',
+              'CLAUDE.md',
+              'codebuff.json',
+              'codebuff.jsonc',
+            ].includes(path),
+          )
+          .map(([path, content]) => [path, content.trim()]),
+      ),
+      ...fileContext.userKnowledgeFiles,
+    })
+      .map(([path, content]) => {
+        return `\`\`\`${path}\n${content.trim()}\n\`\`\``
+      })
+      .join('\n\n'),
   }
 
   for (const varName of placeholderValues) {
diff --git a/backend/src/tools/handlers/tool/add-message.ts b/backend/src/tools/handlers/tool/add-message.ts
index f30514484..4e99920e6 100644
--- a/backend/src/tools/handlers/tool/add-message.ts
+++ b/backend/src/tools/handlers/tool/add-message.ts
@@ -3,7 +3,7 @@ import type {
   CodebuffToolCall,
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 export const handleAddMessage = (({
   previousToolCallFinished,
@@ -12,7 +12,7 @@ export const handleAddMessage = (({
 }: {
   previousToolCallFinished: Promise<void>
   toolCall: CodebuffToolCall<'add_message'>
-  getLatestState: () => { messages: CodebuffMessage[] }
+  getLatestState: () => { messages: Message[] }
 }): {
   result: Promise<CodebuffToolOutput<'add_message'>>
   state: {}
diff --git a/backend/src/tools/handlers/tool/find-files.ts b/backend/src/tools/handlers/tool/find-files.ts
index 554544ba9..03438a17e 100644
--- a/backend/src/tools/handlers/tool/find-files.ts
+++ b/backend/src/tools/handlers/tool/find-files.ts
@@ -18,7 +18,7 @@ import type {
   CodebuffToolCall,
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
 
@@ -40,7 +40,7 @@ export const handleFindFiles = ((params: {
     fingerprintId?: string
     userId?: string
     repoId?: string
-    messages?: CodebuffMessage[]
+    messages?: Message[]
   }
 }): { result: Promise<CodebuffToolOutput<'find_files'>>; state: {} } => {
   const {
@@ -169,7 +169,7 @@ async function uploadExpandedFileContextForTraining(
     messages,
     system,
   }: {
-    messages: CodebuffMessage[]
+    messages: Message[]
     system: string | Array<TextBlock>
   },
   fileContext: ProjectFileContext,
diff --git a/backend/src/tools/handlers/tool/read-files.ts b/backend/src/tools/handlers/tool/read-files.ts
index b2e681897..503983a7d 100644
--- a/backend/src/tools/handlers/tool/read-files.ts
+++ b/backend/src/tools/handlers/tool/read-files.ts
@@ -6,7 +6,7 @@ import type {
   CodebuffToolCall,
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
 
@@ -25,7 +25,7 @@ export const handleReadFiles = ((params: {
     userId?: string
     fingerprintId?: string
     repoId?: string
-    messages?: CodebuffMessage[]
+    messages?: Message[]
   }
 }): {
   result: Promise<CodebuffToolOutput<ToolName>>
diff --git a/backend/src/tools/handlers/tool/set-messages.ts b/backend/src/tools/handlers/tool/set-messages.ts
index d6e1eeec8..9054535ca 100644
--- a/backend/src/tools/handlers/tool/set-messages.ts
+++ b/backend/src/tools/handlers/tool/set-messages.ts
@@ -3,7 +3,7 @@ import type {
   CodebuffToolCall,
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 export const handleSetMessages = (({
   previousToolCallFinished,
@@ -12,7 +12,7 @@ export const handleSetMessages = (({
 }: {
   previousToolCallFinished: Promise<void>
   toolCall: CodebuffToolCall<'set_messages'>
-  getLatestState: () => { messages: CodebuffMessage[] }
+  getLatestState: () => { messages: Message[] }
 }): {
   result: Promise<CodebuffToolOutput<'set_messages'>>
   state: {}
diff --git a/backend/src/tools/handlers/tool/spawn-agent-inline.ts b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
index 2a4cc7ac4..0e6ec0073 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-inline.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
@@ -15,7 +15,7 @@ import type {
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type { AgentState } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
@@ -30,14 +30,14 @@ export const handleSpawnAgentInline = ((params: {
   userInputId: string
   writeToClient: (chunk: string | PrintModeEvent) => void
 
-  getLatestState: () => { messages: CodebuffMessage[] }
+  getLatestState: () => { messages: Message[] }
   state: {
     ws?: WebSocket
     fingerprintId?: string
     userId?: string
     agentTemplate?: AgentTemplate
     localAgentTemplates?: Record<string, AgentTemplate>
-    messages?: CodebuffMessage[]
+    messages?: Message[]
     agentState?: AgentState
   }
 }): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
index 615b9fd5c..ff9cdb538 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -7,8 +7,8 @@ import { logger } from '../../../util/logger'
 
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
 import type {
-  AssistantCodebuffMessage,
-  CodebuffMessage,
+  AssistantMessage,
+  Message,
 } from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
@@ -30,7 +30,7 @@ export interface BaseSpawnState {
   userId?: string
   agentTemplate?: AgentTemplate
   localAgentTemplates?: Record<string, AgentTemplate>
-  messages?: CodebuffMessage[]
+  messages?: Message[]
   agentState?: AgentState
 }
 
@@ -38,7 +38,7 @@ export interface SpawnContext {
   fileContext: ProjectFileContext
   clientSessionId: string
   userInputId: string
-  getLatestState: () => { messages: CodebuffMessage[] }
+  getLatestState: () => { messages: Message[] }
 }
 
 /**
@@ -224,9 +224,7 @@ export function validateAgentInput(
 /**
  * Creates conversation history message for spawned agents
  */
-export function createConversationHistoryMessage(
-  messages: CodebuffMessage[],
-): CodebuffMessage {
+export function createConversationHistoryMessage(messages: Message[]): Message {
   // Filter out system messages from conversation history to avoid including parent's system prompt
   const messagesWithoutSystem = messages.filter(
     (message) => message.role !== 'system',
@@ -247,7 +245,7 @@ export function createConversationHistoryMessage(
 export function createAgentState(
   agentType: string,
   parentAgentState: AgentState,
-  messageHistory: CodebuffMessage[],
+  messageHistory: Message[],
 ): AgentState {
   const agentId = generateCompactId()
 
@@ -382,10 +380,10 @@ export async function formatAgentResult(
     | { errorMessage: string }
     | { structuredOutput: Record<string, any> | undefined }
     | {
-        lastMessage: AssistantCodebuffMessage['content']
+        lastMessage: AssistantMessage['content']
       }
     | {
-        allMessages: CodebuffMessage[]
+        allMessages: Message[]
       }
   )
 > {
@@ -403,8 +401,7 @@ export async function formatAgentResult(
   if (agentTemplate.outputMode === 'last_message') {
     const { agentState } = result
     const assistantMessages = agentState.messageHistory.filter(
-      (message): message is AssistantCodebuffMessage =>
-        message.role === 'assistant',
+      (message): message is AssistantMessage => message.role === 'assistant',
     )
     const lastAssistantMessage = assistantMessages[assistantMessages.length - 1]
     if (!lastAssistantMessage) {
diff --git a/backend/src/tools/handlers/tool/spawn-agents-async.ts b/backend/src/tools/handlers/tool/spawn-agents-async.ts
index b06d73775..1e67a2935 100644
--- a/backend/src/tools/handlers/tool/spawn-agents-async.ts
+++ b/backend/src/tools/handlers/tool/spawn-agents-async.ts
@@ -20,7 +20,7 @@ import type {
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type { AgentState } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
@@ -36,7 +36,7 @@ export const handleSpawnAgentsAsync = ((params: {
   userInputId: string
   writeToClient: (chunk: string | PrintModeEvent) => void
 
-  getLatestState: () => { messages: CodebuffMessage[] }
+  getLatestState: () => { messages: Message[] }
   state: {
     ws?: WebSocket
     fingerprintId?: string
@@ -44,7 +44,7 @@ export const handleSpawnAgentsAsync = ((params: {
     agentTemplate?: AgentTemplate
     localAgentTemplates?: Record<string, AgentTemplate>
     sendSubagentChunk?: SendSubagentChunk
-    messages?: CodebuffMessage[]
+    messages?: Message[]
     agentState?: AgentState
   }
 }): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
@@ -105,7 +105,7 @@ export const handleSpawnAgentsAsync = ((params: {
 
         validateAgentInput(agentTemplate, agentType, prompt, params)
 
-        const subAgentMessages: CodebuffMessage[] = []
+        const subAgentMessages: Message[] = []
         if (agentTemplate.includeMessageHistory) {
           subAgentMessages.push(conversationHistoryMessage)
         }
diff --git a/backend/src/tools/handlers/tool/spawn-agents.ts b/backend/src/tools/handlers/tool/spawn-agents.ts
index 437da4f76..dba7f04d1 100644
--- a/backend/src/tools/handlers/tool/spawn-agents.ts
+++ b/backend/src/tools/handlers/tool/spawn-agents.ts
@@ -16,7 +16,7 @@ import type {
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type { AgentState } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
@@ -40,7 +40,7 @@ export const handleSpawnAgents = ((params: {
   userInputId: string
   writeToClient: (chunk: string | PrintModeEvent) => void
 
-  getLatestState: () => { messages: CodebuffMessage[] }
+  getLatestState: () => { messages: Message[] }
   state: {
     ws?: WebSocket
     fingerprintId?: string
@@ -48,7 +48,7 @@ export const handleSpawnAgents = ((params: {
     agentTemplate?: AgentTemplate
     localAgentTemplates?: Record<string, AgentTemplate>
     sendSubagentChunk?: SendSubagentChunk
-    messages?: CodebuffMessage[]
+    messages?: Message[]
     agentState?: AgentState
   }
 }): { result: Promise<CodebuffToolOutput<ToolName>>; state: {} } => {
@@ -98,7 +98,7 @@ export const handleSpawnAgents = ((params: {
 
         validateAgentInput(agentTemplate, agentType, prompt, params)
 
-        const subAgentMessages: CodebuffMessage[] = []
+        const subAgentMessages: Message[] = []
         if (agentTemplate.includeMessageHistory) {
           subAgentMessages.push(conversationHistoryMessage)
         }
diff --git a/backend/src/tools/handlers/tool/write-file.ts b/backend/src/tools/handlers/tool/write-file.ts
index 8b4fbe5ab..4b912a061 100644
--- a/backend/src/tools/handlers/tool/write-file.ts
+++ b/backend/src/tools/handlers/tool/write-file.ts
@@ -10,7 +10,7 @@ import type {
   CodebuffToolCall,
   CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 import type { WebSocket } from 'ws'
 
 type FileProcessingTools = 'write_file' | 'str_replace' | 'create_plan'
@@ -93,7 +93,7 @@ export const handleWriteFile = (({
     userId?: string
     fullResponse?: string
     prompt?: string
-    messages?: CodebuffMessage[]
+    messages?: Message[]
   } & OptionalFileProcessingState
 }): {
   result: Promise<CodebuffToolOutput<'write_file'>>
diff --git a/backend/src/tools/stream-parser.ts b/backend/src/tools/stream-parser.ts
index 1a0ddbd56..583298386 100644
--- a/backend/src/tools/stream-parser.ts
+++ b/backend/src/tools/stream-parser.ts
@@ -11,13 +11,10 @@ import type { CustomToolCall } from './tool-executor'
 import type { AgentTemplate } from '../templates/types'
 import type { ToolName } from '@codebuff/common/tools/constants'
 import type { CodebuffToolCall } from '@codebuff/common/tools/list'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
-import type {
-  AgentState,
-  Subgoal,
-  ToolResult,
-} from '@codebuff/common/types/session-state'
+import type { AgentState, Subgoal } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { ToolCallPart } from 'ai'
 import type { WebSocket } from 'ws'
@@ -40,7 +37,7 @@ export async function processStreamWithTools<T extends string>(options: {
   agentTemplate: AgentTemplate
   localAgentTemplates: Record<string, AgentTemplate>
   fileContext: ProjectFileContext
-  messages: CodebuffMessage[]
+  messages: Message[]
   agentState: AgentState
   agentContext: Record<string, Subgoal>
   onResponseChunk: (chunk: string | PrintModeEvent) => void
@@ -66,7 +63,7 @@ export async function processStreamWithTools<T extends string>(options: {
 
   const messages = [...options.messages]
 
-  const toolResults: ToolResult[] = []
+  const toolResults: ToolResultPart[] = []
   const toolCalls: (CodebuffToolCall | CustomToolCall)[] = []
   const { promise: streamDonePromise, resolve: resolveStreamDonePromise } =
     Promise.withResolvers<void>()
@@ -158,9 +155,10 @@ export async function processStreamWithTools<T extends string>(options: {
     ]),
     (toolName, error) => {
       toolResults.push({
+        type: 'tool-result',
         toolName,
         toolCallId: generateCompactId(),
-        output: { type: 'text', value: error },
+        output: [{ type: 'json', value: { errorMessage: error } }],
       })
     },
     onResponseChunk,
@@ -176,7 +174,7 @@ export async function processStreamWithTools<T extends string>(options: {
     fullResponseChunks.push(chunk)
   }
 
-  state.messages = buildArray<CodebuffMessage>([
+  state.messages = buildArray<Message>([
     ...expireMessages(state.messages, 'agentStep'),
     fullResponseChunks.length > 0 && {
       role: 'assistant' as const,
diff --git a/backend/src/util/__tests__/messages.test.ts b/backend/src/util/__tests__/messages.test.ts
index d417ed290..8adb978bd 100644
--- a/backend/src/util/__tests__/messages.test.ts
+++ b/backend/src/util/__tests__/messages.test.ts
@@ -11,14 +11,14 @@ import {
 import { trimMessagesToFitTokenLimit, messagesWithSystem } from '../messages'
 import * as tokenCounter from '../token-counter'
 
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 describe('messagesWithSystem', () => {
   it('prepends system message to array', () => {
     const messages = [
       { role: 'user', content: 'hello' },
       { role: 'assistant', content: 'hi' },
-    ] as CodebuffMessage[]
+    ] as Message[]
     const system = 'Be helpful'
 
     const result = messagesWithSystem(messages, system)
@@ -148,7 +148,7 @@ describe('trimMessagesToFitTokenLimit', () => {
         text: 'Another long message that should never be shortened because it has no tool calls in it at all',
       },
     },
-  ] as CodebuffMessage[]
+  ] as Message[]
 
   it('handles all features working together correctly', () => {
     const maxTotalTokens = 3000
@@ -245,7 +245,7 @@ describe('trimMessagesToFitTokenLimit', () => {
           content: 'Message 5 - keep me too!',
           keepDuringTruncation: true,
         },
-      ] as CodebuffMessage[]
+      ] as Message[]
 
       const result = trimMessagesToFitTokenLimit(messages, 0, 1000)
 
@@ -275,7 +275,7 @@ describe('trimMessagesToFitTokenLimit', () => {
           content: 'Short message 2',
           keepDuringTruncation: true,
         },
-      ] as CodebuffMessage[]
+      ] as Message[]
 
       const result = trimMessagesToFitTokenLimit(messages, 0, 10000)
 
@@ -291,7 +291,7 @@ describe('trimMessagesToFitTokenLimit', () => {
         { role: 'user', content: 'B'.repeat(1000) }, // Large message to be removed
         { role: 'user', content: 'C'.repeat(1000) }, // Large message to be removed
         { role: 'user', content: 'Keep this', keepDuringTruncation: true },
-      ] as CodebuffMessage[]
+      ] as Message[]
 
       const result = trimMessagesToFitTokenLimit(messages, 0, 1000)
 
@@ -321,7 +321,7 @@ describe('trimMessagesToFitTokenLimit', () => {
           keepDuringTruncation: true,
         },
         { role: 'user', content: 'C'.repeat(100) }, // Might be kept
-      ] as CodebuffMessage[]
+      ] as Message[]
 
       const result = trimMessagesToFitTokenLimit(messages, 0, 2000)
 
@@ -345,7 +345,7 @@ describe('trimMessagesToFitTokenLimit', () => {
         { role: 'user', content: 'B'.repeat(800) }, // Large message to force truncation
         { role: 'user', content: 'Keep 2', keepDuringTruncation: true },
         { role: 'user', content: 'C'.repeat(800) }, // Large message to force truncation
-      ] as CodebuffMessage[]
+      ] as Message[]
 
       const result = trimMessagesToFitTokenLimit(messages, 0, 500)
 
diff --git a/backend/src/util/messages.ts b/backend/src/util/messages.ts
index 9511a2160..3b54ff2e0 100644
--- a/backend/src/util/messages.ts
+++ b/backend/src/util/messages.ts
@@ -2,18 +2,26 @@ import { AssertionError } from 'assert'
 
 import { buildArray } from '@codebuff/common/util/array'
 import { closeXml } from '@codebuff/common/util/xml'
+import { cloneDeep, isEqual } from 'lodash'
 
 import { logger } from './logger'
 import { simplifyTerminalCommandResults } from './simplify-tool-results'
 import { countTokensJson } from './token-counter'
 
 import type { System } from '../llm-apis/claude'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+  CodebuffToolMessage,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type {
+  Message,
+  ToolMessage,
+} from '@codebuff/common/types/messages/codebuff-message'
 
 export function messagesWithSystem(
-  messages: CodebuffMessage[],
+  messages: Message[],
   system: System,
-): CodebuffMessage[] {
+): Message[] {
   return [
     {
       role: 'system',
@@ -53,9 +61,7 @@ export function isSystemMessage(str: string): boolean {
   return str.startsWith('<system>') && str.endsWith(closeXml('system'))
 }
 
-export function castAssistantMessage(
-  message: CodebuffMessage,
-): CodebuffMessage | null {
+export function castAssistantMessage(message: Message): Message | null {
   if (message.role !== 'assistant') {
     return message
   }
@@ -87,25 +93,19 @@ export function castAssistantMessage(
 // Number of terminal command outputs to keep in full form before simplifying
 const numTerminalCommandsToKeep = 5
 
-/**
- * Helper function to simplify terminal command output while preserving some recent ones
- * @param text - Terminal output text to potentially simplify
- * @param numKept - Number of terminal outputs already kept in full form
- * @returns Object containing simplified result and updated count of kept outputs
- */
 function simplifyTerminalHelper(
-  text: string,
+  toolResult: CodebuffToolOutput<'run_terminal_command'>,
   numKept: number,
-): { result: string; numKept: number } {
-  const simplifiedText = simplifyTerminalCommandResults(text)
+): { result: CodebuffToolOutput<'run_terminal_command'>; numKept: number } {
+  const simplified = simplifyTerminalCommandResults(toolResult)
 
   // Keep the full output for the N most recent commands
-  if (numKept < numTerminalCommandsToKeep && simplifiedText !== text) {
-    return { result: text, numKept: numKept + 1 }
+  if (numKept < numTerminalCommandsToKeep && !isEqual(simplified, toolResult)) {
+    return { result: toolResult, numKept: numKept + 1 }
   }
 
   return {
-    result: simplifiedText,
+    result: simplified,
     numKept,
   }
 }
@@ -115,7 +115,7 @@ const shortenedMessageTokenFactor = 0.5
 const replacementMessage = {
   role: 'user',
   content: asSystemMessage('Previous message(s) omitted due to length'),
-} satisfies CodebuffMessage
+} satisfies Message
 
 /**
  * Trims messages from the beginning to fit within token limits while preserving
@@ -132,10 +132,10 @@ const replacementMessage = {
  * @returns Trimmed array of messages that fits within token limit
  */
 export function trimMessagesToFitTokenLimit(
-  messages: CodebuffMessage[],
+  messages: Message[],
   systemTokens: number,
   maxTotalTokens: number = 190_000,
-): CodebuffMessage[] {
+): Message[] {
   const maxMessageTokens = maxTotalTokens - systemTokens
 
   // Check if we're already under the limit
@@ -145,75 +145,37 @@ export function trimMessagesToFitTokenLimit(
     return messages
   }
 
-  const shortenedMessages: CodebuffMessage[] = []
+  const shortenedMessages: Message[] = []
   let numKept = 0
 
   // Process messages from newest to oldest
   for (let i = messages.length - 1; i >= 0; i--) {
     const m = messages[i]
-    let message: CodebuffMessage
-    if (m.role === 'tool' || m.role === 'system') {
-      message = messages[i]
-    } else if (m.role === 'user') {
-      let newContent: typeof m.content
-
-      // Handle string content (usually terminal output)
-      if (typeof m.content === 'string') {
-        const result = simplifyTerminalHelper(m.content, numKept)
-        message = { role: m.role, content: result.result }
-        numKept = result.numKept
-      } else {
-        // Handle array content (mixed content types)
-        newContent = []
-        // Process content parts from newest to oldest
-        for (let j = m.content.length - 1; j >= 0; j--) {
-          const messagePart = m.content[j]
-          // Preserve non-text content (i.e. images)
-          if (messagePart.type !== 'text') {
-            newContent.push(messagePart)
-            continue
-          }
-
-          const result = simplifyTerminalHelper(messagePart.text, numKept)
-          newContent.push({ ...messagePart, text: result.result })
-          numKept = result.numKept
-        }
-        newContent.reverse()
-        message = { ...m, content: newContent }
+    if (m.role === 'system' || m.role === 'user' || m.role === 'assistant') {
+      shortenedMessages.push(m)
+    } else if (m.role === 'tool') {
+      if (m.content.toolName !== 'run_terminal_command') {
+        shortenedMessages.push(m)
+        continue
       }
-    } else if (m.role === 'assistant') {
-      let newContent: typeof m.content
-
-      // Handle string content (usually terminal output)
-      if (typeof m.content === 'string') {
-        const result = simplifyTerminalHelper(m.content, numKept)
-        message = { role: m.role, content: result.result }
-        numKept = result.numKept
-      } else {
-        // Handle array content (mixed content types)
-        newContent = []
-        // Process content parts from newest to oldest
-        for (let j = m.content.length - 1; j >= 0; j--) {
-          const messagePart = m.content[j]
-          // Preserve non-text content (i.e. images)
-          if (messagePart.type !== 'text') {
-            newContent.push(messagePart)
-            continue
-          }
 
-          const result = simplifyTerminalHelper(messagePart.text, numKept)
-          newContent.push({ ...messagePart, text: result.result })
-          numKept = result.numKept
-        }
-        newContent.reverse()
-        message = { ...m, content: newContent }
-      }
+      const terminalResultMessage = cloneDeep(
+        m,
+      ) as CodebuffToolMessage<'run_terminal_command'>
+
+      const result = simplifyTerminalHelper(
+        terminalResultMessage.content.output,
+        numKept,
+      )
+      terminalResultMessage.content.output = result.result
+      numKept = result.numKept
+
+      shortenedMessages.push(terminalResultMessage)
     } else {
       m satisfies never
-      throw new AssertionError({ message: 'Not a valid role' })
+      const mAny = m as any
+      throw new AssertionError({ message: `Not a valid role: ${mAny.role}` })
     }
-
-    shortenedMessages.push(message)
   }
   shortenedMessages.reverse()
 
@@ -225,7 +187,7 @@ export function trimMessagesToFitTokenLimit(
     (maxMessageTokens - requiredTokens) * (1 - shortenedMessageTokenFactor)
 
   const placeholder = 'deleted'
-  const filteredMessages: (CodebuffMessage | typeof placeholder)[] = []
+  const filteredMessages: (Message | typeof placeholder)[] = []
   for (const message of shortenedMessages) {
     if (removedTokens >= tokensToRemove || message.keepDuringTruncation) {
       filteredMessages.push(message)
@@ -247,9 +209,9 @@ export function trimMessagesToFitTokenLimit(
 }
 
 export function getMessagesSubset(
-  messages: CodebuffMessage[],
+  messages: Message[],
   otherTokens: number,
-): CodebuffMessage[] {
+): Message[] {
   const messagesSubset = trimMessagesToFitTokenLimit(messages, otherTokens)
 
   // Remove cache_control from all messages
@@ -275,9 +237,9 @@ export function getMessagesSubset(
 }
 
 export function expireMessages(
-  messages: CodebuffMessage[],
+  messages: Message[],
   endOf: 'agentStep' | 'userPrompt',
-): CodebuffMessage[] {
+): Message[] {
   return messages.filter((m) => {
     // Keep messages with no timeToLive
     if (m.timeToLive === undefined) return true
@@ -289,3 +251,59 @@ export function expireMessages(
     return true
   })
 }
+
+export function getEditedFiles(messages: Message[]): string[] {
+  return buildArray(
+    messages
+      .filter(
+        (
+          m,
+        ): m is ToolMessage & {
+          content: { toolName: 'create_plan' | 'str_replace' | 'write_file' }
+        } => {
+          return (
+            m.role === 'tool' &&
+            (m.content.toolName === 'create_plan' ||
+              m.content.toolName === 'str_replace' ||
+              m.content.toolName === 'write_file')
+          )
+        },
+      )
+      .map((m) => {
+        const fileInfo = (
+          m as CodebuffToolMessage<'create_plan' | 'str_replace' | 'write_file'>
+        ).content.output[0].value
+        if ('errorMessage' in fileInfo) {
+          return null
+        }
+        return fileInfo.file
+      }),
+  )
+}
+
+export function getPreviouslyReadFiles(messages: Message[]): {
+  path: string
+  content: string
+  referencedBy?: Record<string, string[]>
+}[] {
+  return buildArray(
+    messages
+      .filter(
+        (
+          m,
+        ): m is ToolMessage & {
+          content: { toolName: 'read_files' }
+        } => m.role === 'tool' && m.content.toolName === 'read_files',
+      )
+      .map((m) => {
+        return (
+          m as CodebuffToolMessage<'read_files'>
+        ).content.output[0].value.map((file) => {
+          if ('contentOmittedForLength' in file) {
+            return undefined
+          }
+          return file
+        })
+      }),
+  )
+}
diff --git a/backend/src/util/parse-tool-call-xml.ts b/backend/src/util/parse-tool-call-xml.ts
index 1c8a109ab..ff4fc2f7b 100644
--- a/backend/src/util/parse-tool-call-xml.ts
+++ b/backend/src/util/parse-tool-call-xml.ts
@@ -1,61 +1,3 @@
-import { toContentString } from '@codebuff/common/util/messages'
-import { generateCompactId } from '@codebuff/common/util/string'
-import { closeXml } from '@codebuff/common/util/xml'
-
-import type { StringToolResultPart } from '@codebuff/common/tools/constants'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
-
-/**
- * Parses XML content for a tool call into a structured object with only string values.
- * Example input:
- * <type>click</type>
- * <selector>#button</selector>
- * <timeout>5000</timeout>
- */
-export function parseToolCallXml(xmlString: string): Record<string, string> {
-  if (!xmlString.trim()) return {}
-
-  const result: Record<string, string> = {}
-  const tagPattern = /<(\w+)>([\s\S]*?)<\/\1>/g
-  let match
-
-  while ((match = tagPattern.exec(xmlString)) !== null) {
-    const [_, key, rawValue] = match
-
-    // Remove leading/trailing whitespace but preserve internal whitespace
-    const value = rawValue.replace(/^\s+|\s+$/g, '')
-
-    // Assign all values as strings
-    result[key] = value
-  }
-
-  return result
-}
-
-export const parseToolResults = (xmlString: string): StringToolResultPart[] => {
-  if (!xmlString.trim()) return []
-
-  const results: StringToolResultPart[] = []
-  const toolResultPattern = /<tool_result>([\s\S]*?)<\/tool_result>/g
-  let match
-
-  while ((match = toolResultPattern.exec(xmlString)) !== null) {
-    const [_, toolResultContent] = match
-    const toolMatch = /<tool>(.*?)<\/tool>/g.exec(toolResultContent)
-    const resultMatch = /<result>([\s\S]*?)<\/result>/g.exec(toolResultContent)
-
-    if (toolMatch && resultMatch) {
-      results.push({
-        toolName: toolMatch[1],
-        toolCallId: generateCompactId(),
-        output: { type: 'text', value: resultMatch[1].trim() },
-      })
-    }
-  }
-
-  return results
-}
-
 export interface TokenCallerMap {
   [filePath: string]: {
     [token: string]: string[] // Array of files that call this token
@@ -66,36 +8,11 @@ export function renderReadFilesResult(
   files: { path: string; content: string }[],
   tokenCallers: TokenCallerMap,
 ) {
-  return files
-    .map((file) => {
-      const referencedBy =
-        Object.entries(tokenCallers[file.path] ?? {})
-          .filter(([_, callers]) => callers.length > 0)
-          .map(([token, callers]) => `${token}: ${callers.join(', ')}`)
-          .join('\n') || 'None'
-      return `<read_file>\n<path>${file.path}${closeXml('path')}\n<content>${file.content}${closeXml('content')}\n<referenced_by>${referencedBy}${closeXml('referenced_by')}\n${closeXml('read_file')}`
-    })
-    .join('\n\n')
-}
-
-export function parseReadFilesResult(
-  xmlString: string,
-): { path: string; content: string; referencedBy: string }[] {
-  const files: { path: string; content: string; referencedBy: string }[] = []
-  const filePattern =
-    /<read_file>\s*<path>([^<>]+)<\/path>\s*<content>([\s\S]*?)<\/content>\s*<referenced_by>([\s\S]*?)<\/referenced_by>\s*<\/read_file>/g
-  let match
-
-  while ((match = filePattern.exec(xmlString)) !== null) {
-    const [, filePath, content, referencedBy] = match
-    if (filePath.trim()) {
-      files.push({ path: filePath.trim(), content, referencedBy })
+  return files.map((file) => {
+    return {
+      path: file.path,
+      content: file.content,
+      referencedBy: tokenCallers[file.path] ?? {},
     }
-  }
-
-  return files
-}
-
-export function isToolResult(message: CodebuffMessage): boolean {
-  return toContentString(message).includes('<tool_result')
+  })
 }
diff --git a/backend/src/util/simplify-tool-results.ts b/backend/src/util/simplify-tool-results.ts
index e0a17ac01..46c0f7b75 100644
--- a/backend/src/util/simplify-tool-results.ts
+++ b/backend/src/util/simplify-tool-results.ts
@@ -1,120 +1,36 @@
-import { renderToolResults } from '@codebuff/common/tools/utils'
+import { cloneDeep } from 'lodash'
 
-import { parseReadFilesResult, parseToolResults } from './parse-tool-call-xml'
+import type { CodebuffToolOutput } from '@codebuff/common/tools/list'
 
-import type { ToolResult } from '@codebuff/common/types/session-state'
-
-/**
- * Helper function to simplify tool results of a specific type while preserving others.
- * Extracts results of the specified tool type, applies a simplification function to them,
- * and combines them back with other unchanged tool results.
- * @param messageContent - The message content containing tool results, either as a string or array
- * @param toolName - The name of the tool whose results should be simplified
- * @param simplifyFn - Function to apply to each matching tool result
- * @returns The message content with simplified results for the specified tool type
- */
-function simplifyToolResults(
-  messageContent: string | object[],
-  toolName: string,
-  simplifyFn: (result: ToolResult) => ToolResult,
-): string {
-  const resultsStr =
-    typeof messageContent === 'string'
-      ? messageContent
-      : ((messageContent[messageContent.length - 1] as any)?.text as string) ??
-        ''
-  if (!resultsStr.includes('<tool_result')) {
-    return resultsStr
-  }
-
-  const toolResults = parseToolResults(resultsStr)
-  const targetResults = toolResults.filter(
-    (result) => result.toolName === toolName,
-  )
-
-  if (targetResults.length === 0) {
-    return resultsStr
-  }
-
-  // Keep non-target results unchanged
-  const otherResults = toolResults.filter(
-    (result) => result.toolName !== toolName,
-  )
-
-  // Create simplified results
-  const simplifiedResults = targetResults.map(simplifyFn)
-
-  // Combine both types of results
-  return renderToolResults([...simplifiedResults, ...otherResults])
-}
-
-/**
- * Simplifies read_files tool results to show only file paths while preserving other tool results.
- * Useful for making tool result output more concise in message history.
- * @param messageContent - The message content containing tool results
- * @returns The message content with simplified read_files results showing only paths
- */
 export function simplifyReadFileResults(
-  messageContent: string | object[],
-): string {
-  return simplifyToolResults(
-    messageContent,
-    'read_files',
-    simplifyReadFileToolResult,
-  )
+  messageContent: CodebuffToolOutput<'read_files'>,
+): CodebuffToolOutput<'read_files'> {
+  return [
+    {
+      type: 'json',
+      value: cloneDeep(messageContent[0]).value.map(({ path }) => {
+        return {
+          path,
+          contentOmittedForLength: true,
+        }
+      }),
+    },
+  ]
 }
 
-/**
- * Simplifies terminal command tool results to show a brief summary while preserving other tool results.
- * Useful for making tool result output more concise in message history.
- * @param messageContent - The message content containing tool results
- * @returns The message content with simplified terminal command results
- */
 export function simplifyTerminalCommandResults(
-  messageContent: string | object[],
-): string {
-  return simplifyToolResults(
-    messageContent,
-    'run_terminal_command',
-    simplifyTerminalCommandToolResult,
-  )
-}
-
-/**
- * Simplifies a single read_files tool result by extracting just the file paths.
- * @param toolResult - The read_files tool result to simplify
- * @returns A new tool result with just the list of file paths that were read
- */
-export function simplifyReadFileToolResult(toolResult: ToolResult): ToolResult {
-  const fileBlocks = parseReadFilesResult(toolResult.output.value)
-  const filePaths = fileBlocks.map((block) => block.path)
-  return {
-    toolCallId: toolResult.toolCallId,
-    toolName: 'read_files',
-    output: {
-      type: 'text',
-      value: `Read the following files: ${filePaths.join('\n')}`,
+  messageContent: CodebuffToolOutput<'run_terminal_command'>,
+): CodebuffToolOutput<'run_terminal_command'> {
+  const { command, message, exitCode } = cloneDeep(messageContent)[0].value
+  return [
+    {
+      type: 'json',
+      value: {
+        command,
+        message,
+        stdoutOmittedForLength: true,
+        ...(exitCode !== undefined && { exitCode }),
+      },
     },
-  }
-}
-
-/**
- * Simplifies a single terminal command tool result by replacing output with a brief message.
- * @param toolResult - The terminal command tool result to simplify
- * @returns A new tool result with shortened output if the original was long
- */
-export function simplifyTerminalCommandToolResult(
-  toolResult: ToolResult,
-): ToolResult {
-  const shortenedResultCandidate = '[Output omitted]'
-  return shortenedResultCandidate.length < toolResult.output.value.length
-    ? {
-        toolCallId: toolResult.toolCallId,
-        toolName: 'run_terminal_command',
-        output: {
-          type: 'text',
-          value: shortenedResultCandidate,
-        },
-      }
-    : toolResult
+  ]
 }
diff --git a/backend/src/websockets/websocket-action.ts b/backend/src/websockets/websocket-action.ts
index 0f3e41bd8..c9656fea5 100644
--- a/backend/src/websockets/websocket-action.ts
+++ b/backend/src/websockets/websocket-action.ts
@@ -30,6 +30,7 @@ import type {
   ServerAction,
   UsageResponse,
 } from '@codebuff/common/actions'
+import type { ToolResultOutput } from '@codebuff/common/types/messages/content-part'
 import type { ClientMessage } from '@codebuff/common/websockets/websocket-schema'
 import type { WebSocket } from 'ws'
 
@@ -421,12 +422,7 @@ export async function requestToolCall(
   toolName: string,
   input: Record<string, any> & { timeout_seconds?: number },
 ): Promise<{
-  success: boolean
-  output?: {
-    type: 'text'
-    value: string
-  }
-  error?: string
+  output: ToolResultOutput[]
 }> {
   return new Promise((resolve) => {
     const requestId = generateCompactId()
@@ -443,8 +439,14 @@ export async function requestToolCall(
             () => {
               unsubscribe()
               resolve({
-                success: false,
-                error: `Tool call '${toolName}' timed out after ${timeoutInSeconds}s`,
+                output: [
+                  {
+                    type: 'json',
+                    value: {
+                      errorMessage: `Tool call '${toolName}' timed out after ${timeoutInSeconds}s`,
+                    },
+                  },
+                ],
               })
             },
             timeoutInSeconds * 1000 + 5000, // Convert to ms and add a small buffer
@@ -456,9 +458,7 @@ export async function requestToolCall(
         clearTimeout(timeoutHandle)
         unsubscribe()
         resolve({
-          success: action.success,
           output: action.output,
-          error: action.error,
         })
       }
     })
diff --git a/common/src/tools/list.ts b/common/src/tools/list.ts
index fca91e656..8c705396e 100644
--- a/common/src/tools/list.ts
+++ b/common/src/tools/list.ts
@@ -24,6 +24,7 @@ import { webSearchParams } from './params/tool/web-search'
 import { writeFileParams } from './params/tool/write-file'
 
 import type { ToolName, $ToolParams } from './constants'
+import type { ToolMessage } from '../types/messages/codebuff-message'
 import type {
   ToolCallPart,
   ToolResultPart,
@@ -73,9 +74,19 @@ export type CodebuffToolResult<T extends ToolName = ToolName> = {
   } & Omit<ToolResultPart, 'type'>
 }[T]
 
+export type CodebuffToolMessage<T extends ToolName = ToolName> = ToolMessage &
+  {
+    [K in ToolName]: {
+      toolName: K
+      content: {
+        output: CodebuffToolOutput<K>
+      }
+    }
+  }[T]
+
 // Tool call to send to client
 export type ClientToolName = (typeof clientToolNames)[number]
-const clientToolCallSchema = z.discriminatedUnion('toolName', [
+export const clientToolCallSchema = z.discriminatedUnion('toolName', [
   z.object({
     toolName: z.literal('browser_logs'),
     input: $toolParams.browser_logs.parameters,
diff --git a/common/src/tools/params/tool/read-files.ts b/common/src/tools/params/tool/read-files.ts
index 679875c3a..edf17f61b 100644
--- a/common/src/tools/params/tool/read-files.ts
+++ b/common/src/tools/params/tool/read-files.ts
@@ -2,11 +2,17 @@ import z from 'zod/v4'
 
 import type { $ToolParams } from '../../constants'
 
-export const fileContentsSchema = z.object({
-  path: z.string(),
-  content: z.string(),
-  referencedBy: z.record(z.string(), z.string().array()).optional(),
-})
+export const fileContentsSchema = z.union([
+  z.object({
+    path: z.string(),
+    content: z.string(),
+    referencedBy: z.record(z.string(), z.string().array()).optional(),
+  }),
+  z.object({
+    path: z.string(),
+    contentOmittedForLength: z.literal(true),
+  }),
+])
 
 const toolName = 'read_files'
 const endsAgentStep = true
diff --git a/common/src/tools/params/tool/run-file-change-hooks.ts b/common/src/tools/params/tool/run-file-change-hooks.ts
index 9a76e970b..f550ffe5a 100644
--- a/common/src/tools/params/tool/run-file-change-hooks.ts
+++ b/common/src/tools/params/tool/run-file-change-hooks.ts
@@ -19,11 +19,18 @@ export const runFileChangeHooksParams = {
   outputs: z.tuple([
     z.object({
       type: z.literal('json'),
-      value: terminalCommandOutputSchema.and(
-        z.object({
-          hookName: z.string(),
-        }),
-      ),
+      value: z
+        .union([
+          terminalCommandOutputSchema.and(
+            z.object({
+              hookName: z.string(),
+            }),
+          ),
+          z.object({
+            errorMessage: z.string(),
+          }),
+        ])
+        .array(),
     }),
   ]),
 } satisfies $ToolParams
diff --git a/common/src/tools/params/tool/run-terminal-command.ts b/common/src/tools/params/tool/run-terminal-command.ts
index fbacc0591..bd598e919 100644
--- a/common/src/tools/params/tool/run-terminal-command.ts
+++ b/common/src/tools/params/tool/run-terminal-command.ts
@@ -2,13 +2,33 @@ import z from 'zod/v4'
 
 import type { $ToolParams } from '../../constants'
 
-export const terminalCommandOutputSchema = z.object({
-  command: z.string(),
-  startingCwd: z.string().optional(),
-  message: z.string(),
-  stdout: z.string(),
-  exitCode: z.number().optional(),
-})
+export const terminalCommandOutputSchema = z.union([
+  z.object({
+    command: z.string(),
+    startingCwd: z.string().optional(),
+    message: z.string().optional(),
+    stderr: z.string().optional(),
+    stdout: z.string().optional(),
+    exitCode: z.number().optional(),
+  }),
+  z.object({
+    command: z.string(),
+    startingCwd: z.string().optional(),
+    message: z.string().optional(),
+    stderr: z.string().optional(),
+    stdoutOmittedForLength: z.literal(true),
+    exitCode: z.number().optional(),
+  }),
+  z.object({
+    command: z.string(),
+    processId: z.number(),
+    backgroundProcessStatus: z.enum(['running', 'completed', 'error']),
+  }),
+  z.object({
+    command: z.string(),
+    errorMessage: z.string(),
+  }),
+])
 
 const toolName = 'run_terminal_command'
 const endsAgentStep = true
diff --git a/common/src/types/messages/codebuff-message.ts b/common/src/types/messages/codebuff-message.ts
index e8a109ff6..97b9fdc1a 100644
--- a/common/src/types/messages/codebuff-message.ts
+++ b/common/src/types/messages/codebuff-message.ts
@@ -18,15 +18,15 @@ const auxiliaryDataSchema = z.object({
   keepDuringTruncation: z.boolean().optional(),
 })
 
-export const systemCodebuffMessageSchema = z
+export const systemMessageSchema = z
   .object({
     role: z.literal('system'),
     content: z.string(),
   })
   .and(auxiliaryDataSchema)
-export type SystemCodebuffMessage = z.infer<typeof systemCodebuffMessageSchema>
+export type SystemMessage = z.infer<typeof systemMessageSchema>
 
-export const userCodebuffMessageSchema = z
+export const userMessageSchema = z
   .object({
     role: z.literal('user'),
     content: z.union([
@@ -35,9 +35,9 @@ export const userCodebuffMessageSchema = z
     ]),
   })
   .and(auxiliaryDataSchema)
-export type UserCodebuffMessage = z.infer<typeof userCodebuffMessageSchema>
+export type UserMessage = z.infer<typeof userMessageSchema>
 
-export const assistantCodebuffMessageSchema = z
+export const assistantMessageSchema = z
   .object({
     role: z.literal('assistant'),
     content: z.union([
@@ -48,24 +48,22 @@ export const assistantCodebuffMessageSchema = z
     ]),
   })
   .and(auxiliaryDataSchema)
-export type AssistantCodebuffMessage = z.infer<
-  typeof assistantCodebuffMessageSchema
->
+export type AssistantMessage = z.infer<typeof assistantMessageSchema>
 
-export const toolCodebuffMessageSchema = z
+export const toolMessageSchema = z
   .object({
     role: z.literal('tool'),
     content: toolResultPartSchema,
   })
   .and(auxiliaryDataSchema)
-export type ToolCodebuffMessage = z.infer<typeof toolCodebuffMessageSchema>
+export type ToolMessage = z.infer<typeof toolMessageSchema>
 
-export const codebuffMessageSchema = z
+export const messageSchema = z
   .union([
-    systemCodebuffMessageSchema,
-    userCodebuffMessageSchema,
-    assistantCodebuffMessageSchema,
-    toolCodebuffMessageSchema,
+    systemMessageSchema,
+    userMessageSchema,
+    assistantMessageSchema,
+    toolMessageSchema,
   ])
   .and(
     z.object({
@@ -76,4 +74,4 @@ export const codebuffMessageSchema = z
       keepDuringTruncation: z.boolean().optional(),
     }),
   )
-export type CodebuffMessage = z.infer<typeof codebuffMessageSchema>
+export type Message = z.infer<typeof messageSchema>
diff --git a/common/src/types/session-state.ts b/common/src/types/session-state.ts
index f6cf0fbef..43ea8707b 100644
--- a/common/src/types/session-state.ts
+++ b/common/src/types/session-state.ts
@@ -2,9 +2,9 @@ import { z } from 'zod/v4'
 
 import { MAX_AGENT_STEPS_DEFAULT } from '../constants/agents'
 import { ProjectFileContextSchema } from '../util/file'
-import { codebuffMessageSchema } from './messages/codebuff-message'
+import { messageSchema } from './messages/codebuff-message'
 
-import type { CodebuffMessage } from './messages/codebuff-message'
+import type { Message } from './messages/codebuff-message'
 import type { ProjectFileContext } from '../util/file'
 
 export const toolCallSchema = z.object({
@@ -29,7 +29,7 @@ export const AgentStateSchema: z.ZodType<{
   agentType: AgentTemplateType | null
   agentContext: Record<string, Subgoal>
   subagents: AgentState[]
-  messageHistory: CodebuffMessage[]
+  messageHistory: Message[]
   stepsRemaining: number
   creditsUsed: number
   output?: Record<string, any>
@@ -40,7 +40,7 @@ export const AgentStateSchema: z.ZodType<{
     agentType: z.string().nullable(),
     agentContext: z.record(z.string(), subgoalSchema),
     subagents: AgentStateSchema.array(),
-    messageHistory: codebuffMessageSchema.array(),
+    messageHistory: messageSchema.array(),
     stepsRemaining: z.number(),
     creditsUsed: z.number().default(0),
     output: z.record(z.string(), z.any()).optional(),
diff --git a/common/src/util/messages.ts b/common/src/util/messages.ts
index 8a3df1630..67229b4e6 100644
--- a/common/src/util/messages.ts
+++ b/common/src/util/messages.ts
@@ -4,11 +4,11 @@ import { buildArray } from './array'
 import { getToolCallString } from '../tools/utils'
 
 import type {
-  AssistantCodebuffMessage,
-  CodebuffMessage,
-  SystemCodebuffMessage,
-  ToolCodebuffMessage,
-  UserCodebuffMessage,
+  AssistantMessage,
+  Message,
+  SystemMessage,
+  ToolMessage,
+  UserMessage,
 } from '../types/messages/codebuff-message'
 import type { ProviderMetadata } from '../types/messages/provider-metadata'
 import type { ModelMessage } from 'ai'
@@ -83,18 +83,18 @@ type NonStringContent<Message extends { content: any }> = Omit<
 }
 
 function userToCodebuffMessage(
-  message: Omit<UserCodebuffMessage, 'content'> & {
-    content: Exclude<UserCodebuffMessage['content'], string>[number]
+  message: Omit<UserMessage, 'content'> & {
+    content: Exclude<UserMessage['content'], string>[number]
   },
-): NonStringContent<UserCodebuffMessage> {
+): NonStringContent<UserMessage> {
   return { ...message, content: [message.content] }
 }
 
 function assistantToCodebuffMessage(
-  message: Omit<AssistantCodebuffMessage, 'content'> & {
-    content: Exclude<AssistantCodebuffMessage['content'], string>[number]
+  message: Omit<AssistantMessage, 'content'> & {
+    content: Exclude<AssistantMessage['content'], string>[number]
   },
-): NonStringContent<AssistantCodebuffMessage> {
+): NonStringContent<AssistantMessage> {
   if (message.content.type === 'tool-call') {
     return {
       ...message,
@@ -114,18 +114,13 @@ function assistantToCodebuffMessage(
 }
 
 function toolToCodebuffMessage(
-  message: Omit<ToolCodebuffMessage, 'content'> & {
-    content: Exclude<ToolCodebuffMessage['content'], string>[number]
-  },
-): Nested<
-  | NonStringContent<UserCodebuffMessage>
-  | NonStringContent<AssistantCodebuffMessage>
-> {
-  return message.content.output.value.map((o) => {
+  message: ToolMessage,
+): Nested<NonStringContent<UserMessage> | NonStringContent<AssistantMessage>> {
+  return message.content.output.map((o) => {
     if (o.type === 'json') {
       const toolResult = {
-        tool_name: message.content.toolName,
-        id: message.content.toolCallId,
+        toolName: message.content.toolName,
+        toolCallId: message.content.toolCallId,
         output: o.value,
       }
       return {
@@ -137,14 +132,14 @@ function toolToCodebuffMessage(
             text: `<tool_result>\n${JSON.stringify(toolResult, null, 2)}\n</tool_result>`,
           },
         ],
-      } satisfies NonStringContent<UserCodebuffMessage>
+      } satisfies NonStringContent<UserMessage>
     }
     if (o.type === 'media') {
       return {
         ...message,
         role: 'user',
         content: [{ type: 'file', data: o.data, mediaType: o.mediaType }],
-      } satisfies NonStringContent<UserCodebuffMessage>
+      } satisfies NonStringContent<UserMessage>
     }
     o satisfies never
     const oAny = o as any
@@ -153,11 +148,11 @@ function toolToCodebuffMessage(
 }
 
 function convertToolMessages(
-  message: CodebuffMessage,
+  message: Message,
 ): Nested<
-  | SystemCodebuffMessage
-  | NonStringContent<UserCodebuffMessage>
-  | NonStringContent<AssistantCodebuffMessage>
+  | SystemMessage
+  | NonStringContent<UserMessage>
+  | NonStringContent<AssistantMessage>
 > {
   if (message.role === 'system') {
     return message
@@ -195,19 +190,14 @@ function convertToolMessages(
     const messageAny = message as any
     throw new Error(`Invalid message role: ${messageAny.role}`)
   }
-  return message.content.map((c) => {
-    return toolToCodebuffMessage({
-      ...message,
-      content: c,
-    })
-  })
+  return toolToCodebuffMessage(message)
 }
 
 export function convertCbToModelMessages({
   messages,
   includeCacheControl = true,
 }: {
-  messages: CodebuffMessage[]
+  messages: Message[]
   includeCacheControl?: boolean
 }): ModelMessage[] {
   const noToolMessages = buildArray(messages.map((m) => convertToolMessages(m)))
diff --git a/npm-app/src/background-process-manager.ts b/npm-app/src/background-process-manager.ts
index d610623ce..c67663d2c 100644
--- a/npm-app/src/background-process-manager.ts
+++ b/npm-app/src/background-process-manager.ts
@@ -11,16 +11,15 @@ import path from 'path'
 import process from 'process'
 
 import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
-import { buildArray } from '@codebuff/common/util/array'
 import { truncateStringWithMessage } from '@codebuff/common/util/string'
-import { closeXml } from '@codebuff/common/util/xml'
 import { gray, red } from 'picocolors'
 import { z } from 'zod/v4'
 
 import { CONFIG_DIR } from './credentials'
 import { logger } from './utils/logger'
 
-import type { ToolResult } from '@codebuff/common/types/session-state'
+import type { JSONObject } from '@codebuff/common/types/json'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
 import type {
   ChildProcessByStdio,
   ChildProcessWithoutNullStreams,
@@ -88,67 +87,81 @@ function getOutputWithContext(
 /**
  * Formats a single background process's info into a string
  */
-export function getBackgroundProcessInfoString(
-  info: BackgroundProcessInfo,
-): string {
+export function getBackgroundProcessUpdate(info: BackgroundProcessInfo) {
   const newStdout = info.stdoutBuffer
     .join('')
     .slice(info.lastReportedStdoutLength)
+  info.lastReportedStdoutLength = newStdout.length
   const newStderr = info.stderrBuffer
     .join('')
     .slice(info.lastReportedStderrLength)
+  info.lastReportedStderrLength = newStderr.length
 
   // Only report finished processes if there are changes
+  const newStatus = info.status
   if (
-    info.status !== 'running' &&
+    newStatus !== 'running' &&
     !newStdout &&
     !newStderr &&
-    info.status === info.lastReportedStatus
+    newStatus === info.lastReportedStatus
   ) {
-    return ''
+    return null
   }
+  info.lastReportedStatus = newStatus
 
   // Calculate duration in milliseconds
   const duration = info.endTime
     ? info.endTime - info.startTime
     : Date.now() - info.startTime
 
-  return buildArray(
-    '<background_process>',
-    `<process_id>${info.pid}${closeXml('process_id')}`,
-    `<command>${info.command}${closeXml('command')}`,
-    `<start_time_utc>${new Date(info.startTime).toISOString()}${closeXml('start_time_utc')}`,
-    `<duration_ms>${duration}${closeXml('duration_ms')}`,
-    newStdout &&
-      `<stdout>${truncateStringWithMessage({
-        str: getOutputWithContext(newStdout, info.lastReportedStdoutLength),
-        maxLength: COMMAND_OUTPUT_LIMIT,
-        remove: 'START',
-      })}${closeXml('stdout')}`,
-    newStderr &&
-      `<stderr>${truncateStringWithMessage({
-        str: getOutputWithContext(newStderr, info.lastReportedStderrLength),
-        maxLength: COMMAND_OUTPUT_LIMIT,
-        remove: 'START',
-      })}${closeXml('stderr')}`,
-    `<status>${info.status}${closeXml('status')}`,
-    info.process.exitCode !== null &&
-      `<exit_code>${info.process.exitCode}${closeXml('exit_code')}`,
-    info.process.signalCode &&
-      `<signal_code>${info.process.signalCode}${closeXml('signal_code')}`,
-    closeXml('background_process'),
-  ).join('\n')
+  return {
+    command: info.command,
+    processId: info.pid,
+    startTimeUtc: new Date(info.startTime).toISOString(),
+    durationMs: duration,
+    ...(newStdout
+      ? {
+          stdout: truncateStringWithMessage({
+            str: getOutputWithContext(newStdout, info.lastReportedStdoutLength),
+            maxLength: COMMAND_OUTPUT_LIMIT,
+            remove: 'START',
+          }),
+        }
+      : {}),
+    ...(newStderr
+      ? {
+          stderr: truncateStringWithMessage({
+            str: getOutputWithContext(newStderr, info.lastReportedStderrLength),
+            maxLength: COMMAND_OUTPUT_LIMIT,
+            remove: 'START',
+          }),
+        }
+      : {}),
+    backgroundProcessStatus: newStatus,
+    ...(info.process.exitCode !== null
+      ? { exitCode: info.process.exitCode }
+      : {}),
+    ...(info.process.signalCode ? { signalCode: info.process.signalCode } : {}),
+  }
 }
 
 /**
  * Gets updates from all background processes and updates tracking info
  */
-export function getBackgroundProcessUpdates(): ToolResult[] {
+export function getBackgroundProcessUpdates(): ToolResultPart[] {
   const updates = Array.from(backgroundProcesses.values())
     .map((bgProcess) => {
-      return [getBackgroundProcessInfoString(bgProcess), bgProcess.toolCallId]
+      return [
+        getBackgroundProcessUpdate(bgProcess),
+        bgProcess.toolCallId,
+      ] satisfies [JSONObject | null, string]
     })
-    .filter(([update]) => Boolean(update))
+    .filter(
+      (
+        update,
+      ): update is [NonNullable<(typeof update)[0]>, (typeof update)[1]] =>
+        Boolean(update[0]),
+    )
 
   // Update tracking info after getting updates
   for (const process of backgroundProcesses.values()) {
@@ -162,10 +175,11 @@ export function getBackgroundProcessUpdates(): ToolResult[] {
 
   return updates.map(([update, toolCallId]) => {
     return {
+      type: 'tool-result',
       toolCallId,
-      toolName: 'background_process_updates',
-      output: { type: 'text', value: update },
-    }
+      toolName: 'background_process_update',
+      output: [{ type: 'json', value: update }],
+    } satisfies ToolResultPart
   })
 }
 
diff --git a/npm-app/src/chat-storage.ts b/npm-app/src/chat-storage.ts
index 7c812c4d7..cfe8cdd22 100644
--- a/npm-app/src/chat-storage.ts
+++ b/npm-app/src/chat-storage.ts
@@ -7,13 +7,13 @@ import { getCurrentChatDir, getCurrentChatId } from './project-files'
 import { logger } from './utils/logger'
 
 import type { Log } from '@codebuff/common/browser-actions'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
-export function setMessages(messages: CodebuffMessage[]) {
+export function setMessages(messages: Message[]) {
   // Clean up any screenshots and logs in previous messages
   // Skip the last message as it may not have been processed by the backend yet
   const lastIndex = messages.length - 1
-  const cleanedMessages = messages.map((msg, index): CodebuffMessage => {
+  const cleanedMessages = messages.map((msg, index): Message => {
     if (index === lastIndex) {
       return msg // Preserve the most recent message in its entirety
     }
diff --git a/npm-app/src/client.ts b/npm-app/src/client.ts
index 838b370b8..2ccd493af 100644
--- a/npm-app/src/client.ts
+++ b/npm-app/src/client.ts
@@ -99,11 +99,9 @@ import type {
 } from '@codebuff/common/actions'
 import type { ApiKeyType } from '@codebuff/common/api-keys/constants'
 import type { CostMode } from '@codebuff/common/constants'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
-import type {
-  SessionState,
-  ToolResult,
-} from '@codebuff/common/types/session-state'
+import type { SessionState } from '@codebuff/common/types/session-state'
 import type { User } from '@codebuff/common/util/credentials'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 
@@ -208,7 +206,7 @@ export class Client {
   public user: User | undefined
   public lastWarnedPct: number = 0
   public storedApiKeyTypes: ApiKeyType[] = []
-  public lastToolResults: ToolResult[] = []
+  public lastToolResults: ToolResultPart[] = []
   public model: string | undefined
   public oneTimeFlags: Record<(typeof ONE_TIME_LABELS)[number], boolean> =
     Object.fromEntries(ONE_TIME_LABELS.map((tag) => [tag, false])) as Record<
@@ -812,10 +810,16 @@ export class Client {
         sendActionAndHandleError(this.webSocket, {
           type: 'tool-call-response',
           requestId,
-          success: false,
-          error: ASYNC_AGENTS_ENABLED
-            ? `User input ID mismatch: expected one of ${this.nonCancelledUserInputIds.join(', ')}, got ${userInputId}. That user input id might have been cancelled by the user.`
-            : `User input ID mismatch: expected ${this.userInputId}, got ${userInputId}. Most likely cancelled by user.`,
+          output: [
+            {
+              type: 'json',
+              value: {
+                errorMessage: ASYNC_AGENTS_ENABLED
+                  ? `User input ID mismatch: expected one of ${this.nonCancelledUserInputIds.join(', ')}, got ${userInputId}. That user input id might have been cancelled by the user.`
+                  : `User input ID mismatch: expected ${this.userInputId}, got ${userInputId}. Most likely cancelled by user.`,
+              },
+            },
+          ],
         })
         return
       }
@@ -838,7 +842,6 @@ export class Client {
         sendActionAndHandleError(this.webSocket, {
           type: 'tool-call-response',
           requestId,
-          success: true,
           output: toolResult.output,
         })
       } catch (error) {
@@ -857,8 +860,15 @@ export class Client {
         sendActionAndHandleError(this.webSocket, {
           type: 'tool-call-response',
           requestId,
-          success: false,
-          error: error instanceof Error ? error.message : String(error),
+          output: [
+            {
+              type: 'json',
+              value: {
+                errorMessage:
+                  error instanceof Error ? error.message : String(error),
+              },
+            },
+          ],
         })
       }
     })
@@ -1038,9 +1048,15 @@ export class Client {
       ...(this.lastToolResults || []),
       ...getBackgroundProcessUpdates(),
       scrapedContent && {
+        type: 'tool-result',
         toolName: 'web-scraper',
-        toolCallId: generateCompactId(),
-        output: { type: 'text' as const, value: scrapedContent },
+        toolCallId: generateCompactId('web-scraper-'),
+        output: [
+          {
+            type: 'json',
+            value: { scrapedContent },
+          },
+        ],
       },
     )
 
@@ -1310,7 +1326,7 @@ export class Client {
         Spinner.get().stop()
 
         this.sessionState = a.sessionState
-        const toolResults: ToolResult[] = []
+        const toolResults: ToolResultPart[] = []
 
         stepsCount++
         console.log('\n')
diff --git a/npm-app/src/dev-process-manager.ts b/npm-app/src/dev-process-manager.ts
index df64b7a72..bcd161c4e 100644
--- a/npm-app/src/dev-process-manager.ts
+++ b/npm-app/src/dev-process-manager.ts
@@ -2,6 +2,7 @@ import path from 'path'
 
 import { codebuffConfigFile } from '@codebuff/common/json-config/constants'
 import { generateCompactId } from '@codebuff/common/util/string'
+import { has } from 'lodash'
 import { yellow } from 'picocolors'
 
 import { runBackgroundCommand } from './terminal/background'
@@ -57,9 +58,8 @@ export function startDevProcesses(
         stdoutFile,
         stderrFile,
       },
-      ({ result }) => {
-        const m = result.match(/<process_id>(\d+)<\/process_id>/)
-        if (m) {
+      (result) => {
+        if (has(result, 'processId')) {
           console.log(yellow(`- ${name}: ${command}`))
         } else {
           console.log(yellow(`- ${name}: ${command} — failed to start`))
diff --git a/npm-app/src/json-config/hooks.ts b/npm-app/src/json-config/hooks.ts
index 3275dff33..cafa99bcb 100644
--- a/npm-app/src/json-config/hooks.ts
+++ b/npm-app/src/json-config/hooks.ts
@@ -1,4 +1,5 @@
 import { generateCompactId } from '@codebuff/common/util/string'
+import { has } from 'lodash'
 import micromatch from 'micromatch'
 import { bold, gray } from 'picocolors'
 
@@ -8,21 +9,27 @@ import { runTerminalCommand } from '../terminal/run-command'
 import { logger } from '../utils/logger'
 import { Spinner } from '../utils/spinner'
 
-import type { ToolResult } from '@codebuff/common/types/session-state'
+import type { CodebuffToolOutput } from '@codebuff/common/tools/list'
 
 /**
  * Runs file change hooks defined in the codebuff.json configuration.
  * Returns an array of tool results for any hooks that fail.
  */
-export async function runFileChangeHooks(
-  filesChanged: string[],
-): Promise<{ toolResults: ToolResult[]; someHooksFailed: boolean }> {
+export async function runFileChangeHooks(filesChanged: string[]): Promise<{
+  toolResults: CodebuffToolOutput<'run_file_change_hooks'>
+  someHooksFailed: boolean
+}> {
   const config = loadCodebuffConfig()
-  const toolResults: ToolResult[] = []
+  const toolResults: CodebuffToolOutput<'run_file_change_hooks'> = [
+    { type: 'json', value: [] },
+  ]
   let someHooksFailed = false
 
   if (!config?.fileChangeHooks) {
-    return { toolResults, someHooksFailed }
+    return {
+      toolResults,
+      someHooksFailed,
+    }
   }
 
   for (const hook of config.fileChangeHooks) {
@@ -60,7 +67,7 @@ export async function runFileChangeHooks(
         undefined,
         undefined,
       )
-      if (result.exitCode !== 0) {
+      if (has(result[0].value, 'exitCode') && result[0].value.exitCode !== 0) {
         someHooksFailed = true
         // Show user this hook failed?
         // logger.warn(
@@ -68,10 +75,10 @@ export async function runFileChangeHooks(
         //   'File change hook failed with non-zero exit code'
         // )
       }
-      toolResults.push({
-        toolName: hookName,
-        toolCallId: hookId,
-        output: { type: 'text', value: result.result },
+
+      toolResults[0].value.push({
+        hookName,
+        ...result[0].value,
       })
     } catch (error) {
       logger.error(
diff --git a/npm-app/src/terminal/background.ts b/npm-app/src/terminal/background.ts
index 7dac7cc3b..223daa2e6 100644
--- a/npm-app/src/terminal/background.ts
+++ b/npm-app/src/terminal/background.ts
@@ -4,7 +4,6 @@ import * as os from 'os'
 import path, { dirname } from 'path'
 
 import { stripColors } from '@codebuff/common/util/string'
-import { closeXml } from '@codebuff/common/util/xml'
 import { green } from 'picocolors'
 
 import {
@@ -13,6 +12,7 @@ import {
 } from '../background-process-manager'
 
 import type { BackgroundProcessInfo } from '../background-process-manager'
+import type { CodebuffToolOutput } from '@codebuff/common/tools/list'
 import type { WriteStream } from 'fs'
 
 export function runBackgroundCommand(
@@ -24,11 +24,7 @@ export function runBackgroundCommand(
     stdoutFile?: string
     stderrFile?: string
   },
-  resolveCommand: (value: {
-    result: string
-    stdout: string
-    exitCode: number | null
-  }) => void,
+  resolveCommand: (value: CodebuffToolOutput<'run_terminal_command'>) => void,
 ): void {
   const { toolCallId, command, mode, cwd, stdoutFile, stderrFile } = options
   const isWindows = os.platform() === 'win32'
@@ -39,9 +35,6 @@ export function runBackgroundCommand(
     console.log(green(`Running background process...\n> ${command}`))
   }
 
-  const initialStdout = ''
-  const initialStderr = ''
-
   try {
     const childProcess = spawnAndTrack(shell, [...shellArgs, command], {
       cwd,
@@ -144,22 +137,25 @@ export function runBackgroundCommand(
     // Unreference the process so the parent can exit independently IF the child is the only thing keeping it alive.
     childProcess.unref()
 
-    const resultMessage = `<background_process>
-<process_id>${processId}${closeXml('process_id')}
-<command>${command}${closeXml('command')}
-<status>${processInfo.status}${closeXml('status')}
-${closeXml('background_process')}`
-    resolveCommand({
-      result: resultMessage,
-      stdout: initialStdout + initialStderr,
-      exitCode,
-    })
+    resolveCommand([
+      {
+        type: 'json',
+        value: {
+          command,
+          processId,
+          backgroundProcessStatus: processInfo.status,
+        },
+      },
+    ])
   } catch (error: any) {
-    const errorMessage = `<background_process>\n<command>${command}${closeXml('command')}\n<error>${error.message}${closeXml('error')}\n${closeXml('background_process')}`
-    resolveCommand({
-      result: errorMessage,
-      stdout: error.message,
-      exitCode: null,
-    })
+    resolveCommand([
+      {
+        type: 'json',
+        value: {
+          command,
+          errorMessage: error.message,
+        },
+      },
+    ])
   }
 }
diff --git a/npm-app/src/terminal/run-command.ts b/npm-app/src/terminal/run-command.ts
index b5473aeda..529dd0e4a 100644
--- a/npm-app/src/terminal/run-command.ts
+++ b/npm-app/src/terminal/run-command.ts
@@ -5,13 +5,11 @@ import * as os from 'os'
 import path, { join } from 'path'
 
 import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
-import { buildArray } from '@codebuff/common/util/array'
 import { isSubdir } from '@codebuff/common/util/file'
 import {
   stripColors,
   truncateStringWithMessage,
 } from '@codebuff/common/util/string'
-import { closeXml } from '@codebuff/common/util/xml'
 import { green } from 'picocolors'
 
 import {
@@ -24,6 +22,7 @@ import { trackEvent } from '../utils/analytics'
 import { detectShell } from '../utils/detect-shell'
 import { logger } from '../utils/logger'
 
+import type { CodebuffToolOutput } from '@codebuff/common/tools/list'
 import type { ChildProcessWithoutNullStreams } from 'child_process'
 
 /* ------------------------------------------------------------------ */
@@ -295,18 +294,12 @@ export const resetShell = async (cwd: string) => {
 /* formatting helper
 /* ------------------------------------------------------------------ */
 
-function formatResult(command: string, stdout: string, status: string): string {
-  return buildArray(
-    `<command>${command}${closeXml('command')}`,
-    '<terminal_command_result>',
-    `<output>${truncateStringWithMessage({
-      str: stripColors(stdout),
-      maxLength: COMMAND_OUTPUT_LIMIT,
-      remove: 'MIDDLE',
-    })}${closeXml('output')}`,
-    `<status>${status}${closeXml('status')}`,
-    `${closeXml('terminal_command_result')}`,
-  ).join('\n')
+function formatStdout(stdout: string): string {
+  return truncateStringWithMessage({
+    str: stripColors(stdout),
+    maxLength: COMMAND_OUTPUT_LIMIT,
+    remove: 'MIDDLE',
+  })
 }
 
 /* ------------------------------------------------------------------ */
@@ -322,7 +315,7 @@ export const runTerminalCommand = async (
   cwd?: string,
   stdoutFile?: string,
   stderrFile?: string,
-): Promise<{ result: string; stdout: string; exitCode: number | null }> => {
+): Promise<CodebuffToolOutput<'run_terminal_command'>> => {
   const maybeTimeoutSeconds = timeoutSeconds < 0 ? null : timeoutSeconds
   const projectRoot = getProjectRoot()
   cwd = cwd
@@ -334,7 +327,7 @@ export const runTerminalCommand = async (
 
   /* guard: shell must exist ------------------------------------------ */
   if (!persistentProcess)
-    throw new Error('Shell not initialised – call recreateShell first')
+    throw new Error('Shell not initialised - call recreateShell first')
 
   /* reset if concurrent ---------------------------------------------- */
   if (commandIsRunning) resetShell(cwd)
@@ -345,17 +338,12 @@ export const runTerminalCommand = async (
   modifiedCmd = applyColorHints(modifiedCmd)
 
   /* analytics wrapper ------------------------------------------------- */
-  const resolveCommand = (value: {
-    result: string
-    stdout: string
-    exitCode: number | null
-  }) => {
+  const resolveCommand = (
+    value: CodebuffToolOutput<'run_terminal_command'>,
+  ) => {
     commandIsRunning = false
     trackEvent(AnalyticsEvent.TERMINAL_COMMAND_COMPLETED, {
-      command,
-      result: value.result,
-      stdout: value.stdout,
-      exitCode: value.exitCode,
+      ...value,
       mode,
       processType,
     })
@@ -366,7 +354,9 @@ export const runTerminalCommand = async (
     return new Promise((res) =>
       runBackgroundCommand(
         { toolCallId, command: modifiedCmd, mode, cwd, stdoutFile, stderrFile },
-        (v) => res(resolveCommand(v)),
+        (v) => {
+          res(resolveCommand(v))
+        },
       ),
     )
   }
@@ -394,22 +384,24 @@ const runCommandChildProcess = async (
   mode: 'user' | 'assistant' | 'manager',
   cwd: string,
   maybeTimeoutSeconds: number | null,
-  resolve: (value: {
-    result: string
-    stdout: string
-    exitCode: number | null
-  }) => void,
+  resolve: (value: CodebuffToolOutput<'run_terminal_command'>) => void,
 ) => {
   const projectRoot = getProjectRoot()
 
   /* clear screen ----------------------------------------------------- */
   if (command.trim() === 'clear') {
     process.stdout.write('\u001b[2J\u001b[0;0H')
-    resolve({
-      result: formatResult(command, '', 'Complete'),
-      stdout: '',
-      exitCode: 0,
-    })
+    resolve([
+      {
+        type: 'json',
+        value: {
+          command,
+          message: 'Complete',
+          stdout: '',
+          exitCode: 0,
+        },
+      },
+    ])
     return
   }
 
@@ -460,15 +452,17 @@ const runCommandChildProcess = async (
     timer = setTimeout(() => {
       resetShell(cwd)
       if (mode === 'assistant') {
-        resolve({
-          result: formatResult(
-            command,
-            '',
-            `Command timed out after ${maybeTimeoutSeconds}s and was terminated.`,
-          ),
-          stdout: '',
-          exitCode: 124,
-        })
+        resolve([
+          {
+            type: 'json',
+            value: {
+              command,
+              message: `Command timed out after ${maybeTimeoutSeconds}s and was terminated.`,
+              stdout: '',
+              exitCode: 124,
+            },
+          },
+        ])
       }
     }, maybeTimeoutSeconds * 1_000)
     pp.timerId = timer
@@ -512,27 +506,22 @@ If you want to change the project root:
     }
 
     /* build response ------------------------------------------------- */
-    const status = code === 0 ? 'Complete' : `Failed with exit code: ${code}`
-    const payload =
-      mode === 'assistant'
-        ? formatResult(
-            command,
-            cmdOut,
-            buildArray([`cwd: ${path.resolve(projectRoot, cwd)}`, status]).join(
-              '\n\n',
-            ),
-          )
-        : formatResult(
-            command,
-            cmdOut,
-            buildArray([
-              `Starting cwd: ${cwd}`,
-              `${status}\n`,
-              `Final **user** cwd: ${getWorkingDirectory()} (Assistant's cwd is still project root)`,
-            ]).join('\n'),
-          )
-
-    resolve({ result: payload, stdout: cmdOut, exitCode: code })
+    resolve([
+      {
+        type: 'json',
+        value: {
+          command,
+          startingCwd: cwd,
+          ...(mode === 'assistant'
+            ? {}
+            : {
+                message: `Final **user** cwd: ${getWorkingDirectory()} (Assistant's cwd is still project root)`,
+              }),
+          stdout: formatStdout(cmdOut),
+          ...(code !== null && { exitCode: code }),
+        },
+      },
+    ])
   })
 }
 
diff --git a/npm-app/src/tool-handlers.ts b/npm-app/src/tool-handlers.ts
index 80b41cf9d..036ed8ef7 100644
--- a/npm-app/src/tool-handlers.ts
+++ b/npm-app/src/tool-handlers.ts
@@ -4,10 +4,7 @@ import * as path from 'path'
 import { FileChangeSchema } from '@codebuff/common/actions'
 import { BrowserActionSchema } from '@codebuff/common/browser-actions'
 import { SHOULD_ASK_CONFIG } from '@codebuff/common/constants'
-import { renderToolResults } from '@codebuff/common/tools/utils'
-import { applyChanges } from './utils/changes'
 import { truncateStringWithMessage } from '@codebuff/common/util/string'
-import { closeXml } from '@codebuff/common/util/xml'
 import { cyan, green, red, yellow } from 'picocolors'
 
 import { handleBrowserInstruction } from './browser-runner'
@@ -18,24 +15,30 @@ import { runFileChangeHooks } from './json-config/hooks'
 import { getRgPath } from './native/ripgrep'
 import { getProjectRoot } from './project-files'
 import { runTerminalCommand } from './terminal/run-command'
+import { applyChanges } from './utils/changes'
 import { logger } from './utils/logger'
 import { Spinner } from './utils/spinner'
-import { scrapeWebPage } from './web-scraper'
 
 import type { BrowserResponse } from '@codebuff/common/browser-actions'
-import type { ToolCall, ToolResult } from '@codebuff/common/types/session-state'
-
-export type ToolHandler<T extends Record<string, any>> = (
-  parameters: T,
+import type {
+  ClientToolCall,
+  ClientToolName,
+  CodebuffToolOutput,
+} from '@codebuff/common/tools/list'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
+import type { ToolCall } from '@codebuff/common/types/session-state'
+
+export type ToolHandler<T extends ClientToolName> = (
+  parameters: ClientToolCall<T>['input'],
   id: string,
-) => Promise<string | BrowserResponse>
-
-export const handleUpdateFile: ToolHandler<{
-  tool: 'write_file' | 'str_replace' | 'create_plan'
-  path: string
-  content: string
-  type: 'patch' | 'file'
-}> = async (parameters, _id) => {
+) => Promise<CodebuffToolOutput<T>>
+
+export const handleUpdateFile = async <
+  T extends 'write_file' | 'str_replace' | 'create_plan',
+>(
+  parameters: ClientToolCall<T>['input'],
+  _id: string,
+): Promise<CodebuffToolOutput<T>> => {
   const projectPath = getProjectRoot()
   const fileChange = FileChangeSchema.parse(parameters)
   const lines = fileChange.content.split('\n')
@@ -46,13 +49,20 @@ export const handleUpdateFile: ToolHandler<{
   ])
   DiffManager.addChange(fileChange)
 
-  let result: string[] = []
+  let result: CodebuffToolOutput<T>[] = []
 
   for (const file of created) {
     const counts = `(${green(`+${lines.length}`)})`
-    result.push(
-      `Created ${file} successfully. Changes made:\n${lines.join('\n')}`,
-    )
+    result.push([
+      {
+        type: 'json',
+        value: {
+          file,
+          message: 'Created new file',
+          unifiedDiff: lines.join('\n'),
+        },
+      },
+    ])
     console.log(green(`- Created ${file} ${counts}`))
   }
   for (const file of modified) {
@@ -68,15 +78,29 @@ export const handleUpdateFile: ToolHandler<{
     })
 
     const counts = `(${green(`+${addedLines}`)}, ${red(`-${deletedLines}`)})`
-    result.push(
-      `Wrote to ${file} successfully. Changes made:\n${lines.join('\n')}`,
-    )
+    result.push([
+      {
+        type: 'json',
+        value: {
+          file,
+          message: 'Updated file',
+          unifiedDiff: lines.join('\n'),
+        },
+      },
+    ])
     console.log(green(`- Updated ${file} ${counts}`))
   }
   for (const file of ignored) {
-    result.push(
-      `Failed to write to ${file}; file is ignored by .gitignore or .codebuffignore`,
-    )
+    result.push([
+      {
+        type: 'json',
+        value: {
+          file,
+          errorMessage:
+            'Failed to write to file: file is ignored by .gitignore or .codebuffignore',
+        },
+      },
+    ])
   }
   for (const file of patchFailed) {
     result.push(
@@ -84,29 +108,29 @@ export const handleUpdateFile: ToolHandler<{
     )
   }
   for (const file of invalid) {
-    result.push(
-      `Failed to write to ${file}; file path caused an error or file could not be written`,
-    )
+    result.push([
+      {
+        type: 'json',
+        value: {
+          file,
+          errorMessage: `Failed to write to file: File path caused an error or file could not be written`,
+        },
+      },
+    ])
   }
 
-  // Note: File change hooks are now run in batches by the backend via run_file_change_hooks tool
-  // This prevents repeated hook execution when multiple files are changed in one invocation
-
-  return result.join('\n')
-}
-
-export const handleScrapeWebPage: ToolHandler<{ url: string }> = async (
-  parameters,
-) => {
-  const { url } = parameters
-  const content = await scrapeWebPage(url)
-  if (!content) {
-    return `<web_scraping_error url="${url}">Failed to scrape the web page.${closeXml('web_scraping_error')}`
+  if (result.length !== 1) {
+    throw new Error(
+      `Internal error: Unexpected number of matching results for ${{ parameters }}, found ${result.length}, expected 1`,
+    )
   }
-  return `<web_scraped_content url="${url}">${content}${closeXml('web_scraped_content')}`
+
+  return result[0]
 }
 
-export const handleRunTerminalCommand = async (
+export const handleRunTerminalCommand: ToolHandler<
+  'run_terminal_command'
+> = async (
   parameters: {
     command: string
     mode?: 'user' | 'assistant'
@@ -115,7 +139,7 @@ export const handleRunTerminalCommand = async (
     timeout_seconds?: number
   },
   id: string,
-): Promise<{ result: string; stdout: string }> => {
+): Promise<CodebuffToolOutput<'run_terminal_command'>> => {
   const {
     command,
     mode = 'assistant',
@@ -130,7 +154,7 @@ export const handleRunTerminalCommand = async (
     client.oneTimeFlags[SHOULD_ASK_CONFIG] = true
   }
 
-  return runTerminalCommand(
+  return await runTerminalCommand(
     id,
     command,
     mode,
@@ -140,11 +164,10 @@ export const handleRunTerminalCommand = async (
   )
 }
 
-export const handleCodeSearch: ToolHandler<{
-  pattern: string
-  flags?: string
-  cwd?: string
-}> = async (parameters, _id) => {
+export const handleCodeSearch: ToolHandler<'code_search'> = async (
+  parameters,
+  _id,
+) => {
   const projectPath = getProjectRoot()
   const rgPath = await getRgPath()
 
@@ -161,9 +184,14 @@ export const handleCodeSearch: ToolHandler<{
       const requestedPath = path.resolve(projectPath, parameters.cwd)
       // Ensure the search path is within the project directory
       if (!requestedPath.startsWith(projectPath)) {
-        resolve(
-          `<terminal_command_error>Invalid cwd: Path '${parameters.cwd}' is outside the project directory.${closeXml('terminal_command_error')}`,
-        )
+        resolve([
+          {
+            type: 'json',
+            value: {
+              errorMessage: `Invalid cwd: Path '${parameters.cwd}' is outside the project directory.`,
+            },
+          },
+        ])
         return
       }
       searchCwd = requestedPath
@@ -210,195 +238,172 @@ export const handleCodeSearch: ToolHandler<{
         str: stderr,
         maxLength: 1000,
       })
-      resolve(
-        formatResult(
-          truncatedStdout,
-          truncatedStderr,
-          'Code search completed',
-          code,
-        ),
-      )
+      const result = {
+        stdout: truncatedStdout,
+        ...(truncatedStderr && { stderr: truncatedStderr }),
+        ...(code !== null && { exitCode: code }),
+        message: 'Code search completed',
+      }
+      resolve([
+        {
+          type: 'json',
+          value: result,
+        },
+      ])
     })
 
     childProcess.on('error', (error) => {
-      resolve(
-        `<terminal_command_error>Failed to execute ripgrep: ${error.message}${closeXml('terminal_command_error')}`,
-      )
+      resolve([
+        {
+          type: 'json',
+          value: {
+            errorMessage: `Failed to execute ripgrep: ${error.message}`,
+          },
+        },
+      ])
     })
   })
 }
 
-function formatResult(
-  stdout: string,
-  stderr: string | undefined,
-  status: string,
-  exitCode: number | null,
-): string {
-  let result = '<terminal_command_result>\n'
-  result += `<stdout>${stdout}${closeXml('stdout')}\n`
-  if (stderr !== undefined) {
-    result += `<stderr>${stderr}${closeXml('stderr')}\n`
-  }
-  result += `<status>${status}${closeXml('status')}\n`
-  if (exitCode !== null) {
-    result += `<exit_code>${exitCode}${closeXml('exit_code')}\n`
-  }
-  result += closeXml('terminal_command_result')
-  return result
-}
+const handleFileChangeHooks: ToolHandler<
+  'run_file_change_hooks'
+> = async (parameters: { files: string[] }) => {
+  // Wait for any pending file operations to complete
+  await waitForPreviousCheckpoint()
 
-export const toolHandlers: Record<string, ToolHandler<any>> = {
-  write_file: handleUpdateFile,
-  str_replace: handleUpdateFile,
-  create_plan: handleUpdateFile,
-  scrape_web_page: handleScrapeWebPage,
-  run_terminal_command: ((parameters, id) =>
-    handleRunTerminalCommand(parameters, id).then(
-      (result) => result.result,
-    )) as ToolHandler<{
-    command: string
-    process_type: 'SYNC' | 'BACKGROUND'
-  }>,
-  code_search: handleCodeSearch,
-  end_turn: async () => '',
-  run_file_change_hooks: async (parameters: { files: string[] }) => {
-    // Wait for any pending file operations to complete
-    await waitForPreviousCheckpoint()
+  const { toolResults, someHooksFailed } = await runFileChangeHooks(
+    parameters.files,
+  )
 
-    const { toolResults, someHooksFailed } = await runFileChangeHooks(
-      parameters.files,
-    )
+  // Add a summary if some hooks failed
+  if (someHooksFailed) {
+    toolResults[0].value.push({
+      errorMessage:
+        'Some file change hooks failed. Please review the output above.',
+    })
+  }
 
-    // Format the results for display
-    const results = renderToolResults(toolResults)
+  if (toolResults[0].value.length === 0) {
+    toolResults[0].value.push({
+      errorMessage:
+        'No file change hooks were triggered for the specified files.',
+    })
+  }
 
-    // Add a summary if some hooks failed
-    if (someHooksFailed) {
-      return (
-        results +
-        '\n\nSome file change hooks failed. Please review the output above.'
-      )
-    }
+  return toolResults
+}
 
-    return (
-      results || 'No file change hooks were triggered for the specified files.'
+const handleBrowserLogs: ToolHandler<'browser_logs'> = async (params, _id) => {
+  Spinner.get().start('Using browser...')
+  let response: BrowserResponse
+  try {
+    const action = BrowserActionSchema.parse(params)
+    response = await handleBrowserInstruction(action)
+  } catch (error) {
+    Spinner.get().stop()
+    const errorMessage = error instanceof Error ? error.message : String(error)
+    console.log('Small hiccup, one sec...')
+    logger.error(
+      {
+        errorMessage,
+        errorStack: error instanceof Error ? error.stack : undefined,
+        params,
+      },
+      'Browser action validation failed',
     )
-  },
-  browser_logs: async (params, _id): Promise<string> => {
-    Spinner.get().start('Using browser...')
-    let response: BrowserResponse
-    try {
-      const action = BrowserActionSchema.parse(params)
-      response = await handleBrowserInstruction(action)
-    } catch (error) {
-      Spinner.get().stop()
-      const errorMessage =
-        error instanceof Error ? error.message : String(error)
-      console.log('Small hiccup, one sec...')
-      logger.error(
-        {
-          errorMessage,
-          errorStack: error instanceof Error ? error.stack : undefined,
-          params,
+    return [
+      {
+        type: 'json',
+        value: {
+          success: false,
+          error: `Browser action validation failed: ${errorMessage}`,
+          logs: [
+            {
+              type: 'error',
+              message: `Browser action validation failed: ${errorMessage}`,
+              timestamp: Date.now(),
+              source: 'tool',
+            },
+          ],
         },
-        'Browser action validation failed',
-      )
-      return JSON.stringify({
-        success: false,
-        error: `Browser action validation failed: ${errorMessage}`,
-        logs: [
-          {
-            type: 'error',
-            message: `Browser action validation failed: ${errorMessage}`,
-            timestamp: Date.now(),
-            source: 'tool',
-          },
-        ],
-      })
-    } finally {
-      Spinner.get().stop()
-    }
+      },
+    ] satisfies CodebuffToolOutput<'browser_logs'>
+  } finally {
+    Spinner.get().stop()
+  }
 
-    // Log any browser errors
-    if (!response.success && response.error) {
-      console.error(red(`Browser action failed: ${response.error}`))
-      logger.error(
-        {
-          errorMessage: response.error,
-        },
-        'Browser action failed',
-      )
-    }
-    if (response.logs) {
-      response.logs.forEach((log) => {
-        if (log.source === 'tool') {
-          switch (log.type) {
-            case 'error':
-              console.error(red(log.message))
-              logger.error(
-                {
-                  errorMessage: log.message,
-                },
-                'Browser tool error',
-              )
-              break
-            case 'warning':
-              console.warn(yellow(log.message))
-              break
-            case 'info':
-              console.info(cyan(log.message))
-              break
-            default:
-              console.log(cyan(log.message))
-          }
+  // Log any browser errors
+  if (!response.success && response.error) {
+    console.error(red(`Browser action failed: ${response.error}`))
+    logger.error(
+      {
+        errorMessage: response.error,
+      },
+      'Browser action failed',
+    )
+  }
+  if (response.logs) {
+    response.logs.forEach((log) => {
+      if (log.source === 'tool') {
+        switch (log.type) {
+          case 'error':
+            console.error(red(log.message))
+            logger.error(
+              {
+                errorMessage: log.message,
+              },
+              'Browser tool error',
+            )
+            break
+          case 'warning':
+            console.warn(yellow(log.message))
+            break
+          case 'info':
+            console.info(cyan(log.message))
+            break
+          default:
+            console.log(cyan(log.message))
         }
-      })
-    }
+      }
+    })
+  }
 
-    return JSON.stringify(response)
-  },
+  return [
+    {
+      type: 'json',
+      value: response,
+    },
+  ] satisfies CodebuffToolOutput<'browser_logs'>
+}
+
+export const toolHandlers: {
+  [T in ClientToolName]: ToolHandler<T>
+} = {
+  write_file: handleUpdateFile,
+  str_replace: handleUpdateFile,
+  create_plan: handleUpdateFile,
+  run_terminal_command: handleRunTerminalCommand,
+  code_search: handleCodeSearch,
+  run_file_change_hooks: handleFileChangeHooks,
+  browser_logs: handleBrowserLogs,
 }
 
 export const handleToolCall = async (
   toolCall: ToolCall,
-): Promise<ToolResult> => {
+): Promise<ToolResultPart> => {
   const { toolName, input, toolCallId } = toolCall
-  const handler = toolHandlers[toolName]
+  const handler = toolHandlers[toolName as ClientToolName]
   if (!handler) {
     throw new Error(`No handler found for tool: ${toolName}`)
   }
 
-  const content = await handler(input, toolCallId)
-
-  if (typeof content !== 'string') {
-    throw new Error(
-      `Tool call ${toolName} not supported. It returned non-string content.`,
-    )
-  }
-
-  // TODO: Add support for screenshots.
-  // const toolResultMessage: Message = {
-  //   role: 'user',
-  //   content: match(content)
-  //     .with({ screenshots: P.not(P.nullish) }, (response) => [
-  //       ...(response.screenshots.pre ? [response.screenshots.pre] : []),
-  //       {
-  //         type: 'text' as const,
-  //         text:
-  //           JSON.stringify({
-  //             ...response,
-  //             screenshots: undefined,
-  //           }),
-  //       },
-  //       response.screenshots.post,
-  //     ])
-  //     .with(P.string, (str) => str)
-  //     .otherwise((val) => JSON.stringify(val)),
-  // }
+  const content = await handler(input as any, toolCallId)
 
+  const contentArray = Array.isArray(content) ? content : [content]
   return {
+    type: 'tool-result',
     toolName,
     toolCallId,
-    output: { type: 'text', value: content },
-  }
+    output: contentArray,
+  } satisfies ToolResultPart
 }
diff --git a/npm-app/src/utils/__tests__/background-process-manager.test.ts b/npm-app/src/utils/__tests__/background-process-manager.test.ts
index 736de17ac..59db5e79b 100644
--- a/npm-app/src/utils/__tests__/background-process-manager.test.ts
+++ b/npm-app/src/utils/__tests__/background-process-manager.test.ts
@@ -26,7 +26,7 @@ if (!isCI) {
   // Wrap the dynamic import and tests in an async IIFE to avoid top-level await
   ;(async () => {
     // Only import the implementation if not in CI
-    const { getBackgroundProcessInfoString } = await import(
+    const { getBackgroundProcessUpdate } = await import(
       '../../background-process-manager'
     )
 
@@ -60,7 +60,7 @@ if (!isCI) {
           lastReportedStatus: null,
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -89,7 +89,7 @@ if (!isCI) {
           lastReportedStatus: null,
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -119,7 +119,7 @@ if (!isCI) {
           lastReportedStatus: null,
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -143,7 +143,7 @@ if (!isCI) {
           lastReportedStatus: 'completed',
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
         expect(result).toBe('')
       })
 
@@ -166,7 +166,7 @@ if (!isCI) {
           lastReportedStatus: 'completed',
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -190,7 +190,7 @@ if (!isCI) {
           lastReportedStatus: 'running',
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -214,7 +214,7 @@ if (!isCI) {
           lastReportedStatus: null,
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -237,7 +237,7 @@ if (!isCI) {
           lastReportedStatus: null,
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -261,7 +261,7 @@ if (!isCI) {
           lastReportedStatus: 'completed',
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -285,7 +285,7 @@ if (!isCI) {
           lastReportedStatus: 'completed',
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -309,7 +309,7 @@ if (!isCI) {
           lastReportedStatus: 'running', // Status changed from running to completed
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -333,7 +333,7 @@ if (!isCI) {
           lastReportedStatus: null,
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
@@ -356,7 +356,7 @@ if (!isCI) {
           lastReportedStatus: null,
         }
 
-        const result = getBackgroundProcessInfoString(info)
+        const result = getBackgroundProcessUpdate(info)
 
         expect(result).toMatchSnapshot()
       })
diff --git a/sdk/src/client.ts b/sdk/src/client.ts
index 331ea06b3..4db22d054 100644
--- a/sdk/src/client.ts
+++ b/sdk/src/client.ts
@@ -14,31 +14,35 @@ import {
 import { API_KEY_ENV_VAR } from '../../common/src/constants'
 import { DEFAULT_MAX_AGENT_STEPS } from '../../common/src/json-config/constants'
 import { toolNames } from '../../common/src/tools/constants'
+import {
+  clientToolCallSchema,
+  type ClientToolCall,
+  type ClientToolName,
+  type CodebuffToolOutput,
+} from '../../common/src/tools/list'
 
 import type { CustomToolDefinition } from './custom-tool'
 import type { AgentDefinition } from '../../common/src/templates/initial-agents-dir/types/agent-definition'
 import type { ToolName } from '../../common/src/tools/constants'
+import type { ToolResultOutput } from '../../common/src/types/messages/content-part'
 import type { PrintModeEvent } from '../../common/src/types/print-mode'
 import type { SessionState } from '../../common/src/types/session-state'
 
-type ClientToolName = 'write_file' | 'run_terminal_command'
-
 export type CodebuffClientOptions = {
   // Provide an API key or set the CODEBUFF_API_KEY environment variable.
   apiKey?: string
   cwd: string
   onError: (error: { message: string }) => void
   overrideTools?: Partial<
-    Record<
-      ClientToolName,
-      (
-        input: ServerAction<'tool-call-request'>['input'],
-      ) => Promise<{ toolResultMessage: string }>
-    > & {
+    {
+      [K in ClientToolName]: (
+        input: ClientToolCall<K>['input'],
+      ) => Promise<CodebuffToolOutput<K>>
+    } & {
       // Include read_files separately, since it has a different signature.
-      read_files: (
-        filePath: string[],
-      ) => Promise<{ files: Record<string, string | null> }>
+      read_files: (input: {
+        filePaths: string[]
+      }) => Promise<Record<string, string | null>>
     }
   >
 }
@@ -206,28 +210,28 @@ export class CodebuffClient {
         const handler = toolDef.handler
         try {
           return {
-            success: true,
-            output: {
-              type: 'text',
-              value: (await handler(toolDef.zodSchema.parse(input)))
-                .toolResultMessage,
-            },
+            output: toolDef.outputSchema.parse(
+              await handler(toolDef.zodSchema.parse(input)),
+            ),
           }
         } catch (error) {
           return {
-            success: false,
-            output: {
-              type: 'text',
-              value:
-                error &&
-                typeof error === 'object' &&
-                'message' in error &&
-                typeof error.message === 'string'
-                  ? error.message
-                  : typeof error === 'string'
-                    ? error
-                    : 'Unknown error',
-            },
+            output: [
+              {
+                type: 'json',
+                value: {
+                  errorMessage:
+                    error &&
+                    typeof error === 'object' &&
+                    'message' in error &&
+                    typeof error.message === 'string'
+                      ? error.message
+                      : typeof error === 'string'
+                        ? error
+                        : 'Unknown error',
+                },
+              },
+            ],
           }
         }
       }
@@ -282,22 +286,22 @@ export class CodebuffClient {
     }
   }
 
-  private async readFiles(filePath: string[]) {
+  private async readFiles({ filePaths }: { filePaths: string[] }) {
     const override = this.overrideTools.read_files
     if (override) {
-      const overrideResult = await override(filePath)
-      return overrideResult.files
+      return await override({ filePaths })
     }
-    return getFiles(filePath, this.cwd)
+    return getFiles(filePaths, this.cwd)
   }
 
   private async handleToolCall(
     action: ServerAction<'tool-call-request'>,
   ): ReturnType<WebSocketHandler['handleToolCall']> {
+    clientToolCallSchema.parse(action)
     const toolName = action.toolName
     const input = action.input
 
-    let result: string
+    let result: ToolResultOutput[]
     if (!toolNames.includes(toolName as ToolName)) {
       const customToolHandler =
         this.promptIdToHandlers[action.userInputId].customToolHandler
@@ -316,19 +320,16 @@ export class CodebuffClient {
         override = this.overrideTools['write_file']
       }
       if (override) {
-        const overrideResult = await override(input)
-        result = overrideResult.toolResultMessage
+        result = await override(input as any)
       } else if (toolName === 'end_turn') {
-        result = ''
+        result = []
       } else if (toolName === 'write_file' || toolName === 'str_replace') {
-        const r = changeFile(input, this.cwd)
-        result = r.toolResultMessage
+        result = changeFile(input, this.cwd)
       } else if (toolName === 'run_terminal_command') {
-        const r = await runTerminalCommand({
+        result = await runTerminalCommand({
           ...input,
           cwd: input.cwd ?? this.cwd,
         } as Parameters<typeof runTerminalCommand>[0])
-        result = r.output
       } else {
         throw new Error(
           `Tool not implemented in SDK. Please provide an override or modify your agent to not use this tool: ${toolName}`,
@@ -336,27 +337,26 @@ export class CodebuffClient {
       }
     } catch (error) {
       return {
-        success: false,
-        output: {
-          type: 'text',
-          value:
-            error &&
-            typeof error === 'object' &&
-            'message' in error &&
-            typeof error.message === 'string'
-              ? error.message
-              : typeof error === 'string'
-                ? error
-                : 'Unknown error',
-        },
+        output: [
+          {
+            type: 'json',
+            value: {
+              errorMessage:
+                error &&
+                typeof error === 'object' &&
+                'message' in error &&
+                typeof error.message === 'string'
+                  ? error.message
+                  : typeof error === 'string'
+                    ? error
+                    : 'Unknown error',
+            },
+          },
+        ],
       }
     }
     return {
-      success: true,
-      output: {
-        type: 'text',
-        value: result,
-      },
+      output: result,
     }
   }
 }
diff --git a/sdk/src/custom-tool.ts b/sdk/src/custom-tool.ts
index 6698d19c8..3e26208d2 100644
--- a/sdk/src/custom-tool.ts
+++ b/sdk/src/custom-tool.ts
@@ -1,48 +1,51 @@
 import z from 'zod/v4'
 
+import type { ToolResultOutput } from '@codebuff/common/types/messages/content-part'
 import type { JSONSchema } from 'zod/v4/core'
 
 export type CustomToolDefinition<
   N extends string = string,
-  Output = any,
+  Args = any,
   Input = any,
+  Output extends ToolResultOutput[] = ToolResultOutput[],
 > = {
   toolName: N
-  zodSchema: z.ZodType<Output, Input>
+  zodSchema: z.ZodType<Args, Input>
   inputJsonSchema: JSONSchema.BaseSchema
+  outputSchema: z.ZodType<ToolResultOutput[], Output>
   description: string
   endsAgentStep: boolean
   exampleInputs: Input[]
-  handler: (params: Output) => Promise<{
-    toolResultMessage: string
-  }>
+  handler: (params: Args) => Promise<Output>
 }
 
 export function getCustomToolDefinition<
   ToolName extends string,
-  Output,
+  Args,
   Input,
+  Output extends ToolResultOutput[],
 >({
   toolName,
   inputSchema,
+  outputSchema,
   description,
   endsAgentStep = true,
   exampleInputs = [],
   handler,
 }: {
   toolName: ToolName
-  inputSchema: z.ZodType<Output, Input>
+  inputSchema: z.ZodType<Args, Input>
+  outputSchema: z.ZodType<ToolResultOutput[], Output>
   description: string
   endsAgentStep?: boolean
   exampleInputs?: Input[]
-  handler: (params: Output) => Promise<{
-    toolResultMessage: string
-  }>
-}): CustomToolDefinition<ToolName, Output, Input> {
+  handler: (params: Args) => Promise<Output>
+}): CustomToolDefinition<ToolName, Args, Input, Output> {
   return {
     toolName,
     zodSchema: inputSchema,
     inputJsonSchema: z.toJSONSchema(inputSchema, { io: 'input' }),
+    outputSchema,
     description,
     endsAgentStep,
     exampleInputs,
diff --git a/sdk/src/tools/change-file.ts b/sdk/src/tools/change-file.ts
index 195c60d94..186edd009 100644
--- a/sdk/src/tools/change-file.ts
+++ b/sdk/src/tools/change-file.ts
@@ -1,7 +1,10 @@
-import z from 'zod/v4'
 import fs from 'fs'
 import path from 'path'
+
 import { applyPatch } from 'diff'
+import z from 'zod/v4'
+
+import type { CodebuffToolOutput } from '../../../common/src/tools/list'
 
 const FileChangeSchema = z.object({
   type: z.enum(['patch', 'file']),
@@ -12,7 +15,7 @@ const FileChangeSchema = z.object({
 export function changeFile(
   parameters: unknown,
   cwd: string,
-): { toolResultMessage: string } {
+): CodebuffToolOutput<'str_replace'> {
   if (cwd.includes('../')) {
     throw new Error('cwd cannot include ../')
   }
@@ -21,18 +24,22 @@ export function changeFile(
 
   const { created, modified, invalid, patchFailed } = applyChanges(cwd, [fileChange])
 
-  const results: string[] = []
+  const results: CodebuffToolOutput<'str_replace'>[0]['value'][] = []
 
   for (const file of created) {
-    results.push(
-      `Created ${file} successfully. Changes made:\n${lines.join('\n')}`,
-    )
+    results.push({
+      file,
+      message: 'Created new file',
+      unifiedDiff: lines.join('\n'),
+    })
   }
 
   for (const file of modified) {
-    results.push(
-      `Wrote to ${file} successfully. Changes made:\n${lines.join('\n')}`,
-    )
+    results.push({
+      file,
+      message: 'Updated file',
+      unifiedDiff: lines.join('\n'),
+    })
   }
 
   for (const file of patchFailed) {
@@ -42,12 +49,22 @@ export function changeFile(
   }
 
   for (const file of invalid) {
-    results.push(
-      `Failed to write to ${file}; file path caused an error or file could not be written`,
+    results.push({
+      file,
+      errorMessage:
+        'Failed to write to file: file path caused an error or file could not be written',
+    })
+  }
+
+  if (results.length !== 1) {
+    throw new Error(
+      `Internal error: Unexpected result length while modifying files: ${
+        results.length
+      }`,
     )
   }
 
-  return { toolResultMessage: results.join('\n') }
+  return [{ type: 'json', value: results[0] }]
 }
 
 function applyChanges(
diff --git a/sdk/src/tools/run-terminal-command.ts b/sdk/src/tools/run-terminal-command.ts
index c0a79ae42..48f4687de 100644
--- a/sdk/src/tools/run-terminal-command.ts
+++ b/sdk/src/tools/run-terminal-command.ts
@@ -2,7 +2,7 @@ import { spawn } from 'child_process'
 import * as os from 'os'
 import * as path from 'path'
 
-import { buildArray } from '../../../common/src/util/array'
+import type { CodebuffToolOutput } from '../../../common/src/tools/list'
 
 export function runTerminalCommand({
   command,
@@ -14,7 +14,7 @@ export function runTerminalCommand({
   process_type: 'SYNC' | 'BACKGROUND'
   cwd: string
   timeout_seconds: number
-}): Promise<{ output: string }> {
+}): Promise<CodebuffToolOutput<'run_terminal_command'>> {
   if (process_type === 'BACKGROUND') {
     throw new Error('BACKGROUND process_type not implemented')
   }
@@ -76,13 +76,14 @@ export function runTerminalCommand({
       }
 
       // Include stderr in stdout for compatibility with existing behavior
-      const combinedOutput = buildArray([
-        `\`\`\`stdout\n${stdout}\`\`\``,
-        stderr && `\`\`\`stderr\n${stderr}\`\`\``,
-        exitCode !== null && `\`\`\`exit_code\n${exitCode}\`\`\``,
-      ]).join('\n\n')
+      const combinedOutput = {
+        command,
+        stdout,
+        ...(stderr ? { stderr } : {}),
+        ...(exitCode !== null ? { exitCode } : {}),
+      }
 
-      resolve({ output: combinedOutput })
+      resolve([{ type: 'json', value: combinedOutput }])
     })
 
     // Handle spawn errors
diff --git a/sdk/src/websocket-client.ts b/sdk/src/websocket-client.ts
index 124aba560..b84897edb 100644
--- a/sdk/src/websocket-client.ts
+++ b/sdk/src/websocket-client.ts
@@ -9,9 +9,9 @@ export type WebSocketHandlerOptions = {
   onWebsocketReconnect?: () => void
   onRequestReconnect?: () => Promise<void>
   onResponseError?: (error: ServerAction<'action-error'>) => Promise<void>
-  readFiles: (
-    filePath: string[],
-  ) => Promise<ClientAction<'read-files-response'>['files']>
+  readFiles: (input: {
+    filePaths: string[]
+  }) => Promise<ClientAction<'read-files-response'>['files']>
   handleToolCall: (
     action: ServerAction<'tool-call-request'>,
   ) => Promise<Omit<ClientAction<'tool-call-response'>, 'type' | 'requestId'>>
@@ -107,7 +107,7 @@ export class WebSocketHandler {
 
     this.cbWebSocket.subscribe('read-files', async (a) => {
       const { filePaths, requestId } = a
-      const files = await this.readFiles(filePaths)
+      const files = await this.readFiles({ filePaths })
 
       this.cbWebSocket.sendAction({
         type: 'read-files-response',

From 6bd6fdaba8dcacb2e11f0b6d49caf13edc27ab3e Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Wed, 27 Aug 2025 20:18:49 -0700
Subject: [PATCH 05/18] fix typecheck for non test files

---
 backend/src/main-prompt.ts                    |  33 +-
 backend/src/run-programmatic-step.ts          |  11 +-
 .../tools/handlers/tool/spawn-agent-utils.ts  |   8 +-
 backend/src/tools/tool-executor.ts            | 117 +++---
 .../__tests__/simplify-tool-results.test.ts   | 381 +-----------------
 backend/src/util/simplify-tool-results.ts     |   7 +-
 .../tools/params/tool/spawn-agents-async.ts   |  33 +-
 common/src/tools/params/tool/spawn-agents.ts  |  17 +-
 common/src/types/agent-template.ts            |   3 +-
 common/src/types/print-mode.ts                |   7 +-
 evals/scaffolding.ts                          |  28 +-
 npm-app/src/__tests__/tool-handlers.test.ts   |   2 +-
 npm-app/src/checkpoints/checkpoint-manager.ts |   8 +-
 .../background-process-manager.test.ts        |   2 +-
 .../ft-file-selection/collect-tuning-data.ts  |  12 +-
 .../relabel-for-offline-scoring.ts            |  14 +-
 scripts/ft-file-selection/relabel-traces.ts   |  17 +-
 sdk/src/run-state.ts                          |   6 +-
 18 files changed, 175 insertions(+), 531 deletions(-)

diff --git a/backend/src/main-prompt.ts b/backend/src/main-prompt.ts
index 56ab04128..c1b386e30 100644
--- a/backend/src/main-prompt.ts
+++ b/backend/src/main-prompt.ts
@@ -1,4 +1,3 @@
-import { renderToolResults } from '@codebuff/common/tools/utils'
 import { AgentTemplateTypes } from '@codebuff/common/types/session-state'
 import { generateCompactId } from '@codebuff/common/util/string'
 import { uniq } from 'lodash'
@@ -13,10 +12,10 @@ import { requestToolCall } from './websockets/websocket-action'
 import type { AgentTemplate } from './templates/types'
 import type { ClientAction } from '@codebuff/common/actions'
 import type { CostMode } from '@codebuff/common/constants'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
   SessionState,
-  ToolResult,
   AgentTemplateType,
 } from '@codebuff/common/types/session-state'
 import type { WebSocket } from 'ws'
@@ -35,7 +34,7 @@ export const mainPrompt = async (
 ): Promise<{
   sessionState: SessionState
   toolCalls: []
-  toolResults: ToolResult[]
+  toolResults: ToolResultPart[]
 }> => {
   const { userId, clientSessionId, onResponseChunk, localAgentTemplates } =
     options
@@ -71,7 +70,7 @@ export const mainPrompt = async (
         `Detected terminal command in ${duration}ms, executing directly: ${prompt}`,
       )
 
-      const response = await requestToolCall(
+      const { output } = await requestToolCall(
         ws,
         promptId,
         'run_terminal_command',
@@ -83,23 +82,15 @@ export const mainPrompt = async (
         },
       )
 
-      const toolResult: ToolResult['output'] = {
-        type: 'text',
-        value:
-          (response.success ? response.output?.value : response.error) || '',
-      }
-      if (response.success) {
-        mainAgentState.messageHistory.push({
-          role: 'user',
-          content: renderToolResults([
-            {
-              toolName: 'run_terminal_command',
-              toolCallId: generateCompactId(),
-              output: toolResult,
-            },
-          ]),
-        })
-      }
+      mainAgentState.messageHistory.push({
+        role: 'tool',
+        content: {
+          type: 'tool-result',
+          toolName: 'run_terminal_command',
+          toolCallId: generateCompactId(),
+          output: output,
+        },
+      })
 
       const newSessionState = {
         ...sessionState,
diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index eecbb32fc..231d89fe1 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -13,11 +13,14 @@ import type {
   StepGenerator,
   PublicAgentState,
 } from '@codebuff/common/types/agent-template'
+import type {
+  ToolResultOutput,
+  ToolResultPart,
+} from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
   AgentState,
   AgentTemplateType,
-  ToolResult,
 } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
@@ -121,7 +124,7 @@ export async function runProgrammaticStep(
 
   // Initialize state for tool execution
   const toolCalls: CodebuffToolCall[] = []
-  const toolResults: ToolResult[] = []
+  const toolResults: ToolResultPart[] = []
   const state = {
     ws,
     fingerprintId,
@@ -146,7 +149,7 @@ export async function runProgrammaticStep(
     messages: agentState.messageHistory.map((msg) => ({ ...msg })),
   }
 
-  let toolResult: string | undefined
+  let toolResult: ToolResultOutput[] = []
   let endTurn = false
 
   try {
@@ -234,7 +237,7 @@ export async function runProgrammaticStep(
       state.agentState.messageHistory = state.messages
 
       // Get the latest tool result
-      toolResult = toolResults[toolResults.length - 1]?.output.value
+      toolResult = toolResults[toolResults.length - 1]?.output
 
       if (toolCall.toolName === 'end_turn') {
         endTurn = true
diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
index ff9cdb538..e95c76586 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -378,12 +378,12 @@ export async function formatAgentResult(
     agentName: string
   } & (
     | { errorMessage: string }
-    | { structuredOutput: Record<string, any> | undefined }
+    | { structuredOutput: Record<string, any> | null }
     | {
-        lastMessage: AssistantMessage['content']
+        lastMessage: any
       }
     | {
-        allMessages: Message[]
+        allMessages: any[]
       }
   )
 > {
@@ -395,7 +395,7 @@ export async function formatAgentResult(
   if (agentTemplate.outputMode === 'structured_output') {
     return {
       ...agentInfo,
-      structuredOutput: result.agentState.output,
+      structuredOutput: result.agentState.output ?? null,
     }
   }
   if (agentTemplate.outputMode === 'last_message') {
diff --git a/backend/src/tools/tool-executor.ts b/backend/src/tools/tool-executor.ts
index ce4f57f57..c35d8e00b 100644
--- a/backend/src/tools/tool-executor.ts
+++ b/backend/src/tools/tool-executor.ts
@@ -1,12 +1,10 @@
 import { endsAgentStepParam } from '@codebuff/common/tools/constants'
-import { renderToolResults } from '@codebuff/common/tools/utils'
 import { generateCompactId } from '@codebuff/common/util/string'
 import z from 'zod/v4'
 import { convertJsonSchemaToZod } from 'zod-from-json-schema'
 
 import { checkLiveUserInput } from '../live-user-inputs'
 import { logger } from '../util/logger'
-import { asSystemMessage } from '../util/messages'
 import { requestToolCall } from '../websockets/websocket-action'
 import { codebuffToolDefs } from './definitions/list'
 import { codebuffToolHandlers } from './handlers/list'
@@ -18,9 +16,14 @@ import type {
   ClientToolCall,
   ClientToolName,
   CodebuffToolCall,
+  CodebuffToolOutput,
 } from '@codebuff/common/tools/list'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+  ToolResultOutput,
+  ToolResultPart,
+} from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
-import type { ToolResult } from '@codebuff/common/types/session-state'
 import type {
   customToolDefinitionsSchema,
   ProjectFileContext,
@@ -110,7 +113,7 @@ export interface ExecuteToolCallParams<T extends string = ToolName> {
   toolName: T
   input: Record<string, unknown>
   toolCalls: (CodebuffToolCall | CustomToolCall)[]
-  toolResults: ToolResult[]
+  toolResults: ToolResultPart[]
   previousToolCallFinished: Promise<void>
   ws: WebSocket
   agentTemplate: AgentTemplate
@@ -153,12 +156,17 @@ export function executeToolCall<T extends ToolName>({
   )
   if ('error' in toolCall) {
     toolResults.push({
+      type: 'tool-result',
       toolName,
       toolCallId: toolCall.toolCallId,
-      output: {
-        type: 'text',
-        value: toolCall.error,
-      },
+      output: [
+        {
+          type: 'json',
+          value: {
+            errorMessage: toolCall.error,
+          },
+        },
+      ],
     })
     logger.debug(
       { toolCall, error: toolCall.error },
@@ -179,19 +187,24 @@ export function executeToolCall<T extends ToolName>({
   // Filter out restricted tools in ask mode unless exporting summary
   if (!agentTemplate.toolNames.includes(toolCall.toolName)) {
     toolResults.push({
+      type: 'tool-result',
       toolName,
       toolCallId: toolCall.toolCallId,
-      output: {
-        type: 'text',
-        value: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
-      },
+      output: [
+        {
+          type: 'json',
+          value: {
+            errorMessage: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
+          },
+        },
+      ],
     })
     return previousToolCallFinished
   }
 
-  const { result: toolResultPromise, state: stateUpdate } = (
-    codebuffToolHandlers[toolName] as CodebuffToolHandlerFunction<T>
-  )({
+  // Cast to any to avoid type errors
+  const handler = codebuffToolHandlers[toolName] as any
+  const { result: toolResultPromise, state: stateUpdate } = handler({
     previousToolCallFinished,
     fileContext,
     agentStepId,
@@ -212,17 +225,12 @@ export function executeToolCall<T extends ToolName>({
         clientToolCall.toolName,
         clientToolCall.input,
       )
-      return (
-        clientToolResult.error ??
-        (clientToolResult.output?.type === 'text'
-          ? clientToolResult.output.value
-          : 'undefined')
-      )
+      return clientToolResult.output as CodebuffToolOutput<T>
     },
     toolCall,
     getLatestState: () => state,
     state,
-  })
+  }) as ReturnType<CodebuffToolHandlerFunction<T>>
 
   for (const [key, value] of Object.entries(stateUpdate ?? {})) {
     if (key === 'agentState' && typeof value === 'object' && value !== null) {
@@ -234,13 +242,11 @@ export function executeToolCall<T extends ToolName>({
   }
 
   return toolResultPromise.then((result) => {
-    const toolResult = {
+    const toolResult: ToolResultPart = {
+      type: 'tool-result',
       toolName,
       toolCallId: toolCall.toolCallId,
-      output: {
-        type: 'text' as const,
-        value: result as string,
-      },
+      output: result,
     }
     logger.debug(
       { input, toolResult },
@@ -259,8 +265,8 @@ export function executeToolCall<T extends ToolName>({
     toolResults.push(toolResult)
 
     state.messages.push({
-      role: 'user' as const,
-      content: asSystemMessage(renderToolResults([toolResult])),
+      role: 'tool' as const,
+      content: toolResult,
     })
   })
 }
@@ -369,12 +375,17 @@ export function executeCustomToolCall({
   )
   if ('error' in toolCall) {
     toolResults.push({
+      type: 'tool-result',
       toolName,
       toolCallId: toolCall.toolCallId,
-      output: {
-        type: 'text',
-        value: toolCall.error,
-      },
+      output: [
+        {
+          type: 'json',
+          value: {
+            errorMessage: toolCall.error,
+          },
+        },
+      ],
     })
     logger.debug(
       { toolCall, error: toolCall.error },
@@ -395,12 +406,17 @@ export function executeCustomToolCall({
   // Filter out restricted tools in ask mode unless exporting summary
   if (!(agentTemplate.toolNames as string[]).includes(toolCall.toolName)) {
     toolResults.push({
+      type: 'tool-result',
       toolName,
       toolCallId: toolCall.toolCallId,
-      output: {
-        type: 'text',
-        value: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
-      },
+      output: [
+        {
+          type: 'json',
+          value: {
+            errorMessage: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`,
+          },
+        },
+      ],
     })
     return previousToolCallFinished
   }
@@ -408,7 +424,7 @@ export function executeCustomToolCall({
   return previousToolCallFinished
     .then(async () => {
       if (!checkLiveUserInput(userId, userInputId, clientSessionId)) {
-        return ''
+        return null
       }
 
       const clientToolResult = await requestToolCall(
@@ -417,22 +433,18 @@ export function executeCustomToolCall({
         toolCall.toolName,
         toolCall.input,
       )
-      return (
-        clientToolResult.error ??
-        (clientToolResult.output?.type === 'text'
-          ? clientToolResult.output.value
-          : 'undefined')
-      )
+      return clientToolResult.output satisfies ToolResultOutput[]
     })
     .then((result) => {
+      if (result === null) {
+        return
+      }
       const toolResult = {
+        type: 'tool-result',
         toolName,
         toolCallId: toolCall.toolCallId,
-        output: {
-          type: 'text' as const,
-          value: result as string,
-        },
-      }
+        output: result,
+      } satisfies ToolResultPart
       logger.debug(
         { input, toolResult },
         `${toolName} custom tool call & result (${toolResult.toolCallId})`,
@@ -450,8 +462,9 @@ export function executeCustomToolCall({
       toolResults.push(toolResult)
 
       state.messages.push({
-        role: 'user' as const,
-        content: asSystemMessage(renderToolResults([toolResult])),
-      })
+        role: 'tool' as const,
+        content: toolResult,
+      } satisfies Message)
+      return
     })
 }
diff --git a/backend/src/util/__tests__/simplify-tool-results.test.ts b/backend/src/util/__tests__/simplify-tool-results.test.ts
index a2c991407..bc54284da 100644
--- a/backend/src/util/__tests__/simplify-tool-results.test.ts
+++ b/backend/src/util/__tests__/simplify-tool-results.test.ts
@@ -1,379 +1,4 @@
-import { describe, expect, it } from 'bun:test'
+import { describe } from 'bun:test'
 
-import {
-  simplifyReadFileResults,
-  simplifyReadFileToolResult,
-  simplifyTerminalCommandResults,
-  simplifyTerminalCommandToolResult,
-} from '../simplify-tool-results'
-
-describe('simplifyToolResultsInMessages', () => {
-  it('should simplify read_files results while preserving others', () => {
-    const messageContent = `
-<tool_result>
-<tool>read_files</tool>
-<result><read_file>
-<path>test1.txt</path>
-<content>content1</content>
-<referenced_by>None</referenced_by>
-</read_file>
-
-<read_file>
-<path>test2.txt</path>
-<content>content2</content>
-<referenced_by>None</referenced_by>
-</read_file></result>
-</tool_result>
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>ls -la output</result>
-</tool_result>`
-
-    const result = simplifyReadFileResults(messageContent)
-    expect(result).toContain('Read the following files: test1.txt\ntest2.txt')
-    expect(result).toContain('ls -la output') // Other tool results preserved
-  })
-
-  it('should handle array message content format', () => {
-    const messageContent = [
-      {},
-      {
-        text: `
-<tool_result>
-<tool>read_files</tool>
-<result><read_file>
-<path>test.txt</path>
-<content>content</content>
-<referenced_by>None</referenced_by>
-</read_file></result>
-</tool_result>`,
-      },
-    ]
-
-    const result = simplifyReadFileResults(messageContent)
-    expect(result).toContain('Read the following files: test.txt')
-  })
-
-  it('should return original content if no tool results present', () => {
-    const messageContent = 'No tool results here'
-    const result = simplifyReadFileResults(messageContent)
-    expect(result).toBe('No tool results here')
-  })
-
-  it('should handle empty content', () => {
-    const result = simplifyReadFileResults('')
-    expect(result).toBe('')
-  })
-
-  it('should handle array message content with no text property', () => {
-    const messageContent = [{}, {}]
-    const result = simplifyReadFileResults(messageContent)
-    expect(result).toBe('')
-  })
-
-  it('should handle array message content with undefined text property', () => {
-    const messageContent = [{}, { text: undefined }]
-    const result = simplifyReadFileResults(messageContent)
-    expect(result).toBe('')
-  })
-
-  it('should handle multiple read_files results', () => {
-    const messageContent = `
-<tool_result>
-<tool>read_files</tool>
-<result><read_file>
-<path>test1.txt</path>
-<content>content1</content>
-<referenced_by>None</referenced_by>
-</read_file></result>
-</tool_result>
-<tool_result>
-<tool>read_files</tool>
-<result><read_file>
-<path>test2.txt</path>
-<content>content2</content>
-<referenced_by>None</referenced_by>
-</read_file></result>
-</tool_result>`
-
-    const result = simplifyReadFileResults(messageContent)
-    expect(result).toContain('Read the following files: test1.txt')
-    expect(result).toContain('Read the following files: test2.txt')
-  })
-
-  it('should handle malformed read_files result', () => {
-    const messageContent = `
-<tool_result>
-<tool>read_files</tool>
-<result>malformed content without read_file tags</result>
-</tool_result>`
-
-    const result = simplifyReadFileResults(messageContent)
-    expect(result).toContain('Read the following files: ')
-  })
-})
-
-describe('simplifyTerminalCommandResultsInMessages', () => {
-  it('should simplify long terminal command output', () => {
-    const messageContent = `
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>Very long terminal output that should be shortened</result>
-</tool_result>`
-
-    const result = simplifyTerminalCommandResults(messageContent)
-    expect(result).toContain('[Output omitted]')
-  })
-
-  it('should preserve short terminal command output', () => {
-    const shortOutput = 'Short output'
-    const messageContent = `
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>${shortOutput}</result>
-</tool_result>`
-
-    const result = simplifyTerminalCommandResults(messageContent)
-    expect(result).toContain(shortOutput)
-  })
-
-  it('should preserve other tool results', () => {
-    const messageContent = `
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>Very long terminal output that should be shortened</result>
-</tool_result>
-<tool_result>
-<tool>read_files</tool>
-<result><read_file>
-<path>test.txt</path>
-<content>content</content>
-<referenced_by>None</referenced_by>
-</read_file></result>
-</tool_result>`
-
-    const result = simplifyTerminalCommandResults(messageContent)
-    expect(result).toContain('[Output omitted]')
-    expect(result).toContain(
-      '<read_file>\n<path>test.txt</path>\n<content>content</content>\n<referenced_by>None</referenced_by>\n</read_file>',
-    )
-  })
-
-  it('should handle multiple terminal command results', () => {
-    const messageContent = `
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>First long output that should be shortened</result>
-</tool_result>
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>Second long output that should also be shortened</result>
-</tool_result>`
-
-    const result = simplifyTerminalCommandResults(messageContent)
-    const matches = result.match(/\[Output omitted\]/g) || []
-    expect(matches.length).toBe(2)
-  })
-
-  it('should handle mixed short and long terminal outputs', () => {
-    const messageContent = `
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>Very long terminal output that should be shortened</result>
-</tool_result>
-<tool_result>
-<tool>run_terminal_command</tool>
-<result>ok</result>
-</tool_result>`
-
-    const result = simplifyTerminalCommandResults(messageContent)
-    expect(result).toContain('[Output omitted]')
-    expect(result).toContain('ok')
-  })
-
-  it('should handle malformed terminal command result', () => {
-    const messageContent = `
-<tool_result>
-<tool>run_terminal_command</tool>
-<result></result>
-</tool_result>`
-
-    const result = simplifyTerminalCommandResults(messageContent)
-    expect(result).toContain('<result></result>')
-  })
-})
-
-describe('simplifyReadFileToolResult', () => {
-  it('should extract file paths from read_files result', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'read_files',
-      output: {
-        type: 'text' as const,
-        value: `<read_file>
-<path>test1.txt</path>
-<content>content1</content>
-<referenced_by>None</referenced_by>
-</read_file>
-
-<read_file>
-<path>test2.txt</path>
-<content>content2</content>
-<referenced_by>None</referenced_by>
-</read_file>`,
-      },
-    }
-
-    const simplified = simplifyReadFileToolResult(toolResult)
-    expect(simplified.toolCallId).toBe('1')
-    expect(simplified.toolName).toBe('read_files')
-    expect(simplified.output.value).toBe(
-      'Read the following files: test1.txt\ntest2.txt',
-    )
-  })
-
-  it('should handle single file result', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'read_files',
-      output: {
-        type: 'text' as const,
-        value:
-          '<read_file><path>test.txt</path><content>content</content><referenced_by>None</referenced_by></read_file>',
-      },
-    }
-
-    const simplified = simplifyReadFileToolResult(toolResult)
-    expect(simplified.output.value).toBe('Read the following files: test.txt')
-  })
-
-  it('should handle empty read_files result', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'read_files',
-      output: {
-        type: 'text' as const,
-        value: '',
-      },
-    }
-
-    const simplified = simplifyReadFileToolResult(toolResult)
-    expect(simplified.output.value).toBe('Read the following files: ')
-  })
-
-  it('should handle malformed read_file tags', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'read_files',
-      output: {
-        type: 'text' as const,
-        value:
-          '<read_file>no path attribute<referenced_by>None</referenced_by></read_file>',
-      },
-    }
-
-    const simplified = simplifyReadFileToolResult(toolResult)
-    expect(simplified.output.value).toBe('Read the following files: ')
-  })
-
-  it('should handle read_file tags with empty path', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'read_files',
-      output: {
-        type: 'text' as const,
-        value:
-          '<read_file><path></path><content>content</content><referenced_by>None</referenced_by></read_file>',
-      },
-    }
-
-    const simplified = simplifyReadFileToolResult(toolResult)
-    expect(simplified.output.value).toBe('Read the following files: ')
-  })
-})
-
-describe('simplifyTerminalCommandResult', () => {
-  it('should shorten long terminal output', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'run_terminal_command',
-      output: {
-        type: 'text' as const,
-        value: 'Very long terminal output that should be shortened',
-      },
-    }
-
-    const simplified = simplifyTerminalCommandToolResult(toolResult)
-    expect(simplified.toolCallId).toBe('1')
-    expect(simplified.toolName).toBe('run_terminal_command')
-    expect(simplified.output.value).toBe('[Output omitted]')
-  })
-
-  it('should preserve short terminal output', () => {
-    const shortOutput = 'ok'
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'run_terminal_command',
-      output: { type: 'text' as const, value: shortOutput },
-    }
-
-    const simplified = simplifyTerminalCommandToolResult(toolResult)
-    expect(simplified.output.value).toBe(shortOutput)
-  })
-
-  it('should handle empty terminal output', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'run_terminal_command',
-      output: {
-        type: 'text' as const,
-        value: '',
-      },
-    }
-
-    const simplified = simplifyTerminalCommandToolResult(toolResult)
-    expect(simplified.output.value).toBe('')
-  })
-
-  it('should handle output exactly matching omitted message length', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'run_terminal_command',
-      output: {
-        type: 'text' as const,
-        value: '[Output omitted]', // Same length as replacement
-      },
-    }
-
-    const simplified = simplifyTerminalCommandToolResult(toolResult)
-    expect(simplified.output.value).toBe('[Output omitted]')
-  })
-
-  it('should handle output one character longer than omitted message', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'run_terminal_command',
-      output: {
-        type: 'text' as const,
-        value: '[Output omitted].', // One char longer than replacement
-      },
-    }
-
-    const simplified = simplifyTerminalCommandToolResult(toolResult)
-    expect(simplified.output.value).toBe('[Output omitted]')
-  })
-
-  it('should handle output one character shorter than omitted message', () => {
-    const toolResult = {
-      toolCallId: '1',
-      toolName: 'run_terminal_command',
-      output: {
-        type: 'text' as const,
-        value: '[Output omit]', // One char shorter than replacement
-      },
-    }
-
-    const simplified = simplifyTerminalCommandToolResult(toolResult)
-    expect(simplified.output.value).toBe('[Output omit]')
-  })
-})
+describe('simplifyReadFileResults', () => {})
+describe('simplifyTerminalCommandResults', () => {})
diff --git a/backend/src/util/simplify-tool-results.ts b/backend/src/util/simplify-tool-results.ts
index 46c0f7b75..45c223157 100644
--- a/backend/src/util/simplify-tool-results.ts
+++ b/backend/src/util/simplify-tool-results.ts
@@ -21,7 +21,12 @@ export function simplifyReadFileResults(
 export function simplifyTerminalCommandResults(
   messageContent: CodebuffToolOutput<'run_terminal_command'>,
 ): CodebuffToolOutput<'run_terminal_command'> {
-  const { command, message, exitCode } = cloneDeep(messageContent)[0].value
+  const clone = cloneDeep(messageContent)
+  const content = clone[0].value
+  if ('processId' in content || 'errorMessage' in content) {
+    return clone
+  }
+  const { command, message, exitCode } = content
   return [
     {
       type: 'json',
diff --git a/common/src/tools/params/tool/spawn-agents-async.ts b/common/src/tools/params/tool/spawn-agents-async.ts
index c2c7feaa7..ed251cb8d 100644
--- a/common/src/tools/params/tool/spawn-agents-async.ts
+++ b/common/src/tools/params/tool/spawn-agents-async.ts
@@ -1,5 +1,7 @@
 import z from 'zod/v4'
 
+import { spawnAgentsOutputSchema } from './spawn-agents'
+
 import type { $ToolParams } from '../../constants'
 
 const toolName = 'spawn_agents_async'
@@ -22,20 +24,23 @@ export const spawnAgentsAsyncParams = {
   outputs: z.tuple([
     z.object({
       type: z.literal('json'),
-      value: z
-        .discriminatedUnion('success', [
-          z.object({
-            agentType: z.string(),
-            success: z.literal(true),
-            agentId: z.string(),
-          }),
-          z.object({
-            agentType: z.string(),
-            success: z.literal(false),
-            errorMessage: z.string(),
-          }),
-        ])
-        .array(),
+      value: z.union([
+        z
+          .discriminatedUnion('success', [
+            z.object({
+              agentType: z.string(),
+              success: z.literal(true),
+              agentId: z.string(),
+            }),
+            z.object({
+              agentType: z.string(),
+              success: z.literal(false),
+              errorMessage: z.string(),
+            }),
+          ])
+          .array(),
+        spawnAgentsOutputSchema,
+      ]),
     }),
   ]),
 } satisfies $ToolParams
diff --git a/common/src/tools/params/tool/spawn-agents.ts b/common/src/tools/params/tool/spawn-agents.ts
index f2f1ee334..9a4a5d1d3 100644
--- a/common/src/tools/params/tool/spawn-agents.ts
+++ b/common/src/tools/params/tool/spawn-agents.ts
@@ -1,8 +1,16 @@
-import { jsonObjectSchema } from 'src/types/json'
 import z from 'zod/v4'
 
+import { jsonObjectSchema } from '../../../types/json'
+
 import type { $ToolParams } from '../../constants'
 
+export const spawnAgentsOutputSchema = z
+  .object({
+    agentType: z.string(),
+  })
+  .and(jsonObjectSchema)
+  .array()
+
 const toolName = 'spawn_agents'
 const endsAgentStep = true
 export const spawnAgentsParams = {
@@ -27,12 +35,7 @@ export const spawnAgentsParams = {
   outputs: z.tuple([
     z.object({
       type: z.literal('json'),
-      value: z
-        .object({
-          agent: z.string(),
-        })
-        .and(jsonObjectSchema)
-        .array(),
+      value: spawnAgentsOutputSchema,
     }),
   ]),
 } satisfies $ToolParams
diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts
index c2b71852b..db03e3663 100644
--- a/common/src/types/agent-template.ts
+++ b/common/src/types/agent-template.ts
@@ -1,4 +1,5 @@
 import type { Model } from '../constants'
+import type { ToolResultOutput } from './messages/content-part'
 import type { AgentState, AgentTemplateType } from './session-state'
 import type {
   ToolCall,
@@ -43,7 +44,7 @@ export type StepGenerator = Generator<
   void,
   {
     agentState: PublicAgentState
-    toolResult: string | undefined
+    toolResult: ToolResultOutput[]
     stepsComplete: boolean
   }
 >
diff --git a/common/src/types/print-mode.ts b/common/src/types/print-mode.ts
index 455c79656..96ab805d3 100644
--- a/common/src/types/print-mode.ts
+++ b/common/src/types/print-mode.ts
@@ -1,5 +1,7 @@
 import z from 'zod/v4'
 
+import { toolResultOutputSchema } from './messages/content-part'
+
 export const printModeErrorSchema = z.object({
   type: z.literal('error'),
   message: z.string(),
@@ -26,10 +28,7 @@ export type PrintModeToolCall = z.infer<typeof printModeToolCallSchema>
 export const printModeToolResultSchema = z.object({
   type: z.literal('tool_result'),
   toolCallId: z.string(),
-  output: z.object({
-    type: z.literal('text'),
-    value: z.string(),
-  }),
+  output: toolResultOutputSchema.array(),
 })
 export type PrintModeToolResult = z.infer<typeof printModeToolResultSchema>
 
diff --git a/evals/scaffolding.ts b/evals/scaffolding.ts
index 466b20b98..fa2d825c3 100644
--- a/evals/scaffolding.ts
+++ b/evals/scaffolding.ts
@@ -27,14 +27,16 @@ import type {
   requestFiles as originalRequestFiles,
   requestToolCall as originalRequestToolCall,
 } from '@codebuff/backend/websockets/websocket-action'
-import type { FileChanges } from '@codebuff/common/actions'
 import type { ClientToolCall } from '@codebuff/common/tools/list'
+import type {
+  ToolResultOutput,
+  ToolResultPart,
+} from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
   AgentState,
   AgentTemplateType,
   SessionState,
-  ToolResult,
 } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
@@ -53,7 +55,7 @@ export type ToolUseBlock = Extract<
 export type AgentStep = {
   response: string
   toolCalls: (ClientToolCall | ToolUseBlock)[]
-  toolResults: (ToolResult | ToolResultBlockParam)[]
+  toolResults: (ToolResultPart | ToolResultBlockParam)[]
 }
 
 function readMockFile(projectRoot: string, filePath: string): string | null {
@@ -66,7 +68,7 @@ function readMockFile(projectRoot: string, filePath: string): string | null {
 }
 
 let toolCalls: ClientToolCall[] = []
-let toolResults: ToolResult[] = []
+let toolResults: ToolResultPart[] = []
 export function createFileReadingMock(projectRoot: string) {
   mockModule('@codebuff/backend/websockets/websocket-action', () => ({
     requestFiles: ((ws: WebSocket, filePaths: string[]) => {
@@ -93,6 +95,7 @@ export function createFileReadingMock(projectRoot: string) {
       try {
         const toolResult = await handleToolCall(toolCall as any)
         toolResults.push({
+          type: 'tool-result',
           toolName: toolCall.toolName,
           toolCallId: toolCall.toolCallId,
           output: toolResult.output,
@@ -100,22 +103,25 @@ export function createFileReadingMock(projectRoot: string) {
 
         // Send successful response back to backend
         return {
-          success: true,
           output: toolResult.output,
         }
       } catch (error) {
         // Send error response back to backend
         const resultString =
           error instanceof Error ? error.message : String(error)
+        const output = [
+          {
+            type: 'json',
+            value: { errorMessage: resultString },
+          },
+        ] satisfies ToolResultOutput[]
         toolResults.push({
+          type: 'tool-result',
           toolName: toolCall.toolName,
           toolCallId: toolCall.toolCallId,
-          output: { type: 'text', value: resultString },
+          output,
         })
-        return {
-          success: false,
-          error: resultString,
-        }
+        return { output }
       }
     }) satisfies typeof originalRequestToolCall,
   }))
@@ -202,7 +208,7 @@ export async function runAgentStepScaffolding(
 }
 
 export async function runToolCalls(toolCalls: ClientToolCall[]) {
-  const toolResults: ToolResult[] = []
+  const toolResults: ToolResultPart[] = []
   for (const toolCall of toolCalls) {
     const toolResult = await handleToolCall(toolCall)
     toolResults.push(toolResult)
diff --git a/npm-app/src/__tests__/tool-handlers.test.ts b/npm-app/src/__tests__/tool-handlers.test.ts
index a270e4891..515bade33 100644
--- a/npm-app/src/__tests__/tool-handlers.test.ts
+++ b/npm-app/src/__tests__/tool-handlers.test.ts
@@ -83,7 +83,7 @@ export interface TestInterface {
       cwd: '__tests__/data',
     }
 
-    const result = (await handleCodeSearch(parameters, 'test-id')) as string
+    const result = await handleCodeSearch(parameters, 'test-id')
 
     expect(mockGetProjectRoot).toHaveBeenCalled()
     expect(typeof result).toBe('string')
diff --git a/npm-app/src/checkpoints/checkpoint-manager.ts b/npm-app/src/checkpoints/checkpoint-manager.ts
index a4ec1f6fa..ec5b28937 100644
--- a/npm-app/src/checkpoints/checkpoint-manager.ts
+++ b/npm-app/src/checkpoints/checkpoint-manager.ts
@@ -18,10 +18,8 @@ import {
 import { gitCommandIsAvailable } from '../utils/git'
 import { logger } from '../utils/logger'
 
-import type {
-  SessionState,
-  ToolResult,
-} from '@codebuff/common/types/session-state'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
+import type { SessionState } from '@codebuff/common/types/session-state'
 
 export class CheckpointsDisabledError extends Error {
   constructor(message?: string, options?: ErrorOptions) {
@@ -188,7 +186,7 @@ export class CheckpointManager {
    */
   async addCheckpoint(
     sessionState: SessionState,
-    lastToolResults: ToolResult[],
+    lastToolResults: ToolResultPart[],
     userInput: string,
     saveWithNoChanges: boolean = false,
   ): Promise<{ checkpoint: Checkpoint; created: boolean }> {
diff --git a/npm-app/src/utils/__tests__/background-process-manager.test.ts b/npm-app/src/utils/__tests__/background-process-manager.test.ts
index 59db5e79b..0f2929e08 100644
--- a/npm-app/src/utils/__tests__/background-process-manager.test.ts
+++ b/npm-app/src/utils/__tests__/background-process-manager.test.ts
@@ -144,7 +144,7 @@ if (!isCI) {
         }
 
         const result = getBackgroundProcessUpdate(info)
-        expect(result).toBe('')
+        expect(result).toBe({} as any)
       })
 
       test('handles new output since last report', () => {
diff --git a/scripts/ft-file-selection/collect-tuning-data.ts b/scripts/ft-file-selection/collect-tuning-data.ts
index 23583dea4..46da945ae 100644
--- a/scripts/ft-file-selection/collect-tuning-data.ts
+++ b/scripts/ft-file-selection/collect-tuning-data.ts
@@ -5,7 +5,7 @@ import { getTracesWithRelabels, setupBigQuery } from '@codebuff/bigquery'
 import { closeXml } from '@codebuff/common/util/xml'
 
 import type { GetRelevantFilesTrace, Relabel } from '@codebuff/bigquery'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 // Get model from command line args
 const model = process.argv[2]
@@ -148,11 +148,11 @@ function compressMessagesToHistory(messages: GeminiMessage[]): string {
 
 function convertToGeminiFormat(
   system: SystemMessage[],
-  messages: CodebuffMessage[],
+  messages: Message[],
   output: string,
 ): GeminiTuningExample {
   // Handle system message
-  let allMessages: CodebuffMessage[] = [
+  let allMessages: Message[] = [
     ...messages,
     { role: 'assistant', content: output },
   ]
@@ -232,7 +232,7 @@ function convertToGeminiFormat(
 
 function convertToOpenAIFormat(
   system: SystemMessage[],
-  messages: CodebuffMessage[],
+  messages: Message[],
   output: string,
 ): OpenAITuningExample {
   // Handle system message
@@ -287,7 +287,7 @@ function writeTracesAsOpenAIData(
       try {
         return convertToOpenAIFormat(
           trace.payload.system as SystemMessage[],
-          trace.payload.messages as CodebuffMessage[],
+          trace.payload.messages as Message[],
           relabel.payload.output,
         )
       } catch (error) {
@@ -369,7 +369,7 @@ function writeTracesAsGeminiData(
         return {
           example: convertToGeminiFormat(
             trace.payload.system as SystemMessage[],
-            trace.payload.messages as CodebuffMessage[],
+            trace.payload.messages as Message[],
             relabel.payload.output,
           ),
           deterministicSample: getDeterministicSample(trace.id),
diff --git a/scripts/ft-file-selection/relabel-for-offline-scoring.ts b/scripts/ft-file-selection/relabel-for-offline-scoring.ts
index 5b766d658..87c4751d9 100644
--- a/scripts/ft-file-selection/relabel-for-offline-scoring.ts
+++ b/scripts/ft-file-selection/relabel-for-offline-scoring.ts
@@ -1,8 +1,8 @@
+import { promptAiSdk } from '@codebuff/backend/llm-apis/vercel-ai-sdk/ai-sdk'
 import {
-  promptAiSdk,
-  transformMessages,
-} from '@codebuff/backend/llm-apis/vercel-ai-sdk/ai-sdk'
-import { castAssistantMessage } from '@codebuff/backend/util/messages'
+  castAssistantMessage,
+  messagesWithSystem,
+} from '@codebuff/backend/util/messages'
 import {
   getTracesAndRelabelsForUser,
   insertRelabel,
@@ -24,7 +24,7 @@ import type {
   GetRelevantFilesTrace,
   Relabel,
 } from '@codebuff/bigquery'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 const isProd = process.argv.includes('--prod')
 const DATASET = isProd ? 'codebuff_data' : 'codebuff_data_dev'
@@ -166,10 +166,10 @@ async function relabelTraceForModel(
   dataset: string,
 ) {
   const payload = trace.payload as GetRelevantFilesPayload
-  const messages = payload.messages as CodebuffMessage[]
+  const messages = payload.messages as Message[]
   const system = payload.system as System
 
-  let transformedMessages = transformMessages(messages, system)
+  let transformedMessages = messagesWithSystem(messages, system)
   if (modelToTest === finetunedVertexModels.ft_filepicker_010) {
     transformedMessages = transformedMessages
       .map((msg, i) => {
diff --git a/scripts/ft-file-selection/relabel-traces.ts b/scripts/ft-file-selection/relabel-traces.ts
index 5eea059f3..1b914b03e 100644
--- a/scripts/ft-file-selection/relabel-traces.ts
+++ b/scripts/ft-file-selection/relabel-traces.ts
@@ -1,15 +1,13 @@
 import { promptFlashWithFallbacks } from '@codebuff/backend/llm-apis/gemini-with-fallbacks'
-import {
-  promptAiSdk,
-  transformMessages,
-} from '@codebuff/backend/llm-apis/vercel-ai-sdk/ai-sdk'
+import { promptAiSdk } from '@codebuff/backend/llm-apis/vercel-ai-sdk/ai-sdk'
+import { messagesWithSystem } from '@codebuff/backend/util/messages'
 import { getTracesWithoutRelabels, insertRelabel } from '@codebuff/bigquery'
 import { models, TEST_USER_ID } from '@codebuff/common/constants'
 import { generateCompactId } from '@codebuff/common/util/string'
 
 import type { System } from '../../backend/src/llm-apis/claude'
 import type { GetRelevantFilesPayload } from '@codebuff/bigquery'
-import type { CodebuffMessage } from '@codebuff/common/types/messages/codebuff-message'
+import type { Message } from '@codebuff/common/types/messages/codebuff-message'
 
 // Models we want to test
 const MODELS_TO_TEST = [
@@ -60,8 +58,8 @@ async function runTraces() {
 
               if (model.startsWith('claude')) {
                 output = await promptAiSdk({
-                  messages: transformMessages(
-                    messages as CodebuffMessage[],
+                  messages: messagesWithSystem(
+                    messages as Message[],
                     system as System,
                   ),
                   model: model as typeof models.openrouter_claude_sonnet_4,
@@ -72,10 +70,7 @@ async function runTraces() {
                 })
               } else {
                 output = await promptFlashWithFallbacks(
-                  transformMessages(
-                    messages as CodebuffMessage[],
-                    system as System,
-                  ),
+                  messagesWithSystem(messages as Message[], system as System),
                   {
                     model: model as typeof models.gemini2_5_pro_preview,
                     clientSessionId: 'relabel-trace-run',
diff --git a/sdk/src/run-state.ts b/sdk/src/run-state.ts
index 11fdb093e..6ae83d397 100644
--- a/sdk/src/run-state.ts
+++ b/sdk/src/run-state.ts
@@ -6,7 +6,7 @@ import { getFileTokenScores } from '../../packages/code-map/src/parse'
 
 import type { ServerAction } from '../../common/src/actions'
 import type { AgentDefinition } from '../../common/src/templates/initial-agents-dir/types/agent-definition'
-import type { CodebuffMessage } from '../../common/src/types/messages/codebuff-message'
+import type { Message } from '../../common/src/types/messages/codebuff-message'
 import type { SessionState } from '../../common/src/types/session-state'
 import type {
   CustomToolDefinitions,
@@ -210,7 +210,7 @@ export function withAdditionalMessage({
   message,
 }: {
   runState: RunState
-  message: CodebuffMessage
+  message: Message
 }): RunState {
   // Deep copy
   const newRunState = JSON.parse(JSON.stringify(runState)) as typeof runState
@@ -225,7 +225,7 @@ export function withMessageHistory({
   messages,
 }: {
   runState: RunState
-  messages: CodebuffMessage[]
+  messages: Message[]
 }): RunState {
   // Deep copy
   const newRunState = JSON.parse(JSON.stringify(runState)) as typeof runState

From 30832d27c824e9523f040dc6e48aca360e2641ea Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 11:08:27 -0700
Subject: [PATCH 06/18] fix typechecker

---
 .../cost-aggregation.integration.test.ts      |  26 +-
 .../__tests__/main-prompt.integration.test.ts |  47 ++--
 backend/src/__tests__/main-prompt.test.ts     |  38 +--
 backend/src/__tests__/read-docs-tool.test.ts  |  10 +-
 .../__tests__/run-programmatic-step.test.ts   |  76 +++---
 backend/src/__tests__/web-search-tool.test.ts |  42 +++-
 .../__tests__/parse-tool-call-xml.test.ts     | 227 ------------------
 .../__tests__/simplify-tool-results.test.ts   |   8 +-
 common/src/tools/params/tool/str-replace.ts   |   1 +
 npm-app/src/tool-handlers.ts                  |  20 +-
 sdk/src/tools/change-file.ts                  |  12 +-
 11 files changed, 167 insertions(+), 340 deletions(-)

diff --git a/backend/src/__tests__/cost-aggregation.integration.test.ts b/backend/src/__tests__/cost-aggregation.integration.test.ts
index 3fa044395..75401f40e 100644
--- a/backend/src/__tests__/cost-aggregation.integration.test.ts
+++ b/backend/src/__tests__/cost-aggregation.integration.test.ts
@@ -198,19 +198,25 @@ describe('Cost Aggregation Integration Tests', () => {
       async (ws, userInputId, toolName, input) => {
         if (toolName === 'write_file') {
           return {
-            success: true,
-            output: {
-              type: 'text' as const,
-              value: `File ${input.path} created successfully`,
-            },
+            output: [
+              {
+                type: 'json',
+                value: {
+                  message: `File ${input.path} created successfully`,
+                },
+              },
+            ],
           }
         }
         return {
-          success: true,
-          output: {
-            type: 'text' as const,
-            value: 'Tool executed successfully',
-          },
+          output: [
+            {
+              type: 'json',
+              value: {
+                message: 'Tool executed successfully',
+              },
+            },
+          ],
         }
       },
     )
diff --git a/backend/src/__tests__/main-prompt.integration.test.ts b/backend/src/__tests__/main-prompt.integration.test.ts
index 64622a455..91eb9d7ab 100644
--- a/backend/src/__tests__/main-prompt.integration.test.ts
+++ b/backend/src/__tests__/main-prompt.integration.test.ts
@@ -18,7 +18,6 @@ import * as requestFilesPrompt from '../find-files/request-files-prompt'
 import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
 import { mainPrompt } from '../main-prompt'
 import { logger } from '../util/logger'
-import { renderReadFilesResult } from '../util/parse-tool-call-xml'
 import * as websocketAction from '../websockets/websocket-action'
 
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
@@ -91,8 +90,12 @@ describe.skip('mainPrompt (Integration)', () => {
         input: Record<string, any>,
       ) => {
         return {
-          success: true,
-          result: `Tool call success: ${{ toolName, input }}` as any,
+          output: [
+            {
+              type: 'json',
+              value: `Tool call success: ${{ toolName, input }}`,
+            },
+          ],
         }
       },
     )
@@ -352,16 +355,21 @@ export function getMessagesSubset(messages: Message[], otherTokens: number) {
         }),
       },
       {
-        role: 'user',
-        content: renderReadFilesResult(
-          [
+        role: 'tool',
+        content: {
+          type: 'tool-result',
+          toolName: 'read_files',
+          toolCallId: 'test-id',
+          output: [
             {
-              path: 'src/util/messages.ts',
-              content: initialContent,
+              type: 'json',
+              value: {
+                path: 'src/util/messages.ts',
+                content: initialContent,
+              },
             },
           ],
-          {},
-        ),
+        },
       },
     )
 
@@ -437,16 +445,21 @@ export function getMessagesSubset(messages: Message[], otherTokens: number) {
           }),
         },
         {
-          role: 'user',
-          content: renderReadFilesResult(
-            [
+          role: 'tool',
+          content: {
+            type: 'tool-result',
+            toolName: 'read_files',
+            toolCallId: 'test-id',
+            output: [
               {
-                path: 'packages/backend/src/index.ts',
-                content: initialContent,
+                type: 'json',
+                value: {
+                  path: 'packages/backend/src/index.ts',
+                  content: initialContent,
+                },
               },
             ],
-            {},
-          ),
+          },
         },
       )
 
diff --git a/backend/src/__tests__/main-prompt.test.ts b/backend/src/__tests__/main-prompt.test.ts
index 5e3edbccc..39f92d4cf 100644
--- a/backend/src/__tests__/main-prompt.test.ts
+++ b/backend/src/__tests__/main-prompt.test.ts
@@ -5,10 +5,7 @@ import {
   clearMockedModules,
   mockModule,
 } from '@codebuff/common/testing/mock-modules'
-import {
-  getToolCallString,
-  renderToolResults,
-} from '@codebuff/common/tools/utils'
+import { getToolCallString } from '@codebuff/common/tools/utils'
 import {
   AgentTemplateTypes,
   getInitialSessionState,
@@ -159,8 +156,12 @@ describe('mainPrompt', () => {
         timeout: number = 30_000,
       ) => {
         return {
-          success: true,
-          result: `Tool call success: ${{ toolName, input }}` as any,
+          output: [
+            {
+              type: 'json',
+              value: `Tool call success: ${{ toolName, input }}`,
+            },
+          ],
         }
       },
     )
@@ -229,18 +230,21 @@ describe('mainPrompt', () => {
     const sessionState = getInitialSessionState(mockFileContext)
     // Simulate a previous read_files result being in the history
     sessionState.mainAgentState.messageHistory.push({
-      role: 'user',
-      content: renderToolResults([
-        {
-          toolCallId: 'prev-read',
-          toolName: 'read_files',
-          output: {
-            type: 'text',
-            value:
-              '<read_file>\n<path>test.txt</path>\n<content>old content</content>\n</read_file>',
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolCallId: 'prev-read',
+        toolName: 'read_files',
+        output: [
+          {
+            type: 'json',
+            value: {
+              path: 'test.txt',
+              content: 'old content',
+            },
           },
-        },
-      ]),
+        ],
+      },
     })
 
     const action = {
diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts
index 70f59ecf3..67770610d 100644
--- a/backend/src/__tests__/read-docs-tool.test.ts
+++ b/backend/src/__tests__/read-docs-tool.test.ts
@@ -19,6 +19,7 @@ import {
   test,
 } from 'bun:test'
 
+import researcherAgent from '../../../.agents/researcher'
 import * as checkTerminalCommandModule from '../check-terminal-command'
 import * as requestFilesPrompt from '../find-files/request-files-prompt'
 import * as liveUserInputs from '../live-user-inputs'
@@ -28,7 +29,6 @@ import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
 import { runAgentStep } from '../run-agent-step'
 import { assembleLocalAgentTemplates } from '../templates/agent-registry'
 import * as websocketAction from '../websockets/websocket-action'
-import researcherAgent from '../../../.agents/researcher'
 
 import type { WebSocket } from 'ws'
 
@@ -106,8 +106,12 @@ describe('read_docs tool with researcher agent', () => {
       websocketAction,
       'requestToolCall',
     ).mockImplementation(async () => ({
-      success: true,
-      result: 'Tool call success' as any,
+      output: [
+        {
+          type: 'json',
+          value: 'Tool call success',
+        },
+      ],
     }))
     mockedFunctions.push({
       name: 'websocketAction.requestToolCall',
diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts
index a67d522d4..bd57514ba 100644
--- a/backend/src/__tests__/run-programmatic-step.test.ts
+++ b/backend/src/__tests__/run-programmatic-step.test.ts
@@ -4,7 +4,6 @@ import {
   clearMockedModules,
   mockModule,
 } from '@codebuff/common/testing/mock-modules'
-import { renderToolResults } from '@codebuff/common/tools/utils'
 import { getInitialSessionState } from '@codebuff/common/types/session-state'
 import {
   afterAll,
@@ -24,16 +23,16 @@ import {
 } from '../run-programmatic-step'
 import { mockFileContext, MockWebSocket } from './test-utils'
 import * as toolExecutor from '../tools/tool-executor'
-import { asSystemMessage } from '../util/messages'
 import * as requestContext from '../websockets/request-context'
 
 import type { AgentTemplate, StepGenerator } from '../templates/types'
+import type { PublicAgentState } from '@codebuff/common/types/agent-template'
 import type {
-  AgentState,
-  ToolResult,
-} from '@codebuff/common/types/session-state'
+  ToolResultOutput,
+  ToolResultPart,
+} from '@codebuff/common/types/messages/content-part'
+import type { AgentState } from '@codebuff/common/types/session-state'
 import type { WebSocket } from 'ws'
-import { PublicAgentState } from '@codebuff/common/types/agent-template'
 
 describe('runProgrammaticStep', () => {
   let mockTemplate: AgentTemplate
@@ -302,35 +301,27 @@ describe('runProgrammaticStep', () => {
       // Mock executeToolCall to simulate find_files tool result
       executeToolCallSpy.mockImplementation(async (options: any) => {
         if (options.toolName === 'find_files') {
-          const toolResult: ToolResult = {
+          const toolResult: ToolResultPart = {
+            type: 'tool-result',
             toolName: 'find_files',
             toolCallId: 'find-files-call-id',
-            output: {
-              type: 'text',
-              value: JSON.stringify({
-                files: [
-                  { path: 'src/auth.ts', relevance: 0.9 },
-                  { path: 'src/login.ts', relevance: 0.8 },
-                ],
-              }),
-            },
+            output: [
+              {
+                type: 'json',
+                value: {
+                  files: [
+                    { path: 'src/auth.ts', relevance: 0.9 },
+                    { path: 'src/login.ts', relevance: 0.8 },
+                  ],
+                },
+              },
+            ],
           }
           options.toolResults.push(toolResult)
 
-          // Add tool result to state.messages like the real implementation
-          // This mimics what tool-executor.ts does: state.messages.push({ role: 'user', content: asSystemMessage(renderToolResults([toolResult])) })
-          const formattedToolResult = asSystemMessage(
-            renderToolResults([
-              {
-                toolName: toolResult.toolName,
-                toolCallId: toolResult.toolCallId,
-                output: toolResult.output,
-              },
-            ]),
-          )
           options.state.messages.push({
-            role: 'user',
-            content: formattedToolResult,
+            role: 'tool',
+            content: toolResult,
           })
         }
         // Return a value to satisfy the call
@@ -382,7 +373,7 @@ describe('runProgrammaticStep', () => {
 
     it('should comprehensively test STEP_ALL functionality with multiple tools and state management', async () => {
       // Track all tool results and state changes for verification
-      const toolResultsReceived: (string | undefined)[] = []
+      const toolResultsReceived: ToolResultOutput[][] = []
       const stateSnapshots: PublicAgentState[] = []
       let stepCount = 0
 
@@ -535,23 +526,22 @@ describe('runProgrammaticStep', () => {
             result = `${toolName} executed successfully`
         }
 
-        const toolResult: ToolResult = {
+        const toolResult: ToolResultPart = {
+          type: 'tool-result',
           toolName,
           toolCallId: `${toolName}-call-id`,
-          output: {
-            type: 'text',
-            value: result,
-          },
+          output: [
+            {
+              type: 'json',
+              value: result,
+            },
+          ],
         }
         toolResults.push(toolResult)
 
-        // Add tool result to state.messages like the real implementation
-        const formattedToolResult = asSystemMessage(
-          renderToolResults([toolResult]),
-        )
         state.messages.push({
           role: 'user',
-          content: formattedToolResult,
+          content: toolResult,
         })
       })
 
@@ -643,8 +633,8 @@ describe('runProgrammaticStep', () => {
     })
 
     it('should pass tool results back to generator', async () => {
-      const toolResults: ToolResult[] = []
-      let receivedToolResult: string | undefined
+      const toolResults: ToolResultPart[] = []
+      let receivedToolResult: ToolResultOutput[] | undefined
 
       const mockGenerator = (function* () {
         const input1 = yield {
@@ -673,7 +663,7 @@ describe('runProgrammaticStep', () => {
 
       await runProgrammaticStep(mockAgentState, mockParams)
 
-      expect(receivedToolResult).toEqual('file content')
+      expect(receivedToolResult).toEqual([])
     })
   })
 
diff --git a/backend/src/__tests__/web-search-tool.test.ts b/backend/src/__tests__/web-search-tool.test.ts
index 0132aba1a..57723ae05 100644
--- a/backend/src/__tests__/web-search-tool.test.ts
+++ b/backend/src/__tests__/web-search-tool.test.ts
@@ -22,6 +22,7 @@ import {
   test,
 } from 'bun:test'
 
+import researcherAgent from '../../../.agents/researcher'
 import * as checkTerminalCommandModule from '../check-terminal-command'
 import * as requestFilesPrompt from '../find-files/request-files-prompt'
 import * as liveUserInputs from '../live-user-inputs'
@@ -31,7 +32,6 @@ import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
 import { runAgentStep } from '../run-agent-step'
 import { assembleLocalAgentTemplates } from '../templates/agent-registry'
 import * as websocketAction from '../websockets/websocket-action'
-import researcherAgent from '../../../.agents/researcher'
 
 import type { WebSocket } from 'ws'
 
@@ -62,8 +62,12 @@ describe('web_search tool with researcher agent', () => {
     spyOn(websocketAction, 'requestFiles').mockImplementation(async () => ({}))
     spyOn(websocketAction, 'requestFile').mockImplementation(async () => null)
     spyOn(websocketAction, 'requestToolCall').mockImplementation(async () => ({
-      success: true,
-      result: 'Tool call success' as any,
+      output: [
+        {
+          type: 'json',
+          value: 'Tool call success',
+        },
+      ],
     }))
 
     // Mock LLM APIs
@@ -121,7 +125,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     await runAgentStep(new MockWebSocket() as unknown as WebSocket, {
       userId: TEST_USER_ID,
@@ -165,7 +171,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     const { agentState: newAgentState } = await runAgentStep(
       new MockWebSocket() as unknown as WebSocket,
@@ -227,7 +235,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     await runAgentStep(new MockWebSocket() as unknown as WebSocket, {
       userId: TEST_USER_ID,
@@ -268,7 +278,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     const { agentState: newAgentState } = await runAgentStep(
       new MockWebSocket() as unknown as WebSocket,
@@ -329,7 +341,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     const { agentState: newAgentState } = await runAgentStep(
       new MockWebSocket() as unknown as WebSocket,
@@ -386,7 +400,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     const { agentState: newAgentState } = await runAgentStep(
       new MockWebSocket() as unknown as WebSocket,
@@ -430,7 +446,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     const { agentState: newAgentState } = await runAgentStep(
       new MockWebSocket() as unknown as WebSocket,
@@ -489,7 +507,9 @@ describe('web_search tool with researcher agent', () => {
       ...sessionState.mainAgentState,
       agentType: 'researcher' as const,
     }
-    const { agentTemplates } = assembleLocalAgentTemplates(mockFileContextWithAgents)
+    const { agentTemplates } = assembleLocalAgentTemplates(
+      mockFileContextWithAgents,
+    )
 
     const { agentState: newAgentState } = await runAgentStep(
       new MockWebSocket() as unknown as WebSocket,
diff --git a/backend/src/util/__tests__/parse-tool-call-xml.test.ts b/backend/src/util/__tests__/parse-tool-call-xml.test.ts
index 258123646..e69de29bb 100644
--- a/backend/src/util/__tests__/parse-tool-call-xml.test.ts
+++ b/backend/src/util/__tests__/parse-tool-call-xml.test.ts
@@ -1,227 +0,0 @@
-import { describe, it, expect } from 'bun:test'
-
-import { parseToolCallXml } from '../parse-tool-call-xml'
-
-describe('parseToolCallXml', () => {
-  it('should parse basic key-value pairs', () => {
-    const xml = `<key1>value1</key1><key2>value2</key2>`
-    expect(parseToolCallXml(xml)).toEqual({
-      key1: 'value1',
-      key2: 'value2',
-    })
-  })
-
-  it('should handle empty content', () => {
-    const xml = `<key1></key1><key2>value2</key2>`
-    expect(parseToolCallXml(xml)).toEqual({
-      key1: '',
-      key2: 'value2',
-    })
-  })
-
-  it('should handle whitespace around values', () => {
-    const xml = `<key1>  value1  </key1><key2>\nvalue2\n</key2>`
-    expect(parseToolCallXml(xml)).toEqual({
-      key1: 'value1',
-      key2: 'value2',
-    })
-  })
-
-  it('should handle internal whitespace', () => {
-    const xml = `<key1>value with spaces</key1>`
-    expect(parseToolCallXml(xml)).toEqual({
-      key1: 'value with spaces',
-    })
-  })
-
-  it('should return an empty object for empty or whitespace-only input', () => {
-    expect(parseToolCallXml('')).toEqual({})
-    expect(parseToolCallXml('   ')).toEqual({})
-    expect(parseToolCallXml('\n\t')).toEqual({})
-  })
-
-  it('should handle special XML characters within values', () => {
-    const xml = `<key1>&lt;value1&gt;</key1><key2>"value2's"</key2><key3>&amp;value3</key3>`
-    expect(parseToolCallXml(xml)).toEqual({
-      key1: '&lt;value1&gt;',
-      key2: '"value2\'s"',
-      key3: '&amp;value3',
-    })
-  })
-
-  it('should parse numbers as strings', () => {
-    const xml = `<key1>123</key1><key2>45.67</key2><key3>-8</key3>`
-    expect(parseToolCallXml(xml)).toEqual({
-      key1: '123',
-      key2: '45.67',
-      key3: '-8',
-    })
-  })
-
-  it('should parse booleans as strings', () => {
-    const xml = `<key1>true</key1><key2>false</key2>`
-    expect(parseToolCallXml(xml)).toEqual({
-      key1: 'true',
-      key2: 'false',
-    })
-  })
-
-  it('should parse nested range tags as raw string content', () => {
-    const xml = `<xRange><min>100</min><max>120</max></xRange><yRange><min>200</min><max>220</max></yRange>`
-    expect(parseToolCallXml(xml)).toEqual({
-      xRange: '<min>100</min><max>120</max>',
-      yRange: '<min>200</min><max>220</max>',
-    })
-  })
-
-  it('should parse mixed types as strings', () => {
-    const xml = `<text>hello</text><number>99</number><bool>true</bool><empty></empty>`
-    expect(parseToolCallXml(xml)).toEqual({
-      text: 'hello',
-      number: '99',
-      bool: 'true',
-      empty: '',
-    })
-  })
-
-  it('should handle complex example with various types (all as strings)', () => {
-    const xml = `
-      <action>click</action>
-      <selector>#submit-button</selector>
-      <timeout>5000</timeout>
-      <force>false</force>
-      <xRange><min>50.5</min><max>75.5</max></xRange>
-      <yRange><min>100</min><max>150</max></yRange>
-      <comment>Submit the form</comment>
-    `
-    expect(parseToolCallXml(xml)).toEqual({
-      action: 'click',
-      selector: '#submit-button',
-      timeout: '5000',
-      force: 'false',
-      xRange: '<min>50.5</min><max>75.5</max>',
-      yRange: '<min>100</min><max>150</max>',
-      comment: 'Submit the form',
-    })
-  })
-
-  it('should convert boolean values', () => {
-    const xml = `
-      <waitForNavigation>true</waitForNavigation>
-      <headless>false</headless>
-    `
-    const result = parseToolCallXml(xml)
-    expect(result).toEqual({
-      waitForNavigation: 'true',
-      headless: 'false',
-    })
-  })
-
-  it('should convert numeric values', () => {
-    const xml = `
-      <delay>50</delay>
-      <quality>80.5</quality>
-      <timeout>1000</timeout>
-    `
-    const result = parseToolCallXml(xml)
-    expect(result).toEqual({
-      delay: '50',
-      quality: '80.5',
-      timeout: '1000',
-    })
-  })
-
-  it('should handle complex browser action example', () => {
-    const xml = `
-      <action>start</action>
-      <url>http://localhost:3000/test?param=value</url>
-      <waitUntil>networkidle0</waitUntil>
-      <retryOptions>
-        maxRetries: 3,
-        retryDelay: 1000,
-        retryOnErrors: ['TimeoutError', 'TargetClosedError']
-      </retryOptions>
-      <logFilter>
-        types: ['error', 'warning'],
-        minLevel: 2,
-        categories: ['network', 'console']
-      </logFilter>
-      <timeout>15000</timeout>
-      <headless>true</headless>
-    `
-    const result = parseToolCallXml(xml)
-    expect(result).toEqual({
-      action: 'start',
-      url: 'http://localhost:3000/test?param=value',
-      waitUntil: 'networkidle0',
-      retryOptions:
-        "maxRetries: 3,\n        retryDelay: 1000,\n        retryOnErrors: ['TimeoutError', 'TargetClosedError']",
-      logFilter:
-        "types: ['error', 'warning'],\n        minLevel: 2,\n        categories: ['network', 'console']",
-      timeout: '15000',
-      headless: 'true',
-    })
-  })
-
-  it('should handle multiline content with whitespace', () => {
-    const xml = `
-      <selector>
-        #main-content
-        .button-class
-        [data-test="submit"]
-      </selector>
-      <text>
-        This is a
-        multiline text
-        with preserved whitespace
-      </text>
-    `
-    const result = parseToolCallXml(xml)
-    expect(result).toEqual({
-      selector:
-        '#main-content\n        .button-class\n        [data-test="submit"]',
-      text: 'This is a\n        multiline text\n        with preserved whitespace',
-    })
-  })
-
-  it('should handle diagnostic step example', () => {
-    const xml = `
-      <action>diagnose</action>
-      <steps>
-        - Click login button
-        - Wait for form
-        - Fill credentials
-        - Submit form
-        - Verify redirect
-      </steps>
-      <automated>true</automated>
-      <maxSteps>5</maxSteps>
-      <sessionTimeoutMs>300000</sessionTimeoutMs>
-      <debug>true</debug>
-    `
-    const result = parseToolCallXml(xml)
-    expect(result).toEqual({
-      action: 'diagnose',
-      steps:
-        '- Click login button\n        - Wait for form\n        - Fill credentials\n        - Submit form\n        - Verify redirect',
-      automated: 'true',
-      maxSteps: '5',
-      sessionTimeoutMs: '300000',
-      debug: 'true',
-    })
-  })
-
-  it('should handle empty tags', () => {
-    const xml = `
-      <action>stop</action>
-      <screenshot></screenshot>
-      <debug></debug>
-    `
-    const result = parseToolCallXml(xml)
-    expect(result).toEqual({
-      action: 'stop',
-      screenshot: '',
-      debug: '',
-    })
-  })
-})
diff --git a/backend/src/util/__tests__/simplify-tool-results.test.ts b/backend/src/util/__tests__/simplify-tool-results.test.ts
index bc54284da..cfe097b6e 100644
--- a/backend/src/util/__tests__/simplify-tool-results.test.ts
+++ b/backend/src/util/__tests__/simplify-tool-results.test.ts
@@ -1,4 +1,8 @@
-import { describe } from 'bun:test'
+import { describe, expect, it } from 'bun:test'
 
-describe('simplifyReadFileResults', () => {})
+describe('simplifyReadFileResults', () => {
+  it('todo: implement unit test suite', () => {
+    expect(false).toBe(true)
+  })
+})
 describe('simplifyTerminalCommandResults', () => {})
diff --git a/common/src/tools/params/tool/str-replace.ts b/common/src/tools/params/tool/str-replace.ts
index e890efd6d..9fb2d2626 100644
--- a/common/src/tools/params/tool/str-replace.ts
+++ b/common/src/tools/params/tool/str-replace.ts
@@ -11,6 +11,7 @@ export const updateFileResultSchema = z.union([
   z.object({
     file: z.string(),
     errorMessage: z.string(),
+    patch: z.string().optional(),
   }),
 ])
 
diff --git a/npm-app/src/tool-handlers.ts b/npm-app/src/tool-handlers.ts
index 036ed8ef7..986557d48 100644
--- a/npm-app/src/tool-handlers.ts
+++ b/npm-app/src/tool-handlers.ts
@@ -44,9 +44,10 @@ export const handleUpdateFile = async <
   const lines = fileChange.content.split('\n')
 
   await waitForPreviousCheckpoint()
-  const { created, modified, ignored, invalid, patchFailed } = applyChanges(projectPath, [
-    fileChange,
-  ])
+  const { created, modified, ignored, invalid, patchFailed } = applyChanges(
+    projectPath,
+    [fileChange],
+  )
   DiffManager.addChange(fileChange)
 
   let result: CodebuffToolOutput<T>[] = []
@@ -103,9 +104,16 @@ export const handleUpdateFile = async <
     ])
   }
   for (const file of patchFailed) {
-    result.push(
-      `Failed to write to ${file}; the patch failed to apply`,
-    )
+    result.push([
+      {
+        type: 'json',
+        value: {
+          file,
+          errorMessage: `Failed to apply patch.`,
+          patch: lines.join('\n'),
+        },
+      },
+    ])
   }
   for (const file of invalid) {
     result.push([
diff --git a/sdk/src/tools/change-file.ts b/sdk/src/tools/change-file.ts
index 186edd009..2dad89f21 100644
--- a/sdk/src/tools/change-file.ts
+++ b/sdk/src/tools/change-file.ts
@@ -22,7 +22,9 @@ export function changeFile(
   const fileChange = FileChangeSchema.parse(parameters)
   const lines = fileChange.content.split('\n')
 
-  const { created, modified, invalid, patchFailed } = applyChanges(cwd, [fileChange])
+  const { created, modified, invalid, patchFailed } = applyChanges(cwd, [
+    fileChange,
+  ])
 
   const results: CodebuffToolOutput<'str_replace'>[0]['value'][] = []
 
@@ -43,9 +45,11 @@ export function changeFile(
   }
 
   for (const file of patchFailed) {
-    results.push(
-      `Failed to write to ${file}; the patch failed to apply`,
-    )
+    results.push({
+      file,
+      errorMessage: `Failed to apply patch.`,
+      patch: lines.join('\n'),
+    })
   }
 
   for (const file of invalid) {

From dc9c7d4026a128f88f28956d7a20a53cb366c4a8 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 11:39:37 -0700
Subject: [PATCH 07/18] fix a few tests

---
 backend/src/__tests__/read-docs-tool.test.ts  | 61 ++++++++-----------
 backend/src/__tests__/web-search-tool.test.ts | 61 +++++++------------
 backend/src/util/messages.ts                  | 21 +++++--
 3 files changed, 63 insertions(+), 80 deletions(-)

diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts
index 67770610d..673338ba7 100644
--- a/backend/src/__tests__/read-docs-tool.test.ts
+++ b/backend/src/__tests__/read-docs-tool.test.ts
@@ -340,15 +340,12 @@ describe('read_docs tool with researcher agent', () => {
 
     // Check that the documentation was added to the message history
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('read_docs'),
+      (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      mockDocumentation,
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain(JSON.stringify(mockDocumentation).slice(1, -1))
   }, 10000)
 
   test('should fetch documentation with topic and max_tokens', async () => {
@@ -462,15 +459,12 @@ describe('read_docs tool with researcher agent', () => {
 
     // Check that the "no documentation found" message was added
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('read_docs'),
+      (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'No documentation found for "NonExistentLibrary"',
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('No documentation found for \\"NonExistentLibrary\\"')
   }, 10000)
 
   test('should handle API errors gracefully', async () => {
@@ -534,15 +528,12 @@ describe('read_docs tool with researcher agent', () => {
 
     // Check that the error message was added
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('read_docs'),
+      (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'Error fetching documentation for "React"',
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('Error fetching documentation for \\"React\\"')
     expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
       'Network timeout',
     )
@@ -605,14 +596,13 @@ describe('read_docs tool with researcher agent', () => {
 
     // Check that the topic is included in the error message
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('read_docs'),
+      (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'No documentation found for "React" with topic "server-components"',
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain(
+      'No documentation found for \\"React\\" with topic \\"server-components\\"',
     )
   }, 10000)
 
@@ -675,17 +665,14 @@ describe('read_docs tool with researcher agent', () => {
 
     // Check that the generic error message was added
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('read_docs'),
+      (m) => m.role === 'tool' && m.content.toolName === 'read_docs',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'Error fetching documentation for "React"',
-    )
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'Unknown error',
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('Error fetching documentation for \\"React\\"')
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('Unknown error')
   }, 10000)
 })
diff --git a/backend/src/__tests__/web-search-tool.test.ts b/backend/src/__tests__/web-search-tool.test.ts
index 57723ae05..89500b8e3 100644
--- a/backend/src/__tests__/web-search-tool.test.ts
+++ b/backend/src/__tests__/web-search-tool.test.ts
@@ -201,15 +201,12 @@ describe('web_search tool with researcher agent', () => {
 
     // Check that the search results were added to the message history
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('web_search'),
+      (m) => m.role === 'tool' && m.content.toolName === 'web_search',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      mockSearchResult,
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain(mockSearchResult)
   })
 
   test('should handle custom depth parameter', async () => {
@@ -309,15 +306,12 @@ describe('web_search tool with researcher agent', () => {
 
     // Check that the "no results found" message was added
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('web_search'),
+      (m) => m.role === 'tool' && m.content.toolName === 'web_search',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'No search results found',
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('No search results found')
   })
 
   test('should handle API errors gracefully', async () => {
@@ -369,18 +363,15 @@ describe('web_search tool with researcher agent', () => {
 
     // Check that the error message was added
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('web_search'),
+      (m) => m.role === 'tool' && m.content.toolName === 'web_search',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'Error performing web search',
-    )
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'Linkup API timeout',
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('Error performing web search')
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('Linkup API timeout')
   })
 
   test('should handle null response from searchWeb', async () => {
@@ -474,15 +465,12 @@ describe('web_search tool with researcher agent', () => {
 
     // Check that the error message was added
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('web_search'),
+      (m) => m.role === 'tool' && m.content.toolName === 'web_search',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'Error performing web search',
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('Error performing web search')
   })
 
   test('should format search results correctly', async () => {
@@ -535,14 +523,11 @@ describe('web_search tool with researcher agent', () => {
 
     // Check that the search results were formatted correctly
     const toolResultMessages = newAgentState.messageHistory.filter(
-      (m) =>
-        m.role === 'user' &&
-        typeof m.content === 'string' &&
-        m.content.includes('web_search'),
+      (m) => m.role === 'tool' && m.content.toolName === 'web_search',
     )
     expect(toolResultMessages.length).toBeGreaterThan(0)
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      mockSearchResult,
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain(mockSearchResult)
   })
 })
diff --git a/backend/src/util/messages.ts b/backend/src/util/messages.ts
index 3b54ff2e0..b58dbffc8 100644
--- a/backend/src/util/messages.ts
+++ b/backend/src/util/messages.ts
@@ -1,6 +1,7 @@
 import { AssertionError } from 'assert'
 
 import { buildArray } from '@codebuff/common/util/array'
+import { errorToObject } from '@codebuff/common/util/object'
 import { closeXml } from '@codebuff/common/util/xml'
 import { cloneDeep, isEqual } from 'lodash'
 
@@ -270,13 +271,23 @@ export function getEditedFiles(messages: Message[]): string[] {
         },
       )
       .map((m) => {
-        const fileInfo = (
-          m as CodebuffToolMessage<'create_plan' | 'str_replace' | 'write_file'>
-        ).content.output[0].value
-        if ('errorMessage' in fileInfo) {
+        try {
+          const fileInfo = (
+            m as CodebuffToolMessage<
+              'create_plan' | 'str_replace' | 'write_file'
+            >
+          ).content.output[0].value
+          if ('errorMessage' in fileInfo) {
+            return null
+          }
+          return fileInfo.file
+        } catch (error) {
+          logger.error(
+            { error: errorToObject(error) },
+            'Error parsing file info',
+          )
           return null
         }
-        return fileInfo.file
       }),
   )
 }

From 1f5f6f1af5bce371b1bd069533e72091e01a8a63 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 12:29:30 -0700
Subject: [PATCH 08/18] fix backend unit tests

---
 backend/src/__tests__/main-prompt.test.ts     |  18 +-
 backend/src/__tests__/read-docs-tool.test.ts  |   6 +-
 .../__tests__/run-programmatic-step.test.ts   |  37 ++--
 .../spawn-agents-permissions.test.ts          |  28 +--
 .../src/tools/handlers/tool/spawn-agents.ts   |   2 +-
 backend/src/util/__tests__/messages.test.ts   | 168 ++++++++++++------
 backend/src/util/messages.ts                  |  26 ++-
 backend/src/util/simplify-tool-results.ts     |  51 ++++--
 8 files changed, 209 insertions(+), 127 deletions(-)

diff --git a/backend/src/__tests__/main-prompt.test.ts b/backend/src/__tests__/main-prompt.test.ts
index 39f92d4cf..c27c4515e 100644
--- a/backend/src/__tests__/main-prompt.test.ts
+++ b/backend/src/__tests__/main-prompt.test.ts
@@ -303,23 +303,18 @@ describe('mainPrompt', () => {
     // It's usually the message right before the final assistant response.
     const toolResultMessages =
       newSessionState.mainAgentState.messageHistory.filter(
-        (m) =>
-          m.role === 'user' &&
-          typeof m.content === 'string' &&
-          m.content.includes('<tool_result>'),
+        (m) => m.role === 'tool',
       )
 
     // Find the specific tool result message that contains file_updates
     const fileUpdateMessage = toolResultMessages.find(
-      (m) =>
-        typeof m.content === 'string' &&
-        m.content.includes('<tool>read_files</tool>'),
+      (m) => m.content.toolName === 'read_files',
     )
 
     expect(fileUpdateMessage).toBeDefined()
-    expect(fileUpdateMessage?.content).toContain('test.txt')
+    expect(JSON.stringify(fileUpdateMessage?.content)).toContain('test.txt')
     // Check that the content reflects the *new* mock content within the file_updates result
-    expect(fileUpdateMessage?.content).toContain('old content')
+    expect(JSON.stringify(fileUpdateMessage?.content)).toContain('old content')
   })
 
   it('should handle direct terminal command', async () => {
@@ -369,10 +364,7 @@ describe('mainPrompt', () => {
     // Verify that a tool result was added to message history
     const toolResultMessages =
       newSessionState.mainAgentState.messageHistory.filter(
-        (m) =>
-          m.role === 'user' &&
-          typeof m.content === 'string' &&
-          m.content.includes('<tool_result>'),
+        (m) => m.role === 'tool',
       )
     expect(toolResultMessages.length).toBeGreaterThan(0)
   })
diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts
index 673338ba7..a3343f002 100644
--- a/backend/src/__tests__/read-docs-tool.test.ts
+++ b/backend/src/__tests__/read-docs-tool.test.ts
@@ -534,9 +534,9 @@ describe('read_docs tool with researcher agent', () => {
     expect(
       JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
     ).toContain('Error fetching documentation for \\"React\\"')
-    expect(toolResultMessages[toolResultMessages.length - 1].content).toContain(
-      'Network timeout',
-    )
+    expect(
+      JSON.stringify(toolResultMessages[toolResultMessages.length - 1].content),
+    ).toContain('Network timeout')
   }, 10000)
 
   test('should include topic in error message when specified', async () => {
diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts
index bd57514ba..a2270d98b 100644
--- a/backend/src/__tests__/run-programmatic-step.test.ts
+++ b/backend/src/__tests__/run-programmatic-step.test.ts
@@ -342,13 +342,12 @@ describe('runProgrammaticStep', () => {
       // Verify tool result was added to messageHistory
       const toolMessages = result.agentState.messageHistory.filter(
         (msg) =>
-          msg.role === 'user' &&
-          typeof msg.content === 'string' &&
-          msg.content.includes('src/auth.ts'),
+          msg.role === 'tool' &&
+          JSON.stringify(msg.content.output).includes('src/auth.ts'),
       )
       expect(toolMessages).toHaveLength(1)
-      expect(toolMessages[0].content).toContain('src/auth.ts')
-      expect(toolMessages[0].content).toContain('src/login.ts')
+      expect(JSON.stringify(toolMessages[0].content)).toContain('src/auth.ts')
+      expect(JSON.stringify(toolMessages[0].content)).toContain('src/login.ts')
 
       expect(result.endTurn).toBe(true)
     })
@@ -570,9 +569,11 @@ describe('runProgrammaticStep', () => {
 
       // Verify tool results were passed back to generator
       expect(toolResultsReceived).toHaveLength(7)
-      expect(toolResultsReceived[0]).toContain('authenticate')
-      expect(toolResultsReceived[3]).toContain('auth-analysis')
-      expect(toolResultsReceived[6]).toContain('Output set successfully')
+      expect(JSON.stringify(toolResultsReceived[0])).toContain('authenticate')
+      expect(JSON.stringify(toolResultsReceived[3])).toContain('auth-analysis')
+      expect(JSON.stringify(toolResultsReceived[6])).toContain(
+        'Output set successfully',
+      )
 
       // Verify state management throughout execution
       expect(stateSnapshots).toHaveLength(7)
@@ -651,19 +652,27 @@ describe('runProgrammaticStep', () => {
       executeToolCallSpy.mockImplementation(async (options: any) => {
         if (options.toolName === 'read_files') {
           options.toolResults.push({
+            type: 'tool-result',
             toolName: 'read_files',
             toolCallId: 'test-id',
-            output: {
-              type: 'text',
-              value: 'file content',
-            },
-          })
+            output: [
+              {
+                type: 'json',
+                value: 'file content',
+              },
+            ],
+          } satisfies ToolResultPart)
         }
       })
 
       await runProgrammaticStep(mockAgentState, mockParams)
 
-      expect(receivedToolResult).toEqual([])
+      expect(receivedToolResult).toEqual([
+        {
+          type: 'json',
+          value: 'file content',
+        },
+      ])
     })
   })
 
diff --git a/backend/src/__tests__/spawn-agents-permissions.test.ts b/backend/src/__tests__/spawn-agents-permissions.test.ts
index ebcad7b9e..a8cc15741 100644
--- a/backend/src/__tests__/spawn-agents-permissions.test.ts
+++ b/backend/src/__tests__/spawn-agents-permissions.test.ts
@@ -261,7 +261,7 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
-      expect(output).toContain('Mock agent response')
+      expect(JSON.stringify(output)).toContain('Mock agent response')
       expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1)
     })
 
@@ -293,8 +293,8 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
-      expect(output).toContain('Error spawning agent')
-      expect(output).toContain(
+      expect(JSON.stringify(output)).toContain('Error spawning agent')
+      expect(JSON.stringify(output)).toContain(
         'is not allowed to spawn child agent type reviewer',
       )
       expect(mockLoopAgentSteps).not.toHaveBeenCalled()
@@ -327,8 +327,10 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
-      expect(output).toContain('Error spawning agent')
-      expect(output).toContain('Agent type nonexistent not found')
+      expect(JSON.stringify(output)).toContain('Error spawning agent')
+      expect(JSON.stringify(output)).toContain(
+        'Agent type nonexistent not found',
+      )
       expect(mockLoopAgentSteps).not.toHaveBeenCalled()
     })
 
@@ -360,7 +362,7 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
-      expect(output).toContain('Mock agent response')
+      expect(JSON.stringify(output)).toContain('Mock agent response')
       expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1)
     })
 
@@ -395,7 +397,7 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
-      expect(output).toContain('Mock agent response')
+      expect(JSON.stringify(output)).toContain('Mock agent response')
       expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1)
     })
 
@@ -427,8 +429,10 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
-      expect(output).toContain('Error spawning agent')
-      expect(output).toContain('is not allowed to spawn child agent type')
+      expect(JSON.stringify(output)).toContain('Error spawning agent')
+      expect(JSON.stringify(output)).toContain(
+        'is not allowed to spawn child agent type',
+      )
       expect(mockLoopAgentSteps).not.toHaveBeenCalled()
     })
 
@@ -474,9 +478,9 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
-      expect(output).toContain('Mock agent response') // Successful thinker spawn
-      expect(output).toContain('Error spawning agent') // Failed reviewer spawn
-      expect(output).toContain(
+      expect(JSON.stringify(output)).toContain('Mock agent response') // Successful thinker spawn
+      expect(JSON.stringify(output)).toContain('Error spawning agent') // Failed reviewer spawn
+      expect(JSON.stringify(output)).toContain(
         'is not allowed to spawn child agent type reviewer',
       )
       expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1) // Only thinker was spawned
diff --git a/backend/src/tools/handlers/tool/spawn-agents.ts b/backend/src/tools/handlers/tool/spawn-agents.ts
index dba7f04d1..7dcb8752c 100644
--- a/backend/src/tools/handlers/tool/spawn-agents.ts
+++ b/backend/src/tools/handlers/tool/spawn-agents.ts
@@ -177,7 +177,7 @@ export const handleSpawnAgents = ((params: {
         } else {
           return {
             agentType: agentTypeStr,
-            errorMessage: result.reason,
+            errorMessage: `Error spawning agent: ${result.reason}`,
           }
         }
       }),
diff --git a/backend/src/util/__tests__/messages.test.ts b/backend/src/util/__tests__/messages.test.ts
index 8adb978bd..fb8c636e7 100644
--- a/backend/src/util/__tests__/messages.test.ts
+++ b/backend/src/util/__tests__/messages.test.ts
@@ -58,53 +58,82 @@ describe('trimMessagesToFitTokenLimit', () => {
         'This is a long message that would normally be shortened but since it has no tool calls it should be preserved completely intact no matter what',
     },
     {
-      role: 'user',
-      content: [
-        // Terminal output 0 (oldest) - should be simplified
-        {
-          type: 'text',
-          text: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Terminal output 0${'.'.repeat(2000)}</result>
-</tool_result>`,
-        },
-        // Terminal output 1 - should be preserved (shorter than '[Output omitted]')
-        {
-          type: 'text',
-          text: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Short output 1</result>
-</tool_result>`,
-        },
-      ],
+      // Terminal output 0 (oldest) - should be simplified
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-0',
+        output: [
+          {
+            type: 'json',
+            value: `Terminal output 0${'.'.repeat(2000)}`,
+          },
+        ],
+      },
     },
-    // Terminal output 2 - should be simplified
     {
-      role: 'user',
-      content: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Terminal output 2${'.'.repeat(2000)}</result>
-</tool_result>`,
+      // Terminal output 1 - should be preserved (shorter than '[Output omitted]')
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-1',
+        output: [
+          {
+            type: 'json',
+            value: `Short output 1`,
+          },
+        ],
+      },
     },
-    // Terminal output 3 - should be preserved (5th most recent)
     {
-      role: 'user',
-      content: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Terminal output 3</result>
-</tool_result>`,
+      // Terminal output 2 - should be simplified
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-2',
+        output: [
+          {
+            type: 'json',
+            value: `Terminal output 2${'.'.repeat(2000)}`,
+          },
+        ],
+      },
+    },
+    {
+      // Terminal output 3 - should be preserved (5th most recent)
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-3',
+        output: [
+          {
+            type: 'json',
+            value: `Terminal output 3`,
+          },
+        ],
+      },
+    },
+    {
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-4',
+        output: [
+          {
+            type: 'json',
+            value: `Terminal output 4`,
+          },
+        ],
+      },
     },
     {
       role: 'user',
       content: [
-        // Terminal output 4 - should be preserved (4th most recent)
-        {
-          type: 'text',
-          text: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Terminal output 4</result>
-</tool_result>`,
-        },
         // Regular message - should never be shortened
         {
           type: 'image',
@@ -114,31 +143,52 @@ describe('trimMessagesToFitTokenLimit', () => {
             data: 'xyz',
           },
         },
-        // Terminal output 5 - should be preserved (3rd most recent)
-        {
-          type: 'text',
-          text: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Terminal output 5</result>
-</tool_result>`,
-        },
       ],
     },
-    // Terminal output 6 - should be preserved (2nd most recent)
     {
-      role: 'user',
-      content: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Terminal output 6</result>
-</tool_result>`,
+      // Terminal output 5 - should be preserved (3rd most recent)
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-5',
+        output: [
+          {
+            type: 'json',
+            value: `Terminal output 5`,
+          },
+        ],
+      },
     },
-    // Terminal output 7 - should be preserved (most recent)
     {
-      role: 'user',
-      content: `<tool_result>
-<tool>run_terminal_command</tool>
-<result>Terminal output 7</result>
-</tool_result>`,
+      // Terminal output 6 - should be preserved (2nd most recent)
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-6',
+        output: [
+          {
+            type: 'json',
+            value: `Terminal output 6`,
+          },
+        ],
+      },
+    },
+    {
+      // Terminal output 7 - should be preserved (most recent)
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolName: 'run_terminal_command',
+        toolCallId: 'test-id-7',
+        output: [
+          {
+            type: 'json',
+            value: `Terminal output 7`,
+          },
+        ],
+      },
     },
     // Regular message - should never be shortened
     {
diff --git a/backend/src/util/messages.ts b/backend/src/util/messages.ts
index b58dbffc8..5f0687542 100644
--- a/backend/src/util/messages.ts
+++ b/backend/src/util/messages.ts
@@ -283,7 +283,7 @@ export function getEditedFiles(messages: Message[]): string[] {
           return fileInfo.file
         } catch (error) {
           logger.error(
-            { error: errorToObject(error) },
+            { error: errorToObject(error), m },
             'Error parsing file info',
           )
           return null
@@ -307,14 +307,22 @@ export function getPreviouslyReadFiles(messages: Message[]): {
         } => m.role === 'tool' && m.content.toolName === 'read_files',
       )
       .map((m) => {
-        return (
-          m as CodebuffToolMessage<'read_files'>
-        ).content.output[0].value.map((file) => {
-          if ('contentOmittedForLength' in file) {
-            return undefined
-          }
-          return file
-        })
+        try {
+          return (
+            m as CodebuffToolMessage<'read_files'>
+          ).content.output[0].value.map((file) => {
+            if ('contentOmittedForLength' in file) {
+              return undefined
+            }
+            return file
+          })
+        } catch (error) {
+          logger.error(
+            { error: errorToObject(error), m },
+            'Error parsing read_files output from message',
+          )
+          return []
+        }
       }),
   )
 }
diff --git a/backend/src/util/simplify-tool-results.ts b/backend/src/util/simplify-tool-results.ts
index 45c223157..06987c0ba 100644
--- a/backend/src/util/simplify-tool-results.ts
+++ b/backend/src/util/simplify-tool-results.ts
@@ -1,5 +1,8 @@
+import { errorToObject } from '@codebuff/common/util/object'
 import { cloneDeep } from 'lodash'
 
+import { logger } from './logger'
+
 import type { CodebuffToolOutput } from '@codebuff/common/tools/list'
 
 export function simplifyReadFileResults(
@@ -21,21 +24,37 @@ export function simplifyReadFileResults(
 export function simplifyTerminalCommandResults(
   messageContent: CodebuffToolOutput<'run_terminal_command'>,
 ): CodebuffToolOutput<'run_terminal_command'> {
-  const clone = cloneDeep(messageContent)
-  const content = clone[0].value
-  if ('processId' in content || 'errorMessage' in content) {
-    return clone
-  }
-  const { command, message, exitCode } = content
-  return [
-    {
-      type: 'json',
-      value: {
-        command,
-        message,
-        stdoutOmittedForLength: true,
-        ...(exitCode !== undefined && { exitCode }),
+  try {
+    const clone = cloneDeep(messageContent)
+    const content = clone[0].value
+    if ('processId' in content || 'errorMessage' in content) {
+      return clone
+    }
+    const { command, message, exitCode } = content
+    return [
+      {
+        type: 'json',
+        value: {
+          command,
+          ...(message && { message }),
+          stdoutOmittedForLength: true,
+          ...(exitCode !== undefined && { exitCode }),
+        },
       },
-    },
-  ]
+    ]
+  } catch (error) {
+    logger.error(
+      { error: errorToObject(error), messageContent },
+      'Error simplifying terminal command results',
+    )
+    return [
+      {
+        type: 'json',
+        value: {
+          command: '',
+          stdoutOmittedForLength: true,
+        },
+      },
+    ]
+  }
 }

From 184a8a9d3c932487645bc4356be932a1887a4e3d Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 12:42:44 -0700
Subject: [PATCH 09/18] add simplify-tool-results tests

---
 .../__tests__/simplify-tool-results.test.ts   | 370 +++++++++++++++++-
 1 file changed, 366 insertions(+), 4 deletions(-)

diff --git a/backend/src/util/__tests__/simplify-tool-results.test.ts b/backend/src/util/__tests__/simplify-tool-results.test.ts
index cfe097b6e..eedb1b749 100644
--- a/backend/src/util/__tests__/simplify-tool-results.test.ts
+++ b/backend/src/util/__tests__/simplify-tool-results.test.ts
@@ -1,8 +1,370 @@
-import { describe, expect, it } from 'bun:test'
+import {
+  afterEach,
+  beforeEach,
+  describe,
+  expect,
+  it,
+  mock,
+  spyOn,
+} from 'bun:test'
+
+import {
+  simplifyReadFileResults,
+  simplifyTerminalCommandResults,
+} from '../simplify-tool-results'
+import * as logger from '../logger'
+
+import type { CodebuffToolOutput } from '@codebuff/common/tools/list'
 
 describe('simplifyReadFileResults', () => {
-  it('todo: implement unit test suite', () => {
-    expect(false).toBe(true)
+  it('should simplify read file results by omitting content', () => {
+    const input: CodebuffToolOutput<'read_files'> = [
+      {
+        type: 'json',
+        value: [
+          {
+            path: 'src/file1.ts',
+            content: 'const x = 1;\nconsole.log(x);',
+            referencedBy: { 'file2.ts': ['line 5'] },
+          },
+          {
+            path: 'src/file2.ts',
+            content:
+              'import { x } from "./file1";\nfunction test() { return x; }',
+          },
+        ],
+      },
+    ]
+
+    const result = simplifyReadFileResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: [
+          {
+            path: 'src/file1.ts',
+            contentOmittedForLength: true,
+          },
+          {
+            path: 'src/file2.ts',
+            contentOmittedForLength: true,
+          },
+        ],
+      },
+    ])
+  })
+
+  it('should handle empty file results', () => {
+    const input: CodebuffToolOutput<'read_files'> = [
+      {
+        type: 'json',
+        value: [],
+      },
+    ]
+
+    const result = simplifyReadFileResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: [],
+      },
+    ])
+  })
+
+  it('should handle files with contentOmittedForLength already set', () => {
+    const input: CodebuffToolOutput<'read_files'> = [
+      {
+        type: 'json',
+        value: [
+          {
+            path: 'src/file1.ts',
+            contentOmittedForLength: true,
+          },
+        ],
+      },
+    ]
+
+    const result = simplifyReadFileResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: [
+          {
+            path: 'src/file1.ts',
+            contentOmittedForLength: true,
+          },
+        ],
+      },
+    ])
+  })
+
+  it('should not mutate the original input', () => {
+    const originalInput: CodebuffToolOutput<'read_files'> = [
+      {
+        type: 'json',
+        value: [
+          {
+            path: 'src/file1.ts',
+            content: 'const x = 1;',
+          },
+        ],
+      },
+    ]
+    const input = structuredClone(originalInput)
+
+    simplifyReadFileResults(input)
+
+    // Original input should be unchanged
+    expect(input).toEqual(originalInput)
+  })
+})
+
+describe('simplifyTerminalCommandResults', () => {
+  beforeEach(() => {
+    // Mock the logger.error function directly
+    spyOn(logger.logger, 'error').mockImplementation(() => {})
+  })
+
+  afterEach(() => {
+    mock.restore()
+  })
+
+  it('should simplify terminal command results with stdout', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'npm test',
+          startingCwd: '/project',
+          message: 'Tests completed',
+          stderr: '',
+          stdout: 'Test suite passed\n✓ All tests passed',
+          exitCode: 0,
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: {
+          command: 'npm test',
+          message: 'Tests completed',
+          stdoutOmittedForLength: true,
+          exitCode: 0,
+        },
+      },
+    ])
+  })
+
+  it('should simplify terminal command results without message', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'ls -la',
+          stdout: 'file1.txt\nfile2.txt',
+          exitCode: 0,
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: {
+          command: 'ls -la',
+          stdoutOmittedForLength: true,
+          exitCode: 0,
+        },
+      },
+    ])
+  })
+
+  it('should simplify terminal command results without exitCode', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'echo hello',
+          stdout: 'hello',
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: {
+          command: 'echo hello',
+          stdoutOmittedForLength: true,
+        },
+      },
+    ])
+  })
+
+  it('should handle background process results without simplification', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'npm start',
+          processId: 12345,
+          backgroundProcessStatus: 'running' as const,
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual(input)
+  })
+
+  it('should handle error message results without simplification', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'invalid-command',
+          errorMessage: 'Command not found',
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual(input)
+  })
+
+  it('should handle results that already have stdoutOmittedForLength', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'npm test',
+          message: 'Tests completed',
+          stdoutOmittedForLength: true,
+          exitCode: 0,
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: {
+          command: 'npm test',
+          message: 'Tests completed',
+          stdoutOmittedForLength: true,
+          exitCode: 0,
+        },
+      },
+    ])
+  })
+
+  it('should handle errors gracefully and return fallback result', () => {
+    // Create input that will cause an error during processing
+    const malformedInput = {
+      invalidStructure: true,
+    } as any
+
+    const result = simplifyTerminalCommandResults(malformedInput)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: {
+          command: '',
+          stdoutOmittedForLength: true,
+        },
+      },
+    ])
+
+    // Verify error was logged
+    expect(logger.logger.error).toHaveBeenCalled()
+  })
+
+  it('should not mutate the original input', () => {
+    const originalInput: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'npm test',
+          stdout: 'Test output',
+          exitCode: 0,
+        },
+      },
+    ]
+    const input = structuredClone(originalInput)
+
+    simplifyTerminalCommandResults(input)
+
+    // Original input should be unchanged
+    expect(input).toEqual(originalInput)
+  })
+
+  it('should handle terminal command with stderr', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'npm test',
+          stderr: 'Warning: deprecated package',
+          stdout: 'Tests passed',
+          exitCode: 0,
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: {
+          command: 'npm test',
+          stdoutOmittedForLength: true,
+          exitCode: 0,
+        },
+      },
+    ])
+  })
+
+  it('should handle terminal command with startingCwd', () => {
+    const input: CodebuffToolOutput<'run_terminal_command'> = [
+      {
+        type: 'json',
+        value: {
+          command: 'pwd',
+          startingCwd: '/home/user/project',
+          stdout: '/home/user/project',
+          exitCode: 0,
+        },
+      },
+    ]
+
+    const result = simplifyTerminalCommandResults(input)
+
+    expect(result).toEqual([
+      {
+        type: 'json',
+        value: {
+          command: 'pwd',
+          stdoutOmittedForLength: true,
+          exitCode: 0,
+        },
+      },
+    ])
   })
 })
-describe('simplifyTerminalCommandResults', () => {})

From 3be343160fce58b2d34ab8687305d4e718b76f4e Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 13:51:17 -0700
Subject: [PATCH 10/18] fix npm-app tests

---
 npm-app/src/__tests__/tool-handlers.test.ts   |  15 +-
 npm-app/src/background-process-manager.ts     |  10 +-
 .../background-process-manager.test.ts.snap   | 204 +++++++++---------
 .../background-process-manager.test.ts        |   2 +-
 4 files changed, 118 insertions(+), 113 deletions(-)

diff --git a/npm-app/src/__tests__/tool-handlers.test.ts b/npm-app/src/__tests__/tool-handlers.test.ts
index 515bade33..0f0abafbe 100644
--- a/npm-app/src/__tests__/tool-handlers.test.ts
+++ b/npm-app/src/__tests__/tool-handlers.test.ts
@@ -83,11 +83,9 @@ export interface TestInterface {
       cwd: '__tests__/data',
     }
 
-    const result = await handleCodeSearch(parameters, 'test-id')
+    await handleCodeSearch(parameters, 'test-id')
 
     expect(mockGetProjectRoot).toHaveBeenCalled()
-    expect(typeof result).toBe('string')
-    expect(result.length).toBeGreaterThan(0)
   })
 
   test('handles basic search without cwd', async () => {
@@ -97,7 +95,7 @@ export interface TestInterface {
 
     const result = await handleCodeSearch(parameters, 'test-id')
 
-    expect(typeof result).toBe('string')
+    expect(result[0].value).toContainKey('message')
   })
 
   test('finds specific content in test file', async () => {
@@ -109,9 +107,10 @@ export interface TestInterface {
     const result = await handleCodeSearch(parameters, 'test-id')
 
     expect(mockGetProjectRoot).toHaveBeenCalled()
-    expect(typeof result).toBe('string')
-    expect(result).toContain('UNIQUE_SEARCH_STRING_12345')
-    expect(result).toContain('test-content.js')
+    expect((result[0].value as any).stdout).toContain(
+      'UNIQUE_SEARCH_STRING_12345',
+    )
+    expect((result[0].value as any).stdout).toContain('test-content.js')
   })
 
   test('searches with case-insensitive flag', async () => {
@@ -123,6 +122,6 @@ export interface TestInterface {
 
     const result = await handleCodeSearch(parameters, 'test-id')
 
-    expect(result).toContain('findme_xyz789')
+    expect((result[0].value as any).stdout).toContain('findme_xyz789')
   })
 })
diff --git a/npm-app/src/background-process-manager.ts b/npm-app/src/background-process-manager.ts
index c67663d2c..96f9227a1 100644
--- a/npm-app/src/background-process-manager.ts
+++ b/npm-app/src/background-process-manager.ts
@@ -88,14 +88,16 @@ function getOutputWithContext(
  * Formats a single background process's info into a string
  */
 export function getBackgroundProcessUpdate(info: BackgroundProcessInfo) {
+  const previousStdoutLength = info.lastReportedStdoutLength
   const newStdout = info.stdoutBuffer
     .join('')
     .slice(info.lastReportedStdoutLength)
-  info.lastReportedStdoutLength = newStdout.length
+  info.lastReportedStdoutLength += newStdout.length
+  const previousStderrLength = info.lastReportedStderrLength
   const newStderr = info.stderrBuffer
     .join('')
     .slice(info.lastReportedStderrLength)
-  info.lastReportedStderrLength = newStderr.length
+  info.lastReportedStderrLength += newStderr.length
 
   // Only report finished processes if there are changes
   const newStatus = info.status
@@ -122,7 +124,7 @@ export function getBackgroundProcessUpdate(info: BackgroundProcessInfo) {
     ...(newStdout
       ? {
           stdout: truncateStringWithMessage({
-            str: getOutputWithContext(newStdout, info.lastReportedStdoutLength),
+            str: getOutputWithContext(newStdout, previousStdoutLength),
             maxLength: COMMAND_OUTPUT_LIMIT,
             remove: 'START',
           }),
@@ -131,7 +133,7 @@ export function getBackgroundProcessUpdate(info: BackgroundProcessInfo) {
     ...(newStderr
       ? {
           stderr: truncateStringWithMessage({
-            str: getOutputWithContext(newStderr, info.lastReportedStderrLength),
+            str: getOutputWithContext(newStderr, previousStderrLength),
             maxLength: COMMAND_OUTPUT_LIMIT,
             remove: 'START',
           }),
diff --git a/npm-app/src/utils/__tests__/__snapshots__/background-process-manager.test.ts.snap b/npm-app/src/utils/__tests__/__snapshots__/background-process-manager.test.ts.snap
index 17ddbdc30..e54171fb4 100644
--- a/npm-app/src/utils/__tests__/__snapshots__/background-process-manager.test.ts.snap
+++ b/npm-app/src/utils/__tests__/__snapshots__/background-process-manager.test.ts.snap
@@ -1,137 +1,141 @@
 // Bun Snapshot v1, https://goo.gl/fbAQLP
 
 exports[`getBackgroundProcessInfoString formats a running process correctly 1`] = `
-"<background_process>
-<process_id>123</process_id>
-<command>npm test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>2000</duration_ms>
-<stdout>test output</stdout>
-<stderr>test error</stderr>
-<status>running</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "running",
+  "command": "npm test",
+  "durationMs": 2000,
+  "processId": 123,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stderr": "test error",
+  "stdout": "test output",
+}
 `;
 
 exports[`getBackgroundProcessInfoString formats a completed process correctly 1`] = `
-"<background_process>
-<process_id>456</process_id>
-<command>npm build</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1000</duration_ms>
-<stdout>build successful</stdout>
-<status>completed</status>
-<exit_code>0</exit_code>
-</background_process>"
+{
+  "backgroundProcessStatus": "completed",
+  "command": "npm build",
+  "durationMs": 1000,
+  "exitCode": 0,
+  "processId": 456,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stdout": "build successful",
+}
 `;
 
 exports[`getBackgroundProcessInfoString formats an errored process correctly 1`] = `
-"<background_process>
-<process_id>789</process_id>
-<command>invalid-command</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1500</duration_ms>
-<stderr>command not found</stderr>
-<status>error</status>
-<exit_code>1</exit_code>
-<signal_code>SIGTERM</signal_code>
-</background_process>"
+{
+  "backgroundProcessStatus": "error",
+  "command": "invalid-command",
+  "durationMs": 1500,
+  "exitCode": 1,
+  "processId": 789,
+  "signalCode": "SIGTERM",
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stderr": "command not found",
+}
 `;
 
 exports[`getBackgroundProcessInfoString handles new output since last report 1`] = `
-"<background_process>
-<process_id>102</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1000</duration_ms>
-<stdout>[PREVIOUS OUTPUT]
- more output</stdout>
-<status>completed</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "completed",
+  "command": "echo test",
+  "durationMs": 1000,
+  "processId": 102,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stdout": 
+"[PREVIOUS OUTPUT]
+ more output"
+,
+}
 `;
 
 exports[`getBackgroundProcessInfoString handles no new content 1`] = `
-"<background_process>
-<process_id>103</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1000</duration_ms>
-<status>running</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "running",
+  "command": "echo test",
+  "durationMs": 1000,
+  "processId": 103,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+}
 `;
 
 exports[`getBackgroundProcessInfoString handles new stderr without when no previous stderr 1`] = `
-"<background_process>
-<process_id>104</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1000</duration_ms>
-<stderr>new error</stderr>
-<status>error</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "error",
+  "command": "echo test",
+  "durationMs": 1000,
+  "processId": 104,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stderr": "new error",
+}
 `;
 
 exports[`getBackgroundProcessInfoString handles new stdout without when no previous stdout 1`] = `
-"<background_process>
-<process_id>105</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>2000</duration_ms>
-<stdout>first output</stdout>
-<status>running</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "running",
+  "command": "echo test",
+  "durationMs": 2000,
+  "processId": 105,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stdout": "first output",
+}
 `;
 
 exports[`getBackgroundProcessInfoString reports completed process with new stderr even if stdout unchanged 1`] = `
-"<background_process>
-<process_id>106</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1000</duration_ms>
-<stderr>new error</stderr>
-<status>completed</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "completed",
+  "command": "echo test",
+  "durationMs": 1000,
+  "processId": 106,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stderr": "new error",
+}
 `;
 
 exports[`getBackgroundProcessInfoString reports completed process with new stdout even if stderr unchanged 1`] = `
-"<background_process>
-<process_id>107</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1000</duration_ms>
-<stdout>[PREVIOUS OUTPUT]
- more</stdout>
-<status>completed</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "completed",
+  "command": "echo test",
+  "durationMs": 1000,
+  "processId": 107,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stdout": 
+"[PREVIOUS OUTPUT]
+ more"
+,
+}
 `;
 
 exports[`getBackgroundProcessInfoString reports process when status changes even without output changes 1`] = `
-"<background_process>
-<process_id>108</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1000</duration_ms>
-<status>completed</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "completed",
+  "command": "echo test",
+  "durationMs": 1000,
+  "processId": 108,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+}
 `;
 
 exports[`getBackgroundProcessInfoString calculates duration from endTime when available 1`] = `
-"<background_process>
-<process_id>109</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>1500</duration_ms>
-<stdout>test</stdout>
-<status>completed</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "completed",
+  "command": "echo test",
+  "durationMs": 1500,
+  "processId": 109,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stdout": "test",
+}
 `;
 
 exports[`getBackgroundProcessInfoString calculates duration from current time when no endTime 1`] = `
-"<background_process>
-<process_id>110</process_id>
-<command>echo test</command>
-<start_time_utc>1970-01-01T00:00:01.000Z</start_time_utc>
-<duration_ms>2000</duration_ms>
-<stdout>test</stdout>
-<status>running</status>
-</background_process>"
+{
+  "backgroundProcessStatus": "running",
+  "command": "echo test",
+  "durationMs": 2000,
+  "processId": 110,
+  "startTimeUtc": "1970-01-01T00:00:01.000Z",
+  "stdout": "test",
+}
 `;
diff --git a/npm-app/src/utils/__tests__/background-process-manager.test.ts b/npm-app/src/utils/__tests__/background-process-manager.test.ts
index 0f2929e08..956c1e243 100644
--- a/npm-app/src/utils/__tests__/background-process-manager.test.ts
+++ b/npm-app/src/utils/__tests__/background-process-manager.test.ts
@@ -144,7 +144,7 @@ if (!isCI) {
         }
 
         const result = getBackgroundProcessUpdate(info)
-        expect(result).toBe({} as any)
+        expect(Boolean(result)).toBeFalse()
       })
 
       test('handles new output since last report', () => {

From 5770c0bf4fda8341a0128f2a3aafe63b2340601d Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 14:59:51 -0700
Subject: [PATCH 11/18] expire userPrompt messages after each loopAgentSteps

---
 backend/src/run-agent-step.ts | 10 +++++++++-
 common/src/util/messages.ts   | 30 +++++++++++++++---------------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index a4116c176..03054d120 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -493,7 +493,7 @@ export const loopAgentSteps = async (
 
   // Build the initial message history with user prompt and instructions
   const initialMessages = buildArray<Message>(
-    ...agentState.messageHistory.map((m) => ({
+    agentState.messageHistory.map((m) => ({
       ...m,
       keepDuringTruncation: false,
     })),
@@ -578,6 +578,10 @@ export const loopAgentSteps = async (
 
       // End turn if programmatic step ended turn, or if the previous runAgentStep ended turn
       if (shouldEndTurn) {
+        currentAgentState.messageHistory = expireMessages(
+          currentAgentState.messageHistory,
+          'userPrompt',
+        )
         return {
           agentState: currentAgentState,
         }
@@ -605,6 +609,10 @@ export const loopAgentSteps = async (
       currentParams = undefined
     }
 
+    currentAgentState.messageHistory = expireMessages(
+      currentAgentState.messageHistory,
+      'userPrompt',
+    )
     return { agentState: currentAgentState }
   } catch (error) {
     // Log the error but still return the state with partial costs
diff --git a/common/src/util/messages.ts b/common/src/util/messages.ts
index 67229b4e6..debb58338 100644
--- a/common/src/util/messages.ts
+++ b/common/src/util/messages.ts
@@ -87,7 +87,7 @@ function userToCodebuffMessage(
     content: Exclude<UserMessage['content'], string>[number]
   },
 ): NonStringContent<UserMessage> {
-  return { ...message, content: [message.content] }
+  return cloneDeep({ ...message, content: [message.content] })
 }
 
 function assistantToCodebuffMessage(
@@ -96,7 +96,7 @@ function assistantToCodebuffMessage(
   },
 ): NonStringContent<AssistantMessage> {
   if (message.content.type === 'tool-call') {
-    return {
+    return cloneDeep({
       ...message,
       content: [
         {
@@ -108,9 +108,9 @@ function assistantToCodebuffMessage(
           ),
         },
       ],
-    }
+    })
   }
-  return { ...message, content: [message.content] }
+  return cloneDeep({ ...message, content: [message.content] })
 }
 
 function toolToCodebuffMessage(
@@ -123,7 +123,7 @@ function toolToCodebuffMessage(
         toolCallId: message.content.toolCallId,
         output: o.value,
       }
-      return {
+      return cloneDeep({
         ...message,
         role: 'user',
         content: [
@@ -132,14 +132,14 @@ function toolToCodebuffMessage(
             text: `<tool_result>\n${JSON.stringify(toolResult, null, 2)}\n</tool_result>`,
           },
         ],
-      } satisfies NonStringContent<UserMessage>
+      } satisfies NonStringContent<UserMessage>)
     }
     if (o.type === 'media') {
-      return {
+      return cloneDeep({
         ...message,
         role: 'user',
         content: [{ type: 'file', data: o.data, mediaType: o.mediaType }],
-      } satisfies NonStringContent<UserMessage>
+      } satisfies NonStringContent<UserMessage>)
     }
     o satisfies never
     const oAny = o as any
@@ -155,14 +155,14 @@ function convertToolMessages(
   | NonStringContent<AssistantMessage>
 > {
   if (message.role === 'system') {
-    return message
+    return cloneDeep(message)
   }
   if (message.role === 'user') {
     if (typeof message.content === 'string') {
-      return {
+      return cloneDeep({
         ...message,
         content: [{ type: 'text' as const, text: message.content }],
-      }
+      })
     }
     return message.content.map((c) => {
       return userToCodebuffMessage({
@@ -173,10 +173,10 @@ function convertToolMessages(
   }
   if (message.role === 'assistant') {
     if (typeof message.content === 'string') {
-      return {
+      return cloneDeep({
         ...message,
         content: [{ type: 'text' as const, text: message.content }],
-      }
+      })
     }
     return message.content.map((c) => {
       return assistantToCodebuffMessage({
@@ -211,8 +211,8 @@ export function convertCbToModelMessages({
 
     const lastMessage = aggregated[aggregated.length - 1]
     if (
-      lastMessage.keepDuringTruncation !== message.keepDuringTruncation &&
-      lastMessage.timeToLive !== message.timeToLive &&
+      lastMessage.keepDuringTruncation !== message.keepDuringTruncation ||
+      lastMessage.timeToLive !== message.timeToLive ||
       !isEqual(lastMessage.providerOptions, message.providerOptions)
     ) {
       aggregated.push(message)

From 7ccba433561174bee08cc631230360513942695c Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 16:21:20 -0700
Subject: [PATCH 12/18] tweak agents

---
 .agents/__tests__/context-pruner.test.ts      | 318 ++++++++++++------
 .agents/base2/base2-factory.ts                |   4 +-
 .agents/base2/editor.ts                       |   2 +-
 .agents/changes-reviewer.ts                   |   1 +
 .agents/context-pruner.ts                     | 126 +++----
 .agents/factory/base.ts                       |   1 +
 .agents/git-committer.ts                      |   2 +
 .agents/package.json                          |   3 +
 .agents/types/agent-definition.ts             |  22 +-
 .agents/types/tools.ts                        |  12 +-
 .../__tests__/run-programmatic-step.test.ts   |   2 +
 backend/src/run-agent-step.ts                 |  42 ++-
 backend/src/run-programmatic-step.ts          |  13 +-
 .../tools/handlers/tool/spawn-agent-inline.ts |   1 +
 .../tools/handlers/tool/spawn-agent-utils.ts  |   3 +
 bun.lock                                      |   5 +-
 .../examples/02-intermediate-git-committer.ts |   2 +
 .../types/agent-definition.ts                 |  22 +-
 common/src/util/messages.ts                   |   2 +
 sdk/src/index.ts                              |   4 +
 20 files changed, 360 insertions(+), 227 deletions(-)

diff --git a/.agents/__tests__/context-pruner.test.ts b/.agents/__tests__/context-pruner.test.ts
index 69940bfac..790e9d871 100644
--- a/.agents/__tests__/context-pruner.test.ts
+++ b/.agents/__tests__/context-pruner.test.ts
@@ -1,6 +1,8 @@
 import { describe, test, expect, beforeEach } from 'bun:test'
+
 import contextPruner from '../context-pruner'
-import type { Message } from '../types/agent-definition'
+
+import type { Message } from '@codebuff/sdk'
 
 describe('context-pruner handleSteps', () => {
   let mockAgentState: any
@@ -11,11 +13,57 @@ describe('context-pruner handleSteps', () => {
     }
   })
 
-  const createMessage = (role: 'user' | 'assistant', content: string): Message => ({
+  const createMessage = (
+    role: 'user' | 'assistant',
+    content: string,
+  ): Message => ({
     role,
     content,
   })
 
+  const createTerminalToolMessage = (
+    command: string,
+    output: string,
+    exitCode?: number,
+  ): any => ({
+    role: 'tool',
+    content: {
+      type: 'tool-result',
+      toolCallId: 'test-id',
+      toolName: 'run_terminal_command',
+      output: [
+        {
+          type: 'json',
+          value: {
+            command,
+            stdout: output,
+            ...(exitCode !== undefined && { exitCode }),
+          },
+        },
+      ],
+    },
+  })
+
+  const createLargeToolMessage = (
+    toolName: string,
+    largeData: string,
+  ): any => ({
+    role: 'tool',
+    content: {
+      type: 'tool-result',
+      toolCallId: 'test-id',
+      toolName,
+      output: [
+        {
+          type: 'json',
+          value: {
+            data: largeData,
+          },
+        },
+      ],
+    },
+  })
+
   const runHandleSteps = (messages: Message[]) => {
     mockAgentState.messageHistory = messages
     const generator = contextPruner.handleSteps!({ agentState: mockAgentState })
@@ -50,7 +98,10 @@ describe('context-pruner handleSteps', () => {
   test('removes spawn_agent_inline call for context-pruner and following messages', () => {
     const messages = [
       createMessage('user', 'Hello'),
-      createMessage('assistant', 'I will spawn the context-pruner agent.\n\n<codebuff_tool_call>\n{\n  "cb_tool_name": "spawn_agent_inline",\n  "agent_type": "context-pruner"\n}\n</codebuff_tool_call>'),
+      createMessage(
+        'assistant',
+        'I will spawn the context-pruner agent.\n\n<codebuff_tool_call>\n{\n  "cb_tool_name": "spawn_agent_inline",\n  "agent_type": "context-pruner"\n}\n</codebuff_tool_call>',
+      ),
       createMessage('user', '{"params": {"maxContextLength": 100000}}'),
       createMessage('user', 'Tools and instructions'),
     ]
@@ -77,7 +128,10 @@ describe('context-pruner handleSteps', () => {
   test('handles context-pruner spawn call without enough following messages', () => {
     const messages = [
       createMessage('user', 'Hello'),
-      createMessage('assistant', 'I will spawn the context-pruner agent.\n\n<codebuff_tool_call>\n{\n  "cb_tool_name": "spawn_agent_inline",\n  "agent_type": "context-pruner"\n}\n</codebuff_tool_call>'),
+      createMessage(
+        'assistant',
+        'I will spawn the context-pruner agent.\n\n<codebuff_tool_call>\n{\n  "cb_tool_name": "spawn_agent_inline",\n  "agent_type": "context-pruner"\n}\n</codebuff_tool_call>',
+      ),
       createMessage('user', '{"params": {"maxContextLength": 100000}}'),
     ]
 
@@ -86,21 +140,24 @@ describe('context-pruner handleSteps', () => {
     expect(results).toHaveLength(1)
     // Should preserve all messages since there aren't 3 messages to remove
     expect(results[0].input.messages).toHaveLength(1)
-
   })
 
   test('removes old terminal command results while keeping recent 5', () => {
     // Create content large enough to exceed 200k token limit (~600k chars)
     const largeContent = 'x'.repeat(150000)
-    
+
     const messages = [
       createMessage('user', largeContent),
       createMessage('assistant', largeContent),
       createMessage('user', largeContent),
       createMessage('assistant', largeContent),
       // 7 terminal commands (should keep last 5, simplify first 2)
-      ...Array.from({ length: 7 }, (_, i) => 
-        createMessage('assistant', `Command ${i + 1}\n<tool_result><tool>run_terminal_command</tool><result>Large output ${i + 1}: ${'y'.repeat(1000)}</result></tool_result>`)
+      ...Array.from({ length: 7 }, (_, i) =>
+        createTerminalToolMessage(
+          `command-${i + 1}`,
+          `Large output ${i + 1}: ${'y'.repeat(1000)}`,
+          0,
+        ),
       ),
     ]
 
@@ -108,84 +165,104 @@ describe('context-pruner handleSteps', () => {
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
-    // Check that first 2 terminal commands are simplified (should have been replaced)
-    const firstTerminalMessage = resultMessages.find((m: any) => 
-      typeof m.content === 'string' && m.content.includes('Command 1')
+
+    // Check that first 2 terminal commands are simplified
+    const firstTerminalMessage = resultMessages.find(
+      (m: any) =>
+        m.role === 'tool' &&
+        m.content?.toolName === 'run_terminal_command' &&
+        m.content?.output?.[0]?.value?.command === 'command-1',
     )
-    expect(firstTerminalMessage?.content).toContain('[Output omitted]')
-    
+    expect(
+      firstTerminalMessage?.content?.output?.[0]?.value?.stdoutOmittedForLength,
+    ).toBe(true)
+
     // Check that recent terminal commands are preserved (but may be processed by large tool result pass)
-    const recentTerminalMessage = resultMessages.find((m: any) => 
-      typeof m.content === 'string' && m.content.includes('Command 7')
+    const recentTerminalMessage = resultMessages.find(
+      (m: any) =>
+        m.role === 'tool' &&
+        m.content?.toolName === 'run_terminal_command' &&
+        (m.content?.output?.[0]?.value?.command === 'command-7' ||
+          m.content?.output?.[0]?.value?.message ===
+            '[Large tool result omitted]'),
     )
-    // The recent message should exist, but if it's large, it may get processed by Pass 2
     expect(recentTerminalMessage).toBeDefined()
-    expect(recentTerminalMessage?.content).toContain('Command 7')
   })
 
   test('removes large tool results', () => {
     // Create content large enough to exceed 200k token limit (~600k chars) to trigger terminal pass
     const largeContent = 'z'.repeat(150000)
-    const largeToolResult = 'x'.repeat(2000) // > 1000 chars
-    
+    const largeToolData = 'x'.repeat(2000) // > 1000 chars when stringified
+
     const messages = [
       createMessage('user', largeContent),
       createMessage('assistant', largeContent),
       createMessage('user', largeContent),
       createMessage('assistant', largeContent),
-      // Message with large tool result (total message > 1000 chars)
-      createMessage('assistant', `Some text before <tool_result><tool>read_files</tool><result>${largeToolResult}</result></tool_result> some text after`),
-      createMessage('assistant', `<tool_result><tool>code_search</tool><result>Small result</result></tool_result>`),
+      // Message with large tool result
+      createLargeToolMessage('read_files', largeToolData),
+      createLargeToolMessage('code_search', 'Small result'),
     ]
 
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
+
     // Large tool result should be simplified
-    const largeResultMessage = resultMessages.find((m: any) => 
-      typeof m.content === 'string' && m.content.includes('read_files')
+    const largeResultMessage = resultMessages.find(
+      (m: any) => m.role === 'tool' && m.content?.toolName === 'read_files',
+    )
+    expect(largeResultMessage?.content?.output?.[0]?.value?.message).toBe(
+      '[Large tool result omitted]',
     )
-    expect(largeResultMessage?.content).toContain('[Large tool result omitted]')
-    
+
     // Small tool result should be preserved
-    const smallResultMessage = resultMessages.find((m: any) => 
-      typeof m.content === 'string' && m.content.includes('Small result')
+    const smallResultMessage = resultMessages.find(
+      (m: any) => m.role === 'tool' && m.content?.toolName === 'code_search',
+    )
+    expect(smallResultMessage?.content?.output?.[0]?.value?.data).toBe(
+      'Small result',
     )
-    expect(smallResultMessage?.content).toContain('Small result')
   })
 
   test('performs message-level pruning when other passes are insufficient', () => {
     // Create many large messages to exceed token limit
     const largeContent = 'z'.repeat(50000)
-    
-    const messages = Array.from({ length: 20 }, (_, i) => 
-      createMessage(i % 2 === 0 ? 'user' : 'assistant', `Message ${i + 1}: ${largeContent}`)
+
+    const messages = Array.from({ length: 20 }, (_, i) =>
+      createMessage(
+        i % 2 === 0 ? 'user' : 'assistant',
+        `Message ${i + 1}: ${largeContent}`,
+      ),
     )
 
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
+
     // Should have fewer messages due to pruning
     expect(resultMessages.length).toBeLessThan(messages.length)
-    
+
     // Should contain replacement messages
-    const hasReplacementMessage = resultMessages.some((m: any) => 
-      typeof m.content === 'string' && m.content.includes('Previous message(s) omitted due to length')
+    const hasReplacementMessage = resultMessages.some(
+      (m: any) =>
+        typeof m.content === 'string' &&
+        m.content.includes('Previous message(s) omitted due to length'),
     )
     expect(hasReplacementMessage).toBe(true)
   })
 
   test('preserves messages with keepDuringTruncation flag', () => {
     const largeContent = 'w'.repeat(50000)
-    
+
     const messages = [
       createMessage('user', `Message 1: ${largeContent}`),
-      { ...createMessage('assistant', `Important message: ${largeContent}`), keepDuringTruncation: true },
+      {
+        ...createMessage('assistant', `Important message: ${largeContent}`),
+        keepDuringTruncation: true,
+      },
       createMessage('user', `Message 3: ${largeContent}`),
     ] as any[]
 
@@ -193,10 +270,12 @@ describe('context-pruner handleSteps', () => {
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
+
     // Important message should be preserved
-    const importantMessage = resultMessages.find((m: any) => 
-      typeof m.content === 'string' && m.content.includes('Important message')
+    const importantMessage = resultMessages.find(
+      (m: any) =>
+        typeof m.content === 'string' &&
+        m.content.includes('Important message'),
     )
     expect(importantMessage).toBeDefined()
   })
@@ -230,11 +309,11 @@ describe('context-pruner handleSteps', () => {
     // Test the internal token counting logic indirectly
     const shortMessage = createMessage('user', 'Hi')
     const longMessage = createMessage('user', 'x'.repeat(300)) // ~100 tokens
-    
+
     // Short message should not trigger pruning
     let results = runHandleSteps([shortMessage])
     expect(results[0].input.messages).toHaveLength(1)
-    
+
     // Very long message should potentially trigger some processing
     results = runHandleSteps([longMessage])
     expect(results).toHaveLength(1)
@@ -250,11 +329,32 @@ describe('context-pruner edge cases', () => {
     }
   })
 
-  const createMessage = (role: 'user' | 'assistant', content: string): Message => ({
+  const createMessage = (
+    role: 'user' | 'assistant',
+    content: string,
+  ): Message => ({
     role,
     content,
   })
 
+  const createTerminalToolMessage = (command: string, output: string): any => ({
+    role: 'tool',
+    content: {
+      type: 'tool-result',
+      toolCallId: 'test-id',
+      toolName: 'run_terminal_command',
+      output: [
+        {
+          type: 'json',
+          value: {
+            command,
+            stdout: output,
+          },
+        },
+      ],
+    },
+  })
+
   const runHandleSteps = (messages: Message[]) => {
     mockAgentState.messageHistory = messages
     const generator = contextPruner.handleSteps!({ agentState: mockAgentState })
@@ -269,58 +369,36 @@ describe('context-pruner edge cases', () => {
     return results
   }
 
-  test('handles malformed terminal command tool results', () => {
+  test('handles terminal command tool results gracefully', () => {
     const largeContent = 'x'.repeat(100000)
     const messages = [
       createMessage('user', largeContent),
-      createMessage('assistant', '<tool>run_terminal_command</tool>'), // Missing tool_result wrapper
-      createMessage('assistant', '<tool_result><tool>run_terminal_command</tool>'), // Missing result
-      createMessage('assistant', '<tool_result><tool>run_terminal_command</tool><result>[Output omitted]</result></tool_result>'),
+      createTerminalToolMessage('npm test', '[Output omitted]'),
+      createTerminalToolMessage('ls -la', 'file1.txt\nfile2.txt'),
     ]
 
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
-    // Should handle malformed entries gracefully
+
+    // Should handle terminal commands gracefully
     expect(resultMessages.length).toBeGreaterThan(0)
-    
+
     // Valid terminal command should be processed correctly
-    const validCommand = resultMessages.find((m: any) => 
-      typeof m.content === 'string' && m.content.includes('<tool_result><tool>run_terminal_command</tool><result>[Output omitted]</result></tool_result>')
+    const validCommand = resultMessages.find(
+      (m: any) =>
+        m.role === 'tool' && m.content?.toolName === 'run_terminal_command',
     )
     expect(validCommand).toBeDefined()
   })
 
-  test('handles nested tool results in terminal commands', () => {
-    const largeContent = 'x'.repeat(100000)
-    const nestedToolResult = `
-      <tool_result><tool>run_terminal_command</tool><result>[Output omitted]</result></tool_result>
-        </result>
-      </tool_result>
-    `
-    
-    const messages = [
-      createMessage('user', largeContent),
-      createMessage('assistant', nestedToolResult),
-    ]
-
-    const results = runHandleSteps(messages)
-
-    expect(results).toHaveLength(1)
-    // Should handle nested XML gracefully without breaking
-    expect(results[0].input.messages).toBeDefined()
-  })
-
   test('handles exact token limit boundary', () => {
     // Create content that when stringified is close to the 200k token limit
     // 200k tokens ≈ 600k characters (rough approximation used in code)
     const boundaryContent = 'x'.repeat(599000)
-    
-    const messages = [
-      createMessage('user', boundaryContent),
-    ]
+
+    const messages = [createMessage('user', boundaryContent)]
 
     const results = runHandleSteps(messages)
 
@@ -331,7 +409,7 @@ describe('context-pruner edge cases', () => {
 
   test('preserves message order after pruning', () => {
     const largeContent = 'x'.repeat(50000)
-    
+
     const messages = [
       createMessage('user', `First: ${largeContent}`),
       createMessage('assistant', `Second: ${largeContent}`),
@@ -344,14 +422,22 @@ describe('context-pruner edge cases', () => {
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
+
     // Check that remaining messages maintain chronological order
     let previousIndex = -1
     resultMessages.forEach((message: any) => {
       if (typeof message.content === 'string') {
-        const match = message.content.match(/(First|Second|Third|Fourth|Fifth):/)
+        const match = message.content.match(
+          /(First|Second|Third|Fourth|Fifth):/,
+        )
         if (match) {
-          const currentIndex = ['First', 'Second', 'Third', 'Fourth', 'Fifth'].indexOf(match[1])
+          const currentIndex = [
+            'First',
+            'Second',
+            'Third',
+            'Fourth',
+            'Fifth',
+          ].indexOf(match[1])
           expect(currentIndex).toBeGreaterThan(previousIndex)
           previousIndex = currentIndex
         }
@@ -375,31 +461,49 @@ describe('context-pruner edge cases', () => {
   test('handles tool results with various sizes around 1000 char threshold', () => {
     // Create content large enough to exceed 200k token limit to trigger pruning
     const largeContent = 'x'.repeat(150000)
-    
+
+    const createToolMessage = (toolName: string, size: number): any => ({
+      role: 'tool',
+      content: {
+        type: 'tool-result',
+        toolCallId: 'test-id',
+        toolName,
+        output: [
+          {
+            type: 'json',
+            value: {
+              data: 'a'.repeat(size),
+            },
+          },
+        ],
+      },
+    })
+
     const messages = [
       createMessage('user', largeContent),
       createMessage('assistant', largeContent),
       createMessage('user', largeContent),
       createMessage('assistant', largeContent),
-      createMessage('assistant', `<tool_result><tool>test</tool><result>${'a'.repeat(999)}</result></tool_result>`), // Just under 1000
-      createMessage('assistant', `<tool_result><tool>test</tool><result>${'b'.repeat(1000)}</result></tool_result>`), // Exactly 1000  
-      createMessage('assistant', `<tool_result><tool>test</tool><result>${'c'.repeat(1001)}</result></tool_result>`), // Just over 1000
+      createToolMessage('test1', 500), // Small
+      createToolMessage('test2', 999), // Just under 1000 when stringified
+      createToolMessage('test3', 2000), // Large
     ]
 
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
+
     // Check that some tool result processing occurred
-    const hasToolResults = resultMessages.some((m: any) => 
-      typeof m.content === 'string' && m.content.includes('<tool_result>')
-    )
+    const hasToolResults = resultMessages.some((m: any) => m.role === 'tool')
     expect(hasToolResults).toBe(true)
-    
-    // Check that large tool result replacement occurred (may replace all tool results over 1000 chars)
-    const hasLargeToolResultReplacement = resultMessages.some((m: any) => 
-      typeof m.content === 'string' && m.content.includes('Large tool result omitted')
+
+    // Check that large tool result replacement occurred
+    const hasLargeToolResultReplacement = resultMessages.some(
+      (m: any) =>
+        m.role === 'tool' &&
+        m.content?.output?.[0]?.value?.message ===
+          '[Large tool result omitted]',
     )
     expect(hasLargeToolResultReplacement).toBe(true)
   })
@@ -407,7 +511,8 @@ describe('context-pruner edge cases', () => {
   test('handles spawn_agent_inline detection with variations', () => {
     const testCases = [
       {
-        content: 'Regular message with spawn_agent_inline but not for other-agent',
+        content:
+          'Regular message with spawn_agent_inline but not for other-agent',
         shouldRemove: false,
       },
       {
@@ -437,7 +542,9 @@ describe('context-pruner edge cases', () => {
       if (shouldRemove) {
         // Should remove the assistant message and following 2 user messages
         expect(results[0].input.messages).toHaveLength(1)
-        expect(results[0].input.messages[0]).toEqual(createMessage('user', 'Hello'))
+        expect(results[0].input.messages[0]).toEqual(
+          createMessage('user', 'Hello'),
+        )
       } else {
         // Should preserve all messages
         expect(results[0].input.messages).toHaveLength(4)
@@ -448,29 +555,32 @@ describe('context-pruner edge cases', () => {
   test('handles multiple consecutive replacement messages in pruning', () => {
     // Create scenario where multiple consecutive messages would be replaced
     const largeContent = 'x'.repeat(60000)
-    
-    const messages = Array.from({ length: 10 }, (_, i) => 
-      createMessage('user', `Message ${i}: ${largeContent}`)
+
+    const messages = Array.from({ length: 10 }, (_, i) =>
+      createMessage('user', `Message ${i}: ${largeContent}`),
     )
 
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
     const resultMessages = results[0].input.messages
-    
+
     // Should not have consecutive replacement messages
     let consecutiveReplacements = 0
     let maxConsecutive = 0
-    
+
     resultMessages.forEach((message: any) => {
-      if (typeof message.content === 'string' && message.content.includes('Previous message(s) omitted')) {
+      if (
+        typeof message.content === 'string' &&
+        message.content.includes('Previous message(s) omitted')
+      ) {
         consecutiveReplacements++
       } else {
         maxConsecutive = Math.max(maxConsecutive, consecutiveReplacements)
         consecutiveReplacements = 0
       }
     })
-    
+
     maxConsecutive = Math.max(maxConsecutive, consecutiveReplacements)
     expect(maxConsecutive).toBeLessThanOrEqual(1) // No more than 1 consecutive replacement
   })
diff --git a/.agents/base2/base2-factory.ts b/.agents/base2/base2-factory.ts
index 8c0f11e16..3788e52fd 100644
--- a/.agents/base2/base2-factory.ts
+++ b/.agents/base2/base2-factory.ts
@@ -1,9 +1,9 @@
 import { publisher } from '../constants'
-
 import {
   PLACEHOLDER,
   type SecretAgentDefinition,
 } from '../types/secret-agent-definition'
+
 import type { ModelName } from 'types/agent-definition'
 
 export const base2 = (model: ModelName): Omit<SecretAgentDefinition, 'id'> => ({
@@ -76,6 +76,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
           agent_type: 'context-pruner',
           params: params ?? {},
         },
+        includeToolCall: false,
       } as any
 
       const { stepsComplete } = yield 'STEP'
@@ -87,6 +88,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
             role: 'user',
             content: `You have reached the step limit. Please summarize your progress in plain text (no need to use set_output) so far and what you still need to solve. Immediately after summarizing, please end your turn. Do not use any tools except for the end_turn tool.`,
           },
+          includeToolCall: false,
         }
         yield 'STEP'
         break
diff --git a/.agents/base2/editor.ts b/.agents/base2/editor.ts
index 57ee04524..a95229766 100644
--- a/.agents/base2/editor.ts
+++ b/.agents/base2/editor.ts
@@ -1,5 +1,4 @@
 import { publisher } from '../constants'
-
 import {
   PLACEHOLDER,
   type SecretAgentDefinition,
@@ -150,6 +149,7 @@ ${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`,
             content:
               'You have reached the step limit. Please use the set_output tool now to summarize your progress so far, what you still need to solve, and provide any insights that could help complete the remaining work. Please end your turn after using the set_output tool with the end_turn tool.',
           },
+          includeToolCall: false,
         }
 
         // One final step to produce the summary
diff --git a/.agents/changes-reviewer.ts b/.agents/changes-reviewer.ts
index 9182abd02..bff957fec 100644
--- a/.agents/changes-reviewer.ts
+++ b/.agents/changes-reviewer.ts
@@ -110,6 +110,7 @@ Use the following guidelines to review the changes and suggest improvements:
         content:
           'Now I will spawn a file explorer to find any missing codebase context, and then review the changes.',
       },
+      includeToolCall: false,
     }
 
     yield 'STEP_ALL'
diff --git a/.agents/context-pruner.ts b/.agents/context-pruner.ts
index f1d95e5d6..78c4e327a 100644
--- a/.agents/context-pruner.ts
+++ b/.agents/context-pruner.ts
@@ -1,10 +1,8 @@
 import { publisher } from './constants'
 
-import type {
-  AgentDefinition,
-  Message,
-  ToolCall,
-} from './types/agent-definition'
+import type { AgentDefinition, ToolCall } from './types/agent-definition'
+import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
+import type { Message, ToolMessage } from '@codebuff/sdk'
 
 const definition: AgentDefinition = {
   id: 'context-pruner',
@@ -43,31 +41,13 @@ const definition: AgentDefinition = {
 
     let currentMessages = [...messages]
 
-    // Find and remove context-pruner spawn_agent_inline call and following messages
-    const lastAssistantMessageIndex = currentMessages.findLastIndex(
-      (message) => message.role === 'assistant',
-    )
-    const lastAssistantMessage = currentMessages[lastAssistantMessageIndex]
-    const lastAssistantMessageIsToolCall =
-      typeof lastAssistantMessage?.content === 'string' &&
-      lastAssistantMessage.content.includes('spawn_agent_inline') &&
-      lastAssistantMessage.content.includes('context-pruner')
-
-    if (lastAssistantMessageIsToolCall && lastAssistantMessageIndex >= 0) {
-      // Remove tool call and any following messages.
-      const messagesToRemove =
-        currentMessages.length - lastAssistantMessageIndex
-      currentMessages.splice(lastAssistantMessageIndex, messagesToRemove)
-    }
-
-    // Initial check - if already under limit, return (with inline agent tool call removed)
+    // Initial check - if already under limit, return
     const initialTokens = countTokensJson(currentMessages)
     if (initialTokens < maxMessageTokens) {
       yield {
         toolName: 'set_messages',
-        input: {
-          messages: currentMessages,
-        },
+        input: { messages: currentMessages },
+        includeToolCall: false,
       }
       return
     }
@@ -78,25 +58,41 @@ const definition: AgentDefinition = {
 
     for (let i = currentMessages.length - 1; i >= 0; i--) {
       const message = currentMessages[i]
-      let processedContent =
-        typeof message.content === 'string'
-          ? message.content
-          : JSON.stringify(message.content)
 
-      if (processedContent.includes('<tool>run_terminal_command</tool>')) {
+      // Handle tool messages with new object format
+      if (
+        message.role === 'tool' &&
+        message.content.toolName === 'run_terminal_command'
+      ) {
+        const toolMessage =
+          message as CodebuffToolMessage<'run_terminal_command'>
+
         if (numKeptTerminalCommands < numTerminalCommandsToKeep) {
           numKeptTerminalCommands++
-          afterTerminalPass.unshift({ ...message, content: processedContent })
+          afterTerminalPass.unshift(message)
         } else {
-          // Simplify terminal command result
-          processedContent = processedContent.replace(
-            /<tool_result>\s*<tool>run_terminal_command<\/tool>\s*<result>[\s\S]*?<\/result>\s*<\/tool_result>/g,
-            '<tool_result><tool>run_terminal_command</tool><result>[Output omitted]</result></tool_result>',
-          )
-          afterTerminalPass.unshift({ ...message, content: processedContent })
+          // Simplify terminal command result by replacing output
+          const simplifiedMessage: CodebuffToolMessage<'run_terminal_command'> =
+            {
+              ...toolMessage,
+              content: {
+                ...toolMessage.content,
+                output: [
+                  {
+                    type: 'json',
+                    value: {
+                      command:
+                        toolMessage.content.output[0]?.value?.command || '',
+                      stdoutOmittedForLength: true,
+                    },
+                  },
+                ],
+              },
+            }
+          afterTerminalPass.unshift(simplifiedMessage)
         }
       } else {
-        afterTerminalPass.unshift({ ...message, content: processedContent })
+        afterTerminalPass.unshift(message)
       }
     }
 
@@ -108,28 +104,37 @@ const definition: AgentDefinition = {
         input: {
           messages: afterTerminalPass,
         },
+        includeToolCall: false,
       }
       return
     }
 
-    // PASS 2: Remove large tool results (any tool result > 1000 chars)
+    // PASS 2: Remove large tool results (any tool result output > 1000 chars when stringified)
     const afterToolResultsPass = afterTerminalPass.map((message) => {
-      let processedContent =
-        typeof message.content === 'string'
-          ? message.content
-          : JSON.stringify(message.content)
-
-      if (
-        processedContent.includes('<tool_result>') &&
-        processedContent.length > 1000
-      ) {
-        processedContent = processedContent.replace(
-          /<result>[\s\S]*?<\/result>/g,
-          '<result>[Large tool result omitted]</result>',
-        )
+      if (message.role === 'tool') {
+        const outputSize = JSON.stringify(message.content.output).length
+
+        if (outputSize > 1000) {
+          // Replace with simplified output
+          const simplifiedMessage: ToolMessage = {
+            ...message,
+            content: {
+              ...message.content,
+              output: [
+                {
+                  type: 'json',
+                  value: {
+                    message: '[LARGE_TOOL_RESULT_OMITTED]',
+                    originalSize: outputSize,
+                  },
+                },
+              ],
+            },
+          }
+          return simplifiedMessage
+        }
       }
-
-      return { ...message, content: processedContent }
+      return message
     })
 
     // Check if tool results pass was enough
@@ -140,7 +145,8 @@ const definition: AgentDefinition = {
         input: {
           messages: afterToolResultsPass,
         },
-      } satisfies ToolCall
+        includeToolCall: false,
+      } satisfies ToolCall<'set_messages'>
       return
     }
 
@@ -162,10 +168,7 @@ const definition: AgentDefinition = {
     const filteredMessages: any[] = []
 
     for (const message of afterToolResultsPass) {
-      if (
-        removedTokens >= tokensToRemove ||
-        (message as any).keepDuringTruncation
-      ) {
+      if (removedTokens >= tokensToRemove || message.keepDuringTruncation) {
         filteredMessages.push(message)
         continue
       }
@@ -190,7 +193,8 @@ const definition: AgentDefinition = {
       input: {
         messages: finalMessages,
       },
-    } satisfies ToolCall
+      includeToolCall: false,
+    } satisfies ToolCall<'set_messages'>
   },
 }
 
diff --git a/.agents/factory/base.ts b/.agents/factory/base.ts
index 4fd3a563e..0064bae24 100644
--- a/.agents/factory/base.ts
+++ b/.agents/factory/base.ts
@@ -68,6 +68,7 @@ export const base = (model: ModelName): Omit<SecretAgentDefinition, 'id'> => ({
           agent_type: 'context-pruner',
           params: params ?? {},
         },
+        includeToolCall: false,
       } as any
 
       const { stepsComplete } = yield 'STEP'
diff --git a/.agents/git-committer.ts b/.agents/git-committer.ts
index 4701ceb9f..10c33216f 100644
--- a/.agents/git-committer.ts
+++ b/.agents/git-committer.ts
@@ -60,6 +60,7 @@ const definition: AgentDefinition = {
         content:
           "I've analyzed the git diff and recent commit history. Now I'll read any relevant files to better understand the context of these changes.",
       },
+      includeToolCall: false,
     }
 
     // Step 3: Let AI generate a step to decide which files to read.
@@ -73,6 +74,7 @@ const definition: AgentDefinition = {
         content:
           "Now I'll analyze the changes and create a commit with a good commit message.",
       },
+      includeToolCall: false,
     }
 
     yield 'STEP_ALL'
diff --git a/.agents/package.json b/.agents/package.json
index 436971bde..b995f9b5d 100644
--- a/.agents/package.json
+++ b/.agents/package.json
@@ -6,5 +6,8 @@
   "scripts": {
     "typecheck": "bun x tsc --noEmit -p tsconfig.json",
     "test": "bun test"
+  },
+  "dependencies": {
+    "@codebuff/sdk": "workspace:*"
   }
 }
diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index cfdfb6633..ee5da544b 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -14,6 +14,8 @@
  *   export default definition
  */
 
+import type { Message } from '@codebuff/sdk'
+
 // ============================================================================
 // Agent Definition and Utility Types
 // ============================================================================
@@ -201,25 +203,6 @@ export interface AgentState {
   output: Record<string, any> | undefined
 }
 
-/**
- * Message in conversation history
- */
-export interface Message {
-  role: 'user' | 'assistant'
-  content:
-    | string
-    | Array<
-        | {
-            type: 'text'
-            text: string
-          }
-        | {
-            type: 'image'
-            image: string
-          }
-      >
-}
-
 /**
  * Context provided to handleSteps generator function
  */
@@ -236,6 +219,7 @@ export type ToolCall<T extends ToolName = ToolName> = {
   [K in T]: {
     toolName: K
     input: Tools.GetToolParams<K>
+    includeToolCall?: boolean
   }
 }[T]
 
diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts
index 18b20f563..09b52b2b5 100644
--- a/.agents/types/tools.ts
+++ b/.agents/types/tools.ts
@@ -1,3 +1,5 @@
+import type { Message } from '@codebuff/sdk'
+
 /**
  * Union type of all available tool names
  */
@@ -118,15 +120,7 @@ export interface RunTerminalCommandParams {
  * Set the conversation history to the provided messages.
  */
 export interface SetMessagesParams {
-  messages: {
-    role: 'user' | 'assistant'
-    content:
-      | string
-      | {
-          type: 'text'
-          text: string
-        }[]
-  }[]
+  messages: Message[]
 }
 
 /**
diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts
index a2270d98b..7b5aef07b 100644
--- a/backend/src/__tests__/run-programmatic-step.test.ts
+++ b/backend/src/__tests__/run-programmatic-step.test.ts
@@ -205,6 +205,7 @@ describe('runProgrammaticStep', () => {
         yield {
           toolName: 'add_message',
           input: { role: 'user', content: 'Hello world' },
+          includeToolCall: false,
         }
         yield { toolName: 'read_files', input: { paths: ['test.txt'] } }
         yield { toolName: 'end_turn', input: {} }
@@ -233,6 +234,7 @@ describe('runProgrammaticStep', () => {
         expect.objectContaining({
           toolName: 'add_message',
           input: { role: 'user', content: 'Hello world' },
+          includeToolCall: false,
         }),
       )
 
diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index 03054d120..09d48ec4b 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -456,6 +456,7 @@ export const loopAgentSteps = async (
     userId,
     clientSessionId,
     onResponseChunk,
+    clearUserPromptMessagesAfterResponse = true,
   }: {
     userInputId: string
     agentType: AgentTemplateType
@@ -466,6 +467,7 @@ export const loopAgentSteps = async (
     fileContext: ProjectFileContext
     toolResults: ToolResultPart[]
     localAgentTemplates: Record<string, AgentTemplate>
+    clearUserPromptMessagesAfterResponse?: boolean
 
     userId: string | undefined
     clientSessionId: string
@@ -478,7 +480,9 @@ export const loopAgentSteps = async (
   }
 
   // Initialize message history with user prompt and instructions on first iteration
-  const hasPrompt = Boolean(prompt || params)
+  const hasPrompt = Boolean(
+    prompt || (params && Object.keys(params).length > 0),
+  )
 
   // Get the instructions prompt if we have a prompt/params
   const instructionsPrompt = hasPrompt
@@ -510,7 +514,9 @@ export const loopAgentSteps = async (
         // Actual user prompt!
         role: 'user' as const,
         content: asUserMessage(
-          `${prompt ?? ''}${params ? `\n\n${JSON.stringify(params, null, 2)}` : ''}`,
+          buildArray([prompt, params && JSON.stringify(params, null, 2)]).join(
+            '\n\n',
+          ),
         ),
         keepDuringTruncation: true,
       },
@@ -532,6 +538,7 @@ export const loopAgentSteps = async (
       keepDuringTruncation: true,
     },
   )
+  console.log(JSON.stringify({ initialMessages }, null, 2), 'asdf')
 
   let currentAgentState = {
     ...agentState,
@@ -578,15 +585,28 @@ export const loopAgentSteps = async (
 
       // End turn if programmatic step ended turn, or if the previous runAgentStep ended turn
       if (shouldEndTurn) {
-        currentAgentState.messageHistory = expireMessages(
-          currentAgentState.messageHistory,
-          'userPrompt',
-        )
+        if (clearUserPromptMessagesAfterResponse) {
+          currentAgentState.messageHistory = expireMessages(
+            currentAgentState.messageHistory,
+            'userPrompt',
+          )
+        }
         return {
           agentState: currentAgentState,
         }
       }
 
+      console.log(
+        JSON.stringify(
+          {
+            beforeRunStep: currentAgentState.messageHistory,
+          },
+          null,
+          2,
+        ),
+        'asdf',
+      )
+
       const { agentState: newAgentState, shouldEndTurn: llmShouldEndTurn } =
         await runAgentStep(ws, {
           userId,
@@ -609,10 +629,12 @@ export const loopAgentSteps = async (
       currentParams = undefined
     }
 
-    currentAgentState.messageHistory = expireMessages(
-      currentAgentState.messageHistory,
-      'userPrompt',
-    )
+    if (clearUserPromptMessagesAfterResponse) {
+      currentAgentState.messageHistory = expireMessages(
+        currentAgentState.messageHistory,
+        'userPrompt',
+      )
+    }
     return { agentState: currentAgentState }
   } catch (error) {
     // Log the error but still return the state with partial costs
diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index 231d89fe1..9a49176f6 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -184,7 +184,9 @@ export async function runProgrammaticStep(
       const toolCall = {
         ...toolCallWithoutId,
         toolCallId: crypto.randomUUID(),
-      } as CodebuffToolCall
+      } as CodebuffToolCall & {
+        includeToolCall?: boolean
+      }
 
       if (!template.toolNames.includes(toolCall.toolName)) {
         throw new Error(
@@ -194,7 +196,7 @@ export async function runProgrammaticStep(
 
       // Add assistant message with the tool call before executing it
       // Exception: don't add tool call message for add_message since it adds its own message
-      if (toolCall.toolName !== 'add_message') {
+      if (toolCall?.includeToolCall !== false) {
         const toolCallString = getToolCallString(
           toolCall.toolName,
           toolCall.input,
@@ -212,6 +214,8 @@ export async function runProgrammaticStep(
         })
       }
 
+      console.log(JSON.stringify({ toolCall }, null, 2), 'asdf')
+
       // Execute the tool synchronously and get the result immediately
       await executeToolCall({
         toolName: toolCall.toolName,
@@ -236,6 +240,11 @@ export async function runProgrammaticStep(
       // Sync state.messages back to agentState.messageHistory
       state.agentState.messageHistory = state.messages
 
+      console.log(
+        JSON.stringify({ afterExecudeToolCall: state.messages }, null, 2),
+        'asdf',
+      )
+
       // Get the latest tool result
       toolResult = toolResults[toolResults.length - 1]?.output
 
diff --git a/backend/src/tools/handlers/tool/spawn-agent-inline.ts b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
index 0e6ec0073..35f1758cc 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-inline.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-inline.ts
@@ -114,6 +114,7 @@ export const handleSpawnAgentInline = ((params: {
         // Inherits parent's onResponseChunk
         // writeToClient(chunk)
       },
+      clearUserPromptMessagesAfterResponse: false,
     })
 
     // Update parent's message history with child's final state
diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
index e95c76586..793e84207 100644
--- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts
+++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -303,6 +303,7 @@ export async function executeAgent({
   clientSessionId,
   onResponseChunk,
   isOnlyChild = false,
+  clearUserPromptMessagesAfterResponse = true,
 }: {
   ws: WebSocket
   userInputId: string
@@ -317,6 +318,7 @@ export async function executeAgent({
   clientSessionId: string
   onResponseChunk: (chunk: string | PrintModeEvent) => void
   isOnlyChild?: boolean
+  clearUserPromptMessagesAfterResponse?: boolean
 }) {
   const width = 60
   const fullAgentName = `${agentTemplate.displayName} (${agentTemplate.id})`
@@ -347,6 +349,7 @@ export async function executeAgent({
     userId,
     clientSessionId,
     onResponseChunk,
+    clearUserPromptMessagesAfterResponse,
   })
 
   // Send agent end notification if this is the only child
diff --git a/bun.lock b/bun.lock
index c27260eeb..7a223a277 100644
--- a/bun.lock
+++ b/bun.lock
@@ -34,6 +34,9 @@
     ".agents": {
       "name": "@codebuff/agents",
       "version": "0.0.0",
+      "dependencies": {
+        "@codebuff/sdk": "workspace:*",
+      },
     },
     "backend": {
       "name": "@codebuff/backend",
@@ -229,7 +232,7 @@
     },
     "sdk": {
       "name": "@codebuff/sdk",
-      "version": "0.1.17",
+      "version": "0.1.18",
       "dependencies": {
         "@vscode/tree-sitter-wasm": "0.1.4",
         "ai": "^5.0.0",
diff --git a/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts b/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts
index 4e3e6e131..b11e63a48 100644
--- a/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts
+++ b/common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer.ts
@@ -54,6 +54,7 @@ const definition: AgentDefinition = {
         content:
           "I've analyzed the git diff and recent commit history. Now I'll read any relevant files to better understand the context of these changes.",
       },
+      includeToolCall: false,
     } satisfies ToolCall
 
     // Step 3: Let AI generate a step to decide which files to read.
@@ -67,6 +68,7 @@ const definition: AgentDefinition = {
         content:
           "Now I'll analyze the changes and create a commit with a good commit message.",
       },
+      includeToolCall: false,
     } satisfies ToolCall
 
     yield 'STEP_ALL'
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index cfdfb6633..ee5da544b 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -14,6 +14,8 @@
  *   export default definition
  */
 
+import type { Message } from '@codebuff/sdk'
+
 // ============================================================================
 // Agent Definition and Utility Types
 // ============================================================================
@@ -201,25 +203,6 @@ export interface AgentState {
   output: Record<string, any> | undefined
 }
 
-/**
- * Message in conversation history
- */
-export interface Message {
-  role: 'user' | 'assistant'
-  content:
-    | string
-    | Array<
-        | {
-            type: 'text'
-            text: string
-          }
-        | {
-            type: 'image'
-            image: string
-          }
-      >
-}
-
 /**
  * Context provided to handleSteps generator function
  */
@@ -236,6 +219,7 @@ export type ToolCall<T extends ToolName = ToolName> = {
   [K in T]: {
     toolName: K
     input: Tools.GetToolParams<K>
+    includeToolCall?: boolean
   }
 }[T]
 
diff --git a/common/src/util/messages.ts b/common/src/util/messages.ts
index debb58338..9f70a52b3 100644
--- a/common/src/util/messages.ts
+++ b/common/src/util/messages.ts
@@ -235,6 +235,7 @@ export function convertCbToModelMessages({
   }
 
   if (!includeCacheControl) {
+    console.log(JSON.stringify({ final: aggregated }, null, 2), 'asdf')
     return aggregated
   }
 
@@ -267,5 +268,6 @@ export function convertCbToModelMessages({
     contentBlock[contentBlock.length - 1],
   )
 
+  console.log(JSON.stringify({ final: aggregated }, null, 2))
   return aggregated
 }
diff --git a/sdk/src/index.ts b/sdk/src/index.ts
index ebe8ddb37..f78520de4 100644
--- a/sdk/src/index.ts
+++ b/sdk/src/index.ts
@@ -2,5 +2,9 @@ export * from './client'
 export * from './custom-tool'
 export * from './run-state'
 export * from './websocket-client'
+export * from '../../common/src/types/json'
+export * from '../../common/src/types/messages/codebuff-message'
+export * from '../../common/src/types/messages/content-part'
+export * from '../../common/src/types/messages/provider-metadata'
 
 export type { AgentDefinition } from '../../common/src/templates/initial-agents-dir/types/agent-definition'

From b792d319a06541b3f9c581267854588305b2acac Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Thu, 28 Aug 2025 16:29:07 -0700
Subject: [PATCH 13/18] remove logs

---
 .agents/base2/planner-factory.ts     | 25 +++++++++++++++++++------
 backend/src/run-agent-step.ts        | 12 ------------
 backend/src/run-programmatic-step.ts |  7 -------
 common/src/util/messages.ts          |  1 -
 4 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/.agents/base2/planner-factory.ts b/.agents/base2/planner-factory.ts
index 1561c3cbb..363179341 100644
--- a/.agents/base2/planner-factory.ts
+++ b/.agents/base2/planner-factory.ts
@@ -1,10 +1,11 @@
-import { ModelName, ToolCall } from 'types/agent-definition'
 import { publisher } from '../constants'
 import {
   PLACEHOLDER,
   type SecretAgentDefinition,
 } from '../types/secret-agent-definition'
 
+import type { ModelName, ToolCall } from 'types/agent-definition'
+
 export const plannerFactory = (
   model: ModelName,
 ): Omit<SecretAgentDefinition, 'id'> => ({
@@ -51,11 +52,23 @@ ${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`,
       agentState.messageHistory
         .slice(2)
         .map((message) =>
-          typeof message.content === 'string'
-            ? message.content
-            : message.content
-                .map((content) => (content.type === 'text' ? content.text : ''))
-                .join('\n'),
+          message.role === 'tool'
+            ? JSON.stringify(
+                {
+                  toolName: message.content.toolName,
+                  toolCallId: message.content.toolCallId,
+                  output: message.content.output,
+                },
+                null,
+                2,
+              )
+            : typeof message.content === 'string'
+              ? message.content
+              : message.content
+                  .map((content) =>
+                    content.type === 'text' ? content.text : '',
+                  )
+                  .join('\n'),
         )
         .join('\n')
 
diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index 09d48ec4b..a09d9ba46 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -538,7 +538,6 @@ export const loopAgentSteps = async (
       keepDuringTruncation: true,
     },
   )
-  console.log(JSON.stringify({ initialMessages }, null, 2), 'asdf')
 
   let currentAgentState = {
     ...agentState,
@@ -596,17 +595,6 @@ export const loopAgentSteps = async (
         }
       }
 
-      console.log(
-        JSON.stringify(
-          {
-            beforeRunStep: currentAgentState.messageHistory,
-          },
-          null,
-          2,
-        ),
-        'asdf',
-      )
-
       const { agentState: newAgentState, shouldEndTurn: llmShouldEndTurn } =
         await runAgentStep(ws, {
           userId,
diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts
index 9a49176f6..57a8c46f4 100644
--- a/backend/src/run-programmatic-step.ts
+++ b/backend/src/run-programmatic-step.ts
@@ -214,8 +214,6 @@ export async function runProgrammaticStep(
         })
       }
 
-      console.log(JSON.stringify({ toolCall }, null, 2), 'asdf')
-
       // Execute the tool synchronously and get the result immediately
       await executeToolCall({
         toolName: toolCall.toolName,
@@ -240,11 +238,6 @@ export async function runProgrammaticStep(
       // Sync state.messages back to agentState.messageHistory
       state.agentState.messageHistory = state.messages
 
-      console.log(
-        JSON.stringify({ afterExecudeToolCall: state.messages }, null, 2),
-        'asdf',
-      )
-
       // Get the latest tool result
       toolResult = toolResults[toolResults.length - 1]?.output
 
diff --git a/common/src/util/messages.ts b/common/src/util/messages.ts
index 9f70a52b3..1d4f532e1 100644
--- a/common/src/util/messages.ts
+++ b/common/src/util/messages.ts
@@ -235,7 +235,6 @@ export function convertCbToModelMessages({
   }
 
   if (!includeCacheControl) {
-    console.log(JSON.stringify({ final: aggregated }, null, 2), 'asdf')
     return aggregated
   }
 

From 9d593b9b8bae10b81f42a682c4848fe5340babe9 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Fri, 29 Aug 2025 10:39:04 -0700
Subject: [PATCH 14/18] copy codebuff message types into initial-agents-dir

---
 .../types/agent-definition.ts                 |  3 +-
 .../types/codebuff-message.ts                 | 77 +++++++++++++++++++
 .../initial-agents-dir/types/content-part.ts  | 68 ++++++++++++++++
 .../initial-agents-dir/types/data-content.ts  | 14 ++++
 .../types/provider-metadata.ts                | 10 +++
 5 files changed, 170 insertions(+), 2 deletions(-)
 create mode 100644 common/src/templates/initial-agents-dir/types/codebuff-message.ts
 create mode 100644 common/src/templates/initial-agents-dir/types/content-part.ts
 create mode 100644 common/src/templates/initial-agents-dir/types/data-content.ts
 create mode 100644 common/src/templates/initial-agents-dir/types/provider-metadata.ts

diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index ee5da544b..ccba67399 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -14,8 +14,6 @@
  *   export default definition
  */
 
-import type { Message } from '@codebuff/sdk'
-
 // ============================================================================
 // Agent Definition and Utility Types
 // ============================================================================
@@ -346,6 +344,7 @@ export type ModelName =
   | 'z-ai/glm-4.5:nitro'
   | (string & {})
 
+import type { Message } from './codebuff-message'
 import type * as Tools from './tools'
 export type { Tools }
 type ToolName = Tools.ToolName
diff --git a/common/src/templates/initial-agents-dir/types/codebuff-message.ts b/common/src/templates/initial-agents-dir/types/codebuff-message.ts
new file mode 100644
index 000000000..97b9fdc1a
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/codebuff-message.ts
@@ -0,0 +1,77 @@
+import z from 'zod/v4'
+
+import {
+  filePartSchema,
+  imagePartSchema,
+  reasoningPartSchema,
+  textPartSchema,
+  toolCallPartSchema,
+  toolResultPartSchema,
+} from './content-part'
+import { providerMetadataSchema } from './provider-metadata'
+
+const auxiliaryDataSchema = z.object({
+  providerOptions: providerMetadataSchema.optional(),
+  timeToLive: z
+    .union([z.literal('agentStep'), z.literal('userPrompt')])
+    .optional(),
+  keepDuringTruncation: z.boolean().optional(),
+})
+
+export const systemMessageSchema = z
+  .object({
+    role: z.literal('system'),
+    content: z.string(),
+  })
+  .and(auxiliaryDataSchema)
+export type SystemMessage = z.infer<typeof systemMessageSchema>
+
+export const userMessageSchema = z
+  .object({
+    role: z.literal('user'),
+    content: z.union([
+      z.string(),
+      z.union([textPartSchema, imagePartSchema, filePartSchema]).array(),
+    ]),
+  })
+  .and(auxiliaryDataSchema)
+export type UserMessage = z.infer<typeof userMessageSchema>
+
+export const assistantMessageSchema = z
+  .object({
+    role: z.literal('assistant'),
+    content: z.union([
+      z.string(),
+      z
+        .union([textPartSchema, reasoningPartSchema, toolCallPartSchema])
+        .array(),
+    ]),
+  })
+  .and(auxiliaryDataSchema)
+export type AssistantMessage = z.infer<typeof assistantMessageSchema>
+
+export const toolMessageSchema = z
+  .object({
+    role: z.literal('tool'),
+    content: toolResultPartSchema,
+  })
+  .and(auxiliaryDataSchema)
+export type ToolMessage = z.infer<typeof toolMessageSchema>
+
+export const messageSchema = z
+  .union([
+    systemMessageSchema,
+    userMessageSchema,
+    assistantMessageSchema,
+    toolMessageSchema,
+  ])
+  .and(
+    z.object({
+      providerOptions: providerMetadataSchema.optional(),
+      timeToLive: z
+        .union([z.literal('agentStep'), z.literal('userPrompt')])
+        .optional(),
+      keepDuringTruncation: z.boolean().optional(),
+    }),
+  )
+export type Message = z.infer<typeof messageSchema>
diff --git a/common/src/templates/initial-agents-dir/types/content-part.ts b/common/src/templates/initial-agents-dir/types/content-part.ts
new file mode 100644
index 000000000..ff01b1f0c
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/content-part.ts
@@ -0,0 +1,68 @@
+import z from 'zod/v4'
+
+import { providerMetadataSchema } from './provider-metadata'
+import { jsonValueSchema } from '../json'
+import { dataContentSchema } from './data-content'
+
+export const textPartSchema = z.object({
+  type: z.literal('text'),
+  text: z.string(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type TextPart = z.infer<typeof textPartSchema>
+
+export const imagePartSchema = z.object({
+  type: z.literal('image'),
+  image: z.union([dataContentSchema, z.instanceof(URL)]),
+  mediaType: z.string().optional(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type ImagePart = z.infer<typeof imagePartSchema>
+
+export const filePartSchema = z.object({
+  type: z.literal('file'),
+  data: z.union([dataContentSchema, z.instanceof(URL)]),
+  filename: z.string().optional(),
+  mediaType: z.string(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type FilePart = z.infer<typeof filePartSchema>
+
+export const reasoningPartSchema = z.object({
+  type: z.literal('reasoning'),
+  text: z.string(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type ReasoningPart = z.infer<typeof reasoningPartSchema>
+
+export const toolCallPartSchema = z.object({
+  type: z.literal('tool-call'),
+  toolCallId: z.string(),
+  toolName: z.string(),
+  input: z.record(z.string(), z.unknown()),
+  providerOptions: providerMetadataSchema.optional(),
+  providerExecuted: z.boolean().optional(),
+})
+export type ToolCallPart = z.infer<typeof toolCallPartSchema>
+
+export const toolResultOutputSchema = z.discriminatedUnion('type', [
+  z.object({
+    type: z.literal('json'),
+    value: jsonValueSchema,
+  }),
+  z.object({
+    type: z.literal('media'),
+    data: z.string(),
+    mediaType: z.string(),
+  }),
+])
+export type ToolResultOutput = z.infer<typeof toolResultOutputSchema>
+
+export const toolResultPartSchema = z.object({
+  type: z.literal('tool-result'),
+  toolCallId: z.string(),
+  toolName: z.string(),
+  output: toolResultOutputSchema.array(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type ToolResultPart = z.infer<typeof toolResultPartSchema>
diff --git a/common/src/templates/initial-agents-dir/types/data-content.ts b/common/src/templates/initial-agents-dir/types/data-content.ts
new file mode 100644
index 000000000..eb5c2e1ff
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/data-content.ts
@@ -0,0 +1,14 @@
+import z from 'zod/v4'
+
+export const dataContentSchema = z.union([
+  z.string(),
+  z.instanceof(Uint8Array),
+  z.instanceof(ArrayBuffer),
+  z.custom<Buffer>(
+    // Buffer might not be available in some environments such as CloudFlare:
+    (value: unknown): value is Buffer =>
+      globalThis.Buffer?.isBuffer(value) ?? false,
+    { message: 'Must be a Buffer' },
+  ),
+])
+export type DataContent = z.infer<typeof dataContentSchema>
diff --git a/common/src/templates/initial-agents-dir/types/provider-metadata.ts b/common/src/templates/initial-agents-dir/types/provider-metadata.ts
new file mode 100644
index 000000000..29d4b20f2
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/provider-metadata.ts
@@ -0,0 +1,10 @@
+import z from 'zod/v4'
+
+import { jsonValueSchema } from '../json'
+
+export const providerMetadataSchema = z.record(
+  z.string(),
+  z.record(z.string(), jsonValueSchema),
+)
+
+export type ProviderMetadata = z.infer<typeof providerMetadataSchema>

From 9034ac2425ac05ff933bb148a0bfdb0a8686f663 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Fri, 29 Aug 2025 10:54:47 -0700
Subject: [PATCH 15/18] update .agents copy flow

---
 .agents/types/agent-definition.ts             |  6 +-
 .agents/types/codebuff-message.ts             | 77 +++++++++++++++++++
 .agents/types/content-part.ts                 | 68 ++++++++++++++++
 .agents/types/data-content.ts                 | 14 ++++
 .agents/types/json.ts                         | 29 +++++++
 .agents/types/provider-metadata.ts            | 10 +++
 .agents/types/tools.ts                        |  2 +-
 .../types/agent-definition.ts                 |  7 +-
 .../initial-agents-dir/types/content-part.ts  |  4 +-
 .../initial-agents-dir/types/json.ts          | 29 +++++++
 .../types/provider-metadata.ts                |  2 +-
 .../initial-agents-dir/types/tools.ts         | 12 +--
 npm-app/src/cli-handlers/agents.ts            | 48 ++++++++++--
 13 files changed, 284 insertions(+), 24 deletions(-)
 create mode 100644 .agents/types/codebuff-message.ts
 create mode 100644 .agents/types/content-part.ts
 create mode 100644 .agents/types/data-content.ts
 create mode 100644 .agents/types/json.ts
 create mode 100644 .agents/types/provider-metadata.ts
 create mode 100644 common/src/templates/initial-agents-dir/types/json.ts

diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index ee5da544b..fa71fef3f 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -14,7 +14,9 @@
  *   export default definition
  */
 
-import type { Message } from '@codebuff/sdk'
+import type { Message } from './codebuff-message'
+import type * as Tools from './tools'
+type ToolName = Tools.ToolName
 
 // ============================================================================
 // Agent Definition and Utility Types
@@ -346,6 +348,4 @@ export type ModelName =
   | 'z-ai/glm-4.5:nitro'
   | (string & {})
 
-import type * as Tools from './tools'
 export type { Tools }
-type ToolName = Tools.ToolName
diff --git a/.agents/types/codebuff-message.ts b/.agents/types/codebuff-message.ts
new file mode 100644
index 000000000..97b9fdc1a
--- /dev/null
+++ b/.agents/types/codebuff-message.ts
@@ -0,0 +1,77 @@
+import z from 'zod/v4'
+
+import {
+  filePartSchema,
+  imagePartSchema,
+  reasoningPartSchema,
+  textPartSchema,
+  toolCallPartSchema,
+  toolResultPartSchema,
+} from './content-part'
+import { providerMetadataSchema } from './provider-metadata'
+
+const auxiliaryDataSchema = z.object({
+  providerOptions: providerMetadataSchema.optional(),
+  timeToLive: z
+    .union([z.literal('agentStep'), z.literal('userPrompt')])
+    .optional(),
+  keepDuringTruncation: z.boolean().optional(),
+})
+
+export const systemMessageSchema = z
+  .object({
+    role: z.literal('system'),
+    content: z.string(),
+  })
+  .and(auxiliaryDataSchema)
+export type SystemMessage = z.infer<typeof systemMessageSchema>
+
+export const userMessageSchema = z
+  .object({
+    role: z.literal('user'),
+    content: z.union([
+      z.string(),
+      z.union([textPartSchema, imagePartSchema, filePartSchema]).array(),
+    ]),
+  })
+  .and(auxiliaryDataSchema)
+export type UserMessage = z.infer<typeof userMessageSchema>
+
+export const assistantMessageSchema = z
+  .object({
+    role: z.literal('assistant'),
+    content: z.union([
+      z.string(),
+      z
+        .union([textPartSchema, reasoningPartSchema, toolCallPartSchema])
+        .array(),
+    ]),
+  })
+  .and(auxiliaryDataSchema)
+export type AssistantMessage = z.infer<typeof assistantMessageSchema>
+
+export const toolMessageSchema = z
+  .object({
+    role: z.literal('tool'),
+    content: toolResultPartSchema,
+  })
+  .and(auxiliaryDataSchema)
+export type ToolMessage = z.infer<typeof toolMessageSchema>
+
+export const messageSchema = z
+  .union([
+    systemMessageSchema,
+    userMessageSchema,
+    assistantMessageSchema,
+    toolMessageSchema,
+  ])
+  .and(
+    z.object({
+      providerOptions: providerMetadataSchema.optional(),
+      timeToLive: z
+        .union([z.literal('agentStep'), z.literal('userPrompt')])
+        .optional(),
+      keepDuringTruncation: z.boolean().optional(),
+    }),
+  )
+export type Message = z.infer<typeof messageSchema>
diff --git a/.agents/types/content-part.ts b/.agents/types/content-part.ts
new file mode 100644
index 000000000..474ce335e
--- /dev/null
+++ b/.agents/types/content-part.ts
@@ -0,0 +1,68 @@
+import z from 'zod/v4'
+
+import { dataContentSchema } from './data-content'
+import { jsonValueSchema } from './json'
+import { providerMetadataSchema } from './provider-metadata'
+
+export const textPartSchema = z.object({
+  type: z.literal('text'),
+  text: z.string(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type TextPart = z.infer<typeof textPartSchema>
+
+export const imagePartSchema = z.object({
+  type: z.literal('image'),
+  image: z.union([dataContentSchema, z.instanceof(URL)]),
+  mediaType: z.string().optional(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type ImagePart = z.infer<typeof imagePartSchema>
+
+export const filePartSchema = z.object({
+  type: z.literal('file'),
+  data: z.union([dataContentSchema, z.instanceof(URL)]),
+  filename: z.string().optional(),
+  mediaType: z.string(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type FilePart = z.infer<typeof filePartSchema>
+
+export const reasoningPartSchema = z.object({
+  type: z.literal('reasoning'),
+  text: z.string(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type ReasoningPart = z.infer<typeof reasoningPartSchema>
+
+export const toolCallPartSchema = z.object({
+  type: z.literal('tool-call'),
+  toolCallId: z.string(),
+  toolName: z.string(),
+  input: z.record(z.string(), z.unknown()),
+  providerOptions: providerMetadataSchema.optional(),
+  providerExecuted: z.boolean().optional(),
+})
+export type ToolCallPart = z.infer<typeof toolCallPartSchema>
+
+export const toolResultOutputSchema = z.discriminatedUnion('type', [
+  z.object({
+    type: z.literal('json'),
+    value: jsonValueSchema,
+  }),
+  z.object({
+    type: z.literal('media'),
+    data: z.string(),
+    mediaType: z.string(),
+  }),
+])
+export type ToolResultOutput = z.infer<typeof toolResultOutputSchema>
+
+export const toolResultPartSchema = z.object({
+  type: z.literal('tool-result'),
+  toolCallId: z.string(),
+  toolName: z.string(),
+  output: toolResultOutputSchema.array(),
+  providerOptions: providerMetadataSchema.optional(),
+})
+export type ToolResultPart = z.infer<typeof toolResultPartSchema>
diff --git a/.agents/types/data-content.ts b/.agents/types/data-content.ts
new file mode 100644
index 000000000..eb5c2e1ff
--- /dev/null
+++ b/.agents/types/data-content.ts
@@ -0,0 +1,14 @@
+import z from 'zod/v4'
+
+export const dataContentSchema = z.union([
+  z.string(),
+  z.instanceof(Uint8Array),
+  z.instanceof(ArrayBuffer),
+  z.custom<Buffer>(
+    // Buffer might not be available in some environments such as CloudFlare:
+    (value: unknown): value is Buffer =>
+      globalThis.Buffer?.isBuffer(value) ?? false,
+    { message: 'Must be a Buffer' },
+  ),
+])
+export type DataContent = z.infer<typeof dataContentSchema>
diff --git a/.agents/types/json.ts b/.agents/types/json.ts
new file mode 100644
index 000000000..167f8d051
--- /dev/null
+++ b/.agents/types/json.ts
@@ -0,0 +1,29 @@
+import z from 'zod/v4'
+
+export type JSONValue =
+  | null
+  | string
+  | number
+  | boolean
+  | JSONObject
+  | JSONArray
+export const jsonValueSchema: z.ZodType<JSONValue> = z.lazy(() =>
+  z.union([
+    z.null(),
+    z.string(),
+    z.number(),
+    z.boolean(),
+    jsonObjectSchema,
+    jsonArraySchema,
+  ]),
+)
+
+export const jsonObjectSchema: z.ZodType<JSONObject> = z.lazy(() =>
+  z.record(z.string(), jsonValueSchema),
+)
+export type JSONObject = { [key: string]: JSONValue }
+
+export const jsonArraySchema: z.ZodType<JSONArray> = z.lazy(() =>
+  z.array(jsonValueSchema),
+)
+export type JSONArray = JSONValue[]
diff --git a/.agents/types/provider-metadata.ts b/.agents/types/provider-metadata.ts
new file mode 100644
index 000000000..5749359fe
--- /dev/null
+++ b/.agents/types/provider-metadata.ts
@@ -0,0 +1,10 @@
+import z from 'zod/v4'
+
+import { jsonValueSchema } from './json'
+
+export const providerMetadataSchema = z.record(
+  z.string(),
+  z.record(z.string(), jsonValueSchema),
+)
+
+export type ProviderMetadata = z.infer<typeof providerMetadataSchema>
diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts
index 09b52b2b5..9acee92bd 100644
--- a/.agents/types/tools.ts
+++ b/.agents/types/tools.ts
@@ -1,4 +1,4 @@
-import type { Message } from '@codebuff/sdk'
+import type { Message } from './codebuff-message'
 
 /**
  * Union type of all available tool names
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index ccba67399..fa71fef3f 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -14,6 +14,10 @@
  *   export default definition
  */
 
+import type { Message } from './codebuff-message'
+import type * as Tools from './tools'
+type ToolName = Tools.ToolName
+
 // ============================================================================
 // Agent Definition and Utility Types
 // ============================================================================
@@ -344,7 +348,4 @@ export type ModelName =
   | 'z-ai/glm-4.5:nitro'
   | (string & {})
 
-import type { Message } from './codebuff-message'
-import type * as Tools from './tools'
 export type { Tools }
-type ToolName = Tools.ToolName
diff --git a/common/src/templates/initial-agents-dir/types/content-part.ts b/common/src/templates/initial-agents-dir/types/content-part.ts
index ff01b1f0c..474ce335e 100644
--- a/common/src/templates/initial-agents-dir/types/content-part.ts
+++ b/common/src/templates/initial-agents-dir/types/content-part.ts
@@ -1,8 +1,8 @@
 import z from 'zod/v4'
 
-import { providerMetadataSchema } from './provider-metadata'
-import { jsonValueSchema } from '../json'
 import { dataContentSchema } from './data-content'
+import { jsonValueSchema } from './json'
+import { providerMetadataSchema } from './provider-metadata'
 
 export const textPartSchema = z.object({
   type: z.literal('text'),
diff --git a/common/src/templates/initial-agents-dir/types/json.ts b/common/src/templates/initial-agents-dir/types/json.ts
new file mode 100644
index 000000000..167f8d051
--- /dev/null
+++ b/common/src/templates/initial-agents-dir/types/json.ts
@@ -0,0 +1,29 @@
+import z from 'zod/v4'
+
+export type JSONValue =
+  | null
+  | string
+  | number
+  | boolean
+  | JSONObject
+  | JSONArray
+export const jsonValueSchema: z.ZodType<JSONValue> = z.lazy(() =>
+  z.union([
+    z.null(),
+    z.string(),
+    z.number(),
+    z.boolean(),
+    jsonObjectSchema,
+    jsonArraySchema,
+  ]),
+)
+
+export const jsonObjectSchema: z.ZodType<JSONObject> = z.lazy(() =>
+  z.record(z.string(), jsonValueSchema),
+)
+export type JSONObject = { [key: string]: JSONValue }
+
+export const jsonArraySchema: z.ZodType<JSONArray> = z.lazy(() =>
+  z.array(jsonValueSchema),
+)
+export type JSONArray = JSONValue[]
diff --git a/common/src/templates/initial-agents-dir/types/provider-metadata.ts b/common/src/templates/initial-agents-dir/types/provider-metadata.ts
index 29d4b20f2..5749359fe 100644
--- a/common/src/templates/initial-agents-dir/types/provider-metadata.ts
+++ b/common/src/templates/initial-agents-dir/types/provider-metadata.ts
@@ -1,6 +1,6 @@
 import z from 'zod/v4'
 
-import { jsonValueSchema } from '../json'
+import { jsonValueSchema } from './json'
 
 export const providerMetadataSchema = z.record(
   z.string(),
diff --git a/common/src/templates/initial-agents-dir/types/tools.ts b/common/src/templates/initial-agents-dir/types/tools.ts
index 18b20f563..9acee92bd 100644
--- a/common/src/templates/initial-agents-dir/types/tools.ts
+++ b/common/src/templates/initial-agents-dir/types/tools.ts
@@ -1,3 +1,5 @@
+import type { Message } from './codebuff-message'
+
 /**
  * Union type of all available tool names
  */
@@ -118,15 +120,7 @@ export interface RunTerminalCommandParams {
  * Set the conversation history to the provided messages.
  */
 export interface SetMessagesParams {
-  messages: {
-    role: 'user' | 'assistant'
-    content:
-      | string
-      | {
-          type: 'text'
-          text: string
-        }[]
-  }[]
+  messages: Message[]
 }
 
 /**
diff --git a/npm-app/src/cli-handlers/agents.ts b/npm-app/src/cli-handlers/agents.ts
index 744cf9748..2bee5e30a 100644
--- a/npm-app/src/cli-handlers/agents.ts
+++ b/npm-app/src/cli-handlers/agents.ts
@@ -7,18 +7,31 @@ import {
   extractAgentIdFromFileName,
 } from '@codebuff/common/util/agent-file-utils'
 import { green, yellow, cyan, magenta, bold, gray, red } from 'picocolors'
+
 // Import files to replicate in the user's .agents directory. Bun bundler requires relative paths.
+
+import basicDiffReviewer from '../../../common/src/templates/initial-agents-dir/examples/01-basic-diff-reviewer' with { type: 'text' }
+import intermediateGitCommitter from '../../../common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer' with { type: 'text' }
+import advancedFileExplorer from '../../../common/src/templates/initial-agents-dir/examples/03-advanced-file-explorer' with { type: 'text' }
+import myCustomAgent from '../../../common/src/templates/initial-agents-dir/my-custom-agent' with { type: 'text' }
+
+// @ts-ignore - No default import, but we are importing as text so it's fine
 // @ts-ignore - It complains about the .md file, but it works.
 import readmeContent from '../../../common/src/templates/initial-agents-dir/README.md' with { type: 'text' }
 // @ts-ignore - No default import, but we are importing as text so it's fine
 import agentDefinitionTypes from '../../../common/src/templates/initial-agents-dir/types/agent-definition' with { type: 'text' }
 // @ts-ignore - No default import, but we are importing as text so it's fine
+import messageTypes from '../../../common/src/templates/initial-agents-dir/types/codebuff-message' with { type: 'text' }
+// @ts-ignore - No default import, but we are importing as text so it's fine
+import contentPartTypes from '../../../common/src/templates/initial-agents-dir/types/content-part' with { type: 'text' }
+// @ts-ignore - No default import, but we are importing as text so it's fine
+import dataContentTypes from '../../../common/src/templates/initial-agents-dir/types/data-content' with { type: 'text' }
+// @ts-ignore - No default import, but we are importing as text so it's fine
+import jsonTypes from '../../../common/src/templates/initial-agents-dir/types/json' with { type: 'text' }
+// @ts-ignore - No default import, but we are importing as text so it's fine
+import providerMetadataTypes from '../../../common/src/templates/initial-agents-dir/types/provider-metadata' with { type: 'text' }
+// @ts-ignore - No default import, but we are importing as text so it's fine
 import toolsTypes from '../../../common/src/templates/initial-agents-dir/types/tools' with { type: 'text' }
-import basicDiffReviewer from '../../../common/src/templates/initial-agents-dir/examples/01-basic-diff-reviewer' with { type: 'text' }
-import intermediateGitCommitter from '../../../common/src/templates/initial-agents-dir/examples/02-intermediate-git-committer' with { type: 'text' }
-import advancedFileExplorer from '../../../common/src/templates/initial-agents-dir/examples/03-advanced-file-explorer' with { type: 'text' }
-import myCustomAgent from '../../../common/src/templates/initial-agents-dir/my-custom-agent' with { type: 'text' }
-
 import {
   loadLocalAgents,
   getLoadedAgentNames,
@@ -657,6 +670,31 @@ async function createExampleAgentFiles() {
       content: toolsTypes,
       description: 'TypeScript type definitions for tools',
     },
+    {
+      path: path.join(typesDir, 'codebuff-message.ts'),
+      content: messageTypes,
+      description: 'TypeScript type definitions for messages',
+    },
+    {
+      path: path.join(typesDir, 'content-part.ts'),
+      content: contentPartTypes,
+      description: 'TypeScript type definitions for content parts',
+    },
+    {
+      path: path.join(typesDir, 'data-content.ts'),
+      content: dataContentTypes,
+      description: 'TypeScript type definitions for data content',
+    },
+    {
+      path: path.join(typesDir, 'provider-metadata.ts'),
+      content: providerMetadataTypes,
+      description: 'TypeScript type definitions for provider metadata',
+    },
+    {
+      path: path.join(typesDir, 'json.ts'),
+      content: jsonTypes,
+      description: 'TypeScript type definitions for JSON',
+    },
     {
       path: path.join(agentsDir, 'my-custom-agent.ts'),
       content: myCustomAgent,

From 10c4d2cfc9641425b84f2155125606697c6efe85 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Fri, 29 Aug 2025 10:59:19 -0700
Subject: [PATCH 16/18] remove dependency from @codebuff/sdk

---
 .agents/__tests__/context-pruner.test.ts | 2 +-
 .agents/context-pruner.ts                | 2 +-
 .agents/package.json                     | 3 ---
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/.agents/__tests__/context-pruner.test.ts b/.agents/__tests__/context-pruner.test.ts
index 790e9d871..05a8f0125 100644
--- a/.agents/__tests__/context-pruner.test.ts
+++ b/.agents/__tests__/context-pruner.test.ts
@@ -2,7 +2,7 @@ import { describe, test, expect, beforeEach } from 'bun:test'
 
 import contextPruner from '../context-pruner'
 
-import type { Message } from '@codebuff/sdk'
+import type { Message } from '../types/codebuff-message'
 
 describe('context-pruner handleSteps', () => {
   let mockAgentState: any
diff --git a/.agents/context-pruner.ts b/.agents/context-pruner.ts
index 78c4e327a..9b9604e3d 100644
--- a/.agents/context-pruner.ts
+++ b/.agents/context-pruner.ts
@@ -1,8 +1,8 @@
 import { publisher } from './constants'
 
 import type { AgentDefinition, ToolCall } from './types/agent-definition'
+import type { Message, ToolMessage } from './types/codebuff-message'
 import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
-import type { Message, ToolMessage } from '@codebuff/sdk'
 
 const definition: AgentDefinition = {
   id: 'context-pruner',
diff --git a/.agents/package.json b/.agents/package.json
index b995f9b5d..436971bde 100644
--- a/.agents/package.json
+++ b/.agents/package.json
@@ -6,8 +6,5 @@
   "scripts": {
     "typecheck": "bun x tsc --noEmit -p tsconfig.json",
     "test": "bun test"
-  },
-  "dependencies": {
-    "@codebuff/sdk": "workspace:*"
   }
 }

From 98242e50875b67ea8b8f3a218df469084b872c07 Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Fri, 29 Aug 2025 11:18:39 -0700
Subject: [PATCH 17/18] fix .agents tests

---
 .agents/__tests__/context-pruner.test.ts      | 75 ++++++-------------
 .../__tests__/run-programmatic-step.test.ts   |  1 -
 2 files changed, 21 insertions(+), 55 deletions(-)

diff --git a/.agents/__tests__/context-pruner.test.ts b/.agents/__tests__/context-pruner.test.ts
index 05a8f0125..552652891 100644
--- a/.agents/__tests__/context-pruner.test.ts
+++ b/.agents/__tests__/context-pruner.test.ts
@@ -87,30 +87,14 @@ describe('context-pruner handleSteps', () => {
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
-    expect(results[0]).toEqual({
-      toolName: 'set_messages',
-      input: {
-        messages,
-      },
-    })
-  })
-
-  test('removes spawn_agent_inline call for context-pruner and following messages', () => {
-    const messages = [
-      createMessage('user', 'Hello'),
-      createMessage(
-        'assistant',
-        'I will spawn the context-pruner agent.\n\n<codebuff_tool_call>\n{\n  "cb_tool_name": "spawn_agent_inline",\n  "agent_type": "context-pruner"\n}\n</codebuff_tool_call>',
-      ),
-      createMessage('user', '{"params": {"maxContextLength": 100000}}'),
-      createMessage('user', 'Tools and instructions'),
-    ]
-
-    const results = runHandleSteps(messages)
-
-    expect(results).toHaveLength(1)
-    expect(results[0].input.messages).toHaveLength(1)
-    expect(results[0].input.messages[0]).toEqual(createMessage('user', 'Hello'))
+    expect(results[0]).toEqual(
+      expect.objectContaining({
+        toolName: 'set_messages',
+        input: {
+          messages,
+        },
+      }),
+    )
   })
 
   test('does not remove messages if assistant message does not contain context-pruner spawn call', () => {
@@ -125,23 +109,6 @@ describe('context-pruner handleSteps', () => {
     expect(results[0].input.messages).toHaveLength(3)
   })
 
-  test('handles context-pruner spawn call without enough following messages', () => {
-    const messages = [
-      createMessage('user', 'Hello'),
-      createMessage(
-        'assistant',
-        'I will spawn the context-pruner agent.\n\n<codebuff_tool_call>\n{\n  "cb_tool_name": "spawn_agent_inline",\n  "agent_type": "context-pruner"\n}\n</codebuff_tool_call>',
-      ),
-      createMessage('user', '{"params": {"maxContextLength": 100000}}'),
-    ]
-
-    const results = runHandleSteps(messages)
-
-    expect(results).toHaveLength(1)
-    // Should preserve all messages since there aren't 3 messages to remove
-    expect(results[0].input.messages).toHaveLength(1)
-  })
-
   test('removes old terminal command results while keeping recent 5', () => {
     // Create content large enough to exceed 200k token limit (~600k chars)
     const largeContent = 'x'.repeat(150000)
@@ -184,7 +151,7 @@ describe('context-pruner handleSteps', () => {
         m.content?.toolName === 'run_terminal_command' &&
         (m.content?.output?.[0]?.value?.command === 'command-7' ||
           m.content?.output?.[0]?.value?.message ===
-            '[Large tool result omitted]'),
+            '[LARGE_TOOL_RESULT_OMITTED]'),
     )
     expect(recentTerminalMessage).toBeDefined()
   })
@@ -214,7 +181,7 @@ describe('context-pruner handleSteps', () => {
       (m: any) => m.role === 'tool' && m.content?.toolName === 'read_files',
     )
     expect(largeResultMessage?.content?.output?.[0]?.value?.message).toBe(
-      '[Large tool result omitted]',
+      '[LARGE_TOOL_RESULT_OMITTED]',
     )
 
     // Small tool result should be preserved
@@ -358,7 +325,7 @@ describe('context-pruner edge cases', () => {
   const runHandleSteps = (messages: Message[]) => {
     mockAgentState.messageHistory = messages
     const generator = contextPruner.handleSteps!({ agentState: mockAgentState })
-    const results: any[] = []
+    const results: ReturnType<typeof generator.next>['value'][] = []
     let result = generator.next()
     while (!result.done) {
       if (typeof result.value === 'object') {
@@ -380,7 +347,7 @@ describe('context-pruner edge cases', () => {
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
-    const resultMessages = results[0].input.messages
+    const resultMessages = (results[0] as any).input.messages
 
     // Should handle terminal commands gracefully
     expect(resultMessages.length).toBeGreaterThan(0)
@@ -404,7 +371,7 @@ describe('context-pruner edge cases', () => {
 
     expect(results).toHaveLength(1)
     // Should handle boundary condition without errors
-    expect(results[0].input.messages).toBeDefined()
+    expect((results[0] as any).input.messages).toBeDefined()
   })
 
   test('preserves message order after pruning', () => {
@@ -421,7 +388,7 @@ describe('context-pruner edge cases', () => {
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
-    const resultMessages = results[0].input.messages
+    const resultMessages = (results[0] as any).input.messages
 
     // Check that remaining messages maintain chronological order
     let previousIndex = -1
@@ -455,7 +422,7 @@ describe('context-pruner edge cases', () => {
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
-    expect(results[0].input.messages).toHaveLength(3)
+    expect((results[0] as any).input.messages).toHaveLength(3)
   })
 
   test('handles tool results with various sizes around 1000 char threshold', () => {
@@ -492,7 +459,7 @@ describe('context-pruner edge cases', () => {
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
-    const resultMessages = results[0].input.messages
+    const resultMessages = (results[0] as any).input.messages
 
     // Check that some tool result processing occurred
     const hasToolResults = resultMessages.some((m: any) => m.role === 'tool')
@@ -503,7 +470,7 @@ describe('context-pruner edge cases', () => {
       (m: any) =>
         m.role === 'tool' &&
         m.content?.output?.[0]?.value?.message ===
-          '[Large tool result omitted]',
+          '[LARGE_TOOL_RESULT_OMITTED]',
     )
     expect(hasLargeToolResultReplacement).toBe(true)
   })
@@ -541,13 +508,13 @@ describe('context-pruner edge cases', () => {
 
       if (shouldRemove) {
         // Should remove the assistant message and following 2 user messages
-        expect(results[0].input.messages).toHaveLength(1)
-        expect(results[0].input.messages[0]).toEqual(
+        expect(results).toHaveLength(1)
+        expect((results[0] as any).input.messages[0]).toEqual(
           createMessage('user', 'Hello'),
         )
       } else {
         // Should preserve all messages
-        expect(results[0].input.messages).toHaveLength(4)
+        expect((results[0] as any).input.messages).toHaveLength(4)
       }
     })
   })
@@ -563,7 +530,7 @@ describe('context-pruner edge cases', () => {
     const results = runHandleSteps(messages)
 
     expect(results).toHaveLength(1)
-    const resultMessages = results[0].input.messages
+    const resultMessages = (results[0] as any).input.messages
 
     // Should not have consecutive replacement messages
     let consecutiveReplacements = 0
diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts
index 7b5aef07b..b42a0de9e 100644
--- a/backend/src/__tests__/run-programmatic-step.test.ts
+++ b/backend/src/__tests__/run-programmatic-step.test.ts
@@ -234,7 +234,6 @@ describe('runProgrammaticStep', () => {
         expect.objectContaining({
           toolName: 'add_message',
           input: { role: 'user', content: 'Hello world' },
-          includeToolCall: false,
         }),
       )
 

From 0b30a5d22c538ead6ecfdf92b9b40b55709dc2fd Mon Sep 17 00:00:00 2001
From: Charles Lien <charleslien97@gmail.com>
Date: Fri, 29 Aug 2025 11:24:41 -0700
Subject: [PATCH 18/18] bump sdk version

---
 sdk/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/package.json b/sdk/package.json
index 9f985197a..252b786e1 100644
--- a/sdk/package.json
+++ b/sdk/package.json
@@ -2,7 +2,7 @@
   "name": "@codebuff/sdk",
   "private": false,
   "access": "public",
-  "version": "0.1.18",
+  "version": "0.1.19",
   "description": "Official SDK for Codebuff — AI coding agent & framework",
   "license": "MIT",
   "type": "module",