Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
- JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm)
- TypeScript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama)
- Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
- Ruby: [docusealco/rllama](https://github.com/docusealco/rllama)
- Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs)
- Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
- Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs)
Expand Down
77 changes: 77 additions & 0 deletions tests/test-chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,83 @@ static void test_convert_responses_to_chatcmpl() {
assert_equals(false, result.contains("max_output_tokens"));
assert_equals(100, result.at("max_tokens").get<int>());
}

// Test mixed Responses tools: convert only function tools
{
json input = json::parse(R"({
"input": "Hello",
"model": "test-model",
"tools": [
{
"type": "web_search"
},
{
"type": "function",
"name": "get_weather",
"description": "Get weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string"
}
},
"required": ["location"]
}
},
{
"type": "image_generation"
},
{
"type": "mcp",
"server_label": "test-server"
},
{
"type": "namespace",
"name": "browser"
}
]
})");

json result = server_chat_convert_responses_to_chatcmpl(input);

assert_equals(true, result.contains("tools"));
assert_equals(true, result.at("tools").is_array());
assert_equals((size_t)1, result.at("tools").size());

const auto & tool = result.at("tools")[0];
assert_equals(std::string("function"), tool.at("type").get<std::string>());
assert_equals(std::string("get_weather"), tool.at("function").at("name").get<std::string>());
assert_equals(true, tool.at("function").at("strict").get<bool>());
}

// Test non-function Responses tools are ignored
{
json input = json::parse(R"({
"input": "Hello",
"model": "test-model",
"tools": [
{
"type": "web_search"
},
{
"type": "image_generation"
},
{
"type": "mcp",
"server_label": "test-server"
},
{
"type": "namespace",
"name": "browser"
}
]
})");

json result = server_chat_convert_responses_to_chatcmpl(input);

assert_equals(false, result.contains("tools"));
}
}

static void test_template_output_peg_parsers(bool detailed_debug) {
Expand Down
11 changes: 8 additions & 3 deletions tools/server/server-chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,11 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) {
for (json resp_tool : response_body.at("tools")) {
json chatcmpl_tool;

if (json_value(resp_tool, "type", std::string()) != "function") {
throw std::invalid_argument("'type' of tool must be 'function'");
const std::string type = json_value(resp_tool, "type", std::string());
if (type != "function") {
// Non-function Responses tools have no Chat Completions equivalent.
SRV_WRN("unsupported Responses tool type '%s' skipped\n", type.c_str());
continue;
}
resp_tool.erase("type");
chatcmpl_tool["type"] = "function";
Expand All @@ -270,7 +273,9 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) {
chatcmpl_tools.push_back(chatcmpl_tool);
}
chatcmpl_body.erase("tools");
chatcmpl_body["tools"] = chatcmpl_tools;
if (!chatcmpl_tools.empty()) {
chatcmpl_body["tools"] = chatcmpl_tools;
}
}

if (response_body.contains("max_output_tokens")) {
Expand Down
3 changes: 0 additions & 3 deletions tools/server/webui/src/lib/constants/agentic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ export const ATTACHMENT_SAVED_REGEX = /\[Attachment saved: ([^\]]+)\]/;

export const NEWLINE_SEPARATOR = '\n';

export const LLM_ERROR_BLOCK_START = '\n\n```\nUpstream LLM error:\n';
export const LLM_ERROR_BLOCK_END = '\n```\n';

export const DEFAULT_AGENTIC_CONFIG: AgenticConfig = {
enabled: true,
maxTurns: 100,
Expand Down
12 changes: 3 additions & 9 deletions tools/server/webui/src/lib/stores/agentic.svelte.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,7 @@ import { ToolSource, ToolPermissionDecision } from '$lib/enums';
import { SvelteMap } from 'svelte/reactivity';
import { ToolsService } from '$lib/services/tools.service';
import { isAbortError } from '$lib/utils';
import {
DEFAULT_AGENTIC_CONFIG,
NEWLINE_SEPARATOR,
LLM_ERROR_BLOCK_START,
LLM_ERROR_BLOCK_END
} from '$lib/constants';
import { DEFAULT_AGENTIC_CONFIG, NEWLINE_SEPARATOR } from '$lib/constants';
import {
IMAGE_MIME_TO_EXTENSION,
DATA_URI_BASE64_REGEX,
Expand Down Expand Up @@ -640,10 +635,9 @@ class AgenticStore {
return;
}
const normalizedError = error instanceof Error ? error : new Error('LLM stream error');
// Save error as content in the current turn
onChunk?.(`${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`);
// preserve the partial output as is; the outer error dialog informs the user separately
await onAssistantTurnComplete?.(
turnContent + `${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`,
turnContent,
turnReasoningContent || undefined,
this.buildFinalTimings(capturedTimings, agenticTimings),
undefined
Expand Down
23 changes: 14 additions & 9 deletions tools/server/webui/src/lib/stores/chat.svelte.ts
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ class ChatStore {
);
}
},
onError: (error: Error) => {
onError: async (error: Error) => {
this.setStreamingActive(false);
if (isAbortError(error)) {
cleanupStreamingState();
Expand All @@ -826,13 +826,10 @@ class ChatStore {
return;
}
console.error('Streaming error:', error);
// keep whatever was streamed so far; the message stays in memory and in the DB
await this.savePartialResponseIfNeeded(convId);
cleanupStreamingState();
this.clearPendingMessage(convId);
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
if (idx !== -1) {
const failedMessage = conversationsStore.removeMessageAtIndex(idx);
if (failedMessage) DatabaseService.deleteMessage(failedMessage.id).catch(console.error);
}
const contextInfo = (
error as Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }
).contextInfo;
Expand Down Expand Up @@ -1389,9 +1386,17 @@ class ChatStore {
}

console.error('Continue generation error:', error);
conversationsStore.updateMessageAtIndex(idx, { content: originalContent });

await DatabaseService.updateMessage(msg.id, { content: originalContent });
// keep whatever was appended so far; the message stays in memory and in the DB
await DatabaseService.updateMessage(msg.id, {
content: originalContent + appendedContent,
reasoningContent: originalReasoning + appendedReasoning || undefined,
timestamp: Date.now()
});
conversationsStore.updateMessageAtIndex(idx, {
content: originalContent + appendedContent,
reasoningContent: originalReasoning + appendedReasoning || undefined,
timestamp: Date.now()
});

this.setChatLoading(msg.convId, false);
this.clearChatStreaming(msg.convId);
Expand Down
Loading