From 7155a49771279063a8b9b647d33347811abc64c3 Mon Sep 17 00:00:00 2001 From: KITAITI Makoto Date: Fri, 15 May 2026 14:41:24 +0900 Subject: [PATCH 1/3] readme : update bindings (#23063) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index de93f17eb77..a0c14b9d7f0 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm) - Typescript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama) - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb) +- Ruby: [docusealco/rllama](https://github.com/docusealco/rllama) - Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs) - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp) - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs) From 91e84fed64329cd96202d68220724a1d92f5ec1f Mon Sep 17 00:00:00 2001 From: Sid Shaytay <2595088+SidShaytay@users.noreply.github.com> Date: Fri, 15 May 2026 00:03:24 -0700 Subject: [PATCH 2/3] Support for Codex CLI by skipping unsupported Responses tools (#23041) * Support for Codex CLI by skipping unsupported Responses tools * Warn on skipped Responses tools and preserve gpt-oss apply_patch rejection * Revert gpt-oss apply_patch special handling --- tests/test-chat.cpp | 77 ++++++++++++++++++++++++++++++++++++ tools/server/server-chat.cpp | 11 ++++-- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index ea9d87ebed2..05c60d29743 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1664,6 +1664,83 @@ static void test_convert_responses_to_chatcmpl() { assert_equals(false, result.contains("max_output_tokens")); assert_equals(100, result.at("max_tokens").get()); } + + // Test mixed Responses tools: convert only function tools + { + json input = json::parse(R"({ + "input": "Hello", + "model": "test-model", + "tools": [ + { + "type": "web_search" + }, + { + "type": "function", + "name": "get_weather", + "description": "Get weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string" + } + }, + "required": ["location"] + } + }, + { + "type": "image_generation" + }, + { + "type": "mcp", + "server_label": "test-server" + }, + { + "type": "namespace", + "name": "browser" + } + ] + })"); + + json result = server_chat_convert_responses_to_chatcmpl(input); + + assert_equals(true, result.contains("tools")); + assert_equals(true, result.at("tools").is_array()); + assert_equals((size_t)1, result.at("tools").size()); + + const auto & tool = result.at("tools")[0]; + assert_equals(std::string("function"), tool.at("type").get()); + assert_equals(std::string("get_weather"), tool.at("function").at("name").get()); + assert_equals(true, tool.at("function").at("strict").get()); + } + + // Test non-function Responses tools are ignored + { + json input = json::parse(R"({ + "input": "Hello", + "model": "test-model", + "tools": [ + { + "type": "web_search" + }, + { + "type": "image_generation" + }, + { + "type": "mcp", + "server_label": "test-server" + }, + { + "type": "namespace", + "name": "browser" + } + ] + })"); + + json result = server_chat_convert_responses_to_chatcmpl(input); + + assert_equals(false, result.contains("tools")); + } } static void test_template_output_peg_parsers(bool detailed_debug) { diff --git a/tools/server/server-chat.cpp b/tools/server/server-chat.cpp index f276f8da58f..02858a2a028 100644 --- a/tools/server/server-chat.cpp +++ b/tools/server/server-chat.cpp @@ -257,8 +257,11 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) { for (json resp_tool : response_body.at("tools")) { json chatcmpl_tool; - if (json_value(resp_tool, "type", std::string()) != "function") { - throw std::invalid_argument("'type' of tool must be 'function'"); + const std::string type = json_value(resp_tool, "type", std::string()); + if (type != "function") { + // Non-function Responses tools have no Chat Completions equivalent. + SRV_WRN("unsupported Responses tool type '%s' skipped\n", type.c_str()); + continue; } resp_tool.erase("type"); chatcmpl_tool["type"] = "function"; @@ -270,7 +273,9 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_tools.push_back(chatcmpl_tool); } chatcmpl_body.erase("tools"); - chatcmpl_body["tools"] = chatcmpl_tools; + if (!chatcmpl_tools.empty()) { + chatcmpl_body["tools"] = chatcmpl_tools; + } } if (response_body.contains("max_output_tokens")) { From d5284445802b5af6bff3240329f2558f9c35b5f5 Mon Sep 17 00:00:00 2001 From: Pascal Date: Fri, 15 May 2026 11:18:11 +0200 Subject: [PATCH 3/3] webui: preserve partial response on streaming error (#23090) --- .../server/webui/src/lib/constants/agentic.ts | 3 --- .../webui/src/lib/stores/agentic.svelte.ts | 12 +++------- .../webui/src/lib/stores/chat.svelte.ts | 23 +++++++++++-------- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/tools/server/webui/src/lib/constants/agentic.ts b/tools/server/webui/src/lib/constants/agentic.ts index 4fe6da61c38..c0575163efc 100644 --- a/tools/server/webui/src/lib/constants/agentic.ts +++ b/tools/server/webui/src/lib/constants/agentic.ts @@ -4,9 +4,6 @@ export const ATTACHMENT_SAVED_REGEX = /\[Attachment saved: ([^\]]+)\]/; export const NEWLINE_SEPARATOR = '\n'; -export const LLM_ERROR_BLOCK_START = '\n\n```\nUpstream LLM error:\n'; -export const LLM_ERROR_BLOCK_END = '\n```\n'; - export const DEFAULT_AGENTIC_CONFIG: AgenticConfig = { enabled: true, maxTurns: 100, diff --git a/tools/server/webui/src/lib/stores/agentic.svelte.ts b/tools/server/webui/src/lib/stores/agentic.svelte.ts index 3334f6c111a..1f1f05c4539 100644 --- a/tools/server/webui/src/lib/stores/agentic.svelte.ts +++ b/tools/server/webui/src/lib/stores/agentic.svelte.ts @@ -30,12 +30,7 @@ import { ToolSource, ToolPermissionDecision } from '$lib/enums'; import { SvelteMap } from 'svelte/reactivity'; import { ToolsService } from '$lib/services/tools.service'; import { isAbortError } from '$lib/utils'; -import { - DEFAULT_AGENTIC_CONFIG, - NEWLINE_SEPARATOR, - LLM_ERROR_BLOCK_START, - LLM_ERROR_BLOCK_END -} from '$lib/constants'; +import { DEFAULT_AGENTIC_CONFIG, NEWLINE_SEPARATOR } from '$lib/constants'; import { IMAGE_MIME_TO_EXTENSION, DATA_URI_BASE64_REGEX, @@ -640,10 +635,9 @@ class AgenticStore { return; } const normalizedError = error instanceof Error ? error : new Error('LLM stream error'); - // Save error as content in the current turn - onChunk?.(`${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`); + // preserve partial output as is, the outer error dialog informs the user separately await onAssistantTurnComplete?.( - turnContent + `${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`, + turnContent, turnReasoningContent || undefined, this.buildFinalTimings(capturedTimings, agenticTimings), undefined diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 7c34579ca56..04a735eec91 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -814,7 +814,7 @@ class ChatStore { ); } }, - onError: (error: Error) => { + onError: async (error: Error) => { this.setStreamingActive(false); if (isAbortError(error)) { cleanupStreamingState(); @@ -826,13 +826,10 @@ class ChatStore { return; } console.error('Streaming error:', error); + // keep whatever was streamed so far, the message stays in memory and in DB + await this.savePartialResponseIfNeeded(convId); cleanupStreamingState(); this.clearPendingMessage(convId); - const idx = conversationsStore.findMessageIndex(assistantMessage.id); - if (idx !== -1) { - const failedMessage = conversationsStore.removeMessageAtIndex(idx); - if (failedMessage) DatabaseService.deleteMessage(failedMessage.id).catch(console.error); - } const contextInfo = ( error as Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } } ).contextInfo; @@ -1389,9 +1386,17 @@ class ChatStore { } console.error('Continue generation error:', error); - conversationsStore.updateMessageAtIndex(idx, { content: originalContent }); - - await DatabaseService.updateMessage(msg.id, { content: originalContent }); + // keep whatever was appended so far, the message stays in memory and in DB + await DatabaseService.updateMessage(msg.id, { + content: originalContent + appendedContent, + reasoningContent: originalReasoning + appendedReasoning || undefined, + timestamp: Date.now() + }); + conversationsStore.updateMessageAtIndex(idx, { + content: originalContent + appendedContent, + reasoningContent: originalReasoning + appendedReasoning || undefined, + timestamp: Date.now() + }); this.setChatLoading(msg.convId, false); this.clearChatStreaming(msg.convId);