CrazyForks · pull · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/README.md b/README.md
@@ -172,6 +172,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm)
 - Typescript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama)
 - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
+- Ruby: [docusealco/rllama](https://github.com/docusealco/rllama)
 - Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs)
 - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
 - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs)

diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
@@ -1664,6 +1664,83 @@ static void test_convert_responses_to_chatcmpl() {
         assert_equals(false, result.contains("max_output_tokens"));
         assert_equals(100, result.at("max_tokens").get<int>());
     }
+
+    // Test mixed Responses tools: convert only function tools
+    {
+        json input = json::parse(R"({
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [
+                {
+                    "type": "web_search"
+                },
+                {
+                    "type": "function",
+                    "name": "get_weather",
+                    "description": "Get weather for a location",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string"
+                            }
+                        },
+                        "required": ["location"]
+                    }
+                },
+                {
+                    "type": "image_generation"
+                },
+                {
+                    "type": "mcp",
+                    "server_label": "test-server"
+                },
+                {
+                    "type": "namespace",
+                    "name": "browser"
+                }
+            ]
+        })");
+
+        json result = server_chat_convert_responses_to_chatcmpl(input);
+
+        assert_equals(true, result.contains("tools"));
+        assert_equals(true, result.at("tools").is_array());
+        assert_equals((size_t)1, result.at("tools").size());
+
+        const auto & tool = result.at("tools")[0];
+        assert_equals(std::string("function"), tool.at("type").get<std::string>());
+        assert_equals(std::string("get_weather"), tool.at("function").at("name").get<std::string>());
+        assert_equals(true, tool.at("function").at("strict").get<bool>());
+    }
+
+    // Test non-function Responses tools are ignored
+    {
+        json input = json::parse(R"({
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [
+                {
+                    "type": "web_search"
+                },
+                {
+                    "type": "image_generation"
+                },
+                {
+                    "type": "mcp",
+                    "server_label": "test-server"
+                },
+                {
+                    "type": "namespace",
+                    "name": "browser"
+                }
+            ]
+        })");
+
+        json result = server_chat_convert_responses_to_chatcmpl(input);
+
+        assert_equals(false, result.contains("tools"));
+    }
 }
 
 static void test_template_output_peg_parsers(bool detailed_debug) {

diff --git a/tools/server/server-chat.cpp b/tools/server/server-chat.cpp
@@ -257,8 +257,11 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) {
         for (json resp_tool : response_body.at("tools")) {
             json chatcmpl_tool;
 
-            if (json_value(resp_tool, "type", std::string()) != "function") {
-                throw std::invalid_argument("'type' of tool must be 'function'");
+            const std::string type = json_value(resp_tool, "type", std::string());
+            if (type != "function") {
+                // Non-function Responses tools have no Chat Completions equivalent.
+                SRV_WRN("unsupported Responses tool type '%s' skipped\n", type.c_str());
+                continue;
             }
             resp_tool.erase("type");
             chatcmpl_tool["type"] = "function";
@@ -270,7 +273,9 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) {
             chatcmpl_tools.push_back(chatcmpl_tool);
         }
         chatcmpl_body.erase("tools");
-        chatcmpl_body["tools"] = chatcmpl_tools;
+        if (!chatcmpl_tools.empty()) {
+            chatcmpl_body["tools"] = chatcmpl_tools;
+        }
     }
 
     if (response_body.contains("max_output_tokens")) {

diff --git a/tools/server/webui/src/lib/constants/agentic.ts b/tools/server/webui/src/lib/constants/agentic.ts
@@ -4,9 +4,6 @@ export const ATTACHMENT_SAVED_REGEX = /\[Attachment saved: ([^\]]+)\]/;
 
 export const NEWLINE_SEPARATOR = '\n';
 
-export const LLM_ERROR_BLOCK_START = '\n\n```\nUpstream LLM error:\n';
-export const LLM_ERROR_BLOCK_END = '\n```\n';
-
 export const DEFAULT_AGENTIC_CONFIG: AgenticConfig = {
 	enabled: true,
 	maxTurns: 100,

diff --git a/tools/server/webui/src/lib/stores/agentic.svelte.ts b/tools/server/webui/src/lib/stores/agentic.svelte.ts
@@ -30,12 +30,7 @@ import { ToolSource, ToolPermissionDecision } from '$lib/enums';
 import { SvelteMap } from 'svelte/reactivity';
 import { ToolsService } from '$lib/services/tools.service';
 import { isAbortError } from '$lib/utils';
-import {
-	DEFAULT_AGENTIC_CONFIG,
-	NEWLINE_SEPARATOR,
-	LLM_ERROR_BLOCK_START,
-	LLM_ERROR_BLOCK_END
-} from '$lib/constants';
+import { DEFAULT_AGENTIC_CONFIG, NEWLINE_SEPARATOR } from '$lib/constants';
 import {
 	IMAGE_MIME_TO_EXTENSION,
 	DATA_URI_BASE64_REGEX,
@@ -640,10 +635,9 @@ class AgenticStore {
 					return;
 				}
 				const normalizedError = error instanceof Error ? error : new Error('LLM stream error');
-				// Save error as content in the current turn
-				onChunk?.(`${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`);
+				// preserve the partial output as-is; the outer error dialog informs the user separately
 				await onAssistantTurnComplete?.(
-					turnContent + `${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`,
+					turnContent,
 					turnReasoningContent || undefined,
 					this.buildFinalTimings(capturedTimings, agenticTimings),
 					undefined

diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -814,7 +814,7 @@ class ChatStore {
 					);
 				}
 			},
-			onError: (error: Error) => {
+			onError: async (error: Error) => {
 				this.setStreamingActive(false);
 				if (isAbortError(error)) {
 					cleanupStreamingState();
@@ -826,13 +826,10 @@ class ChatStore {
 					return;
 				}
 				console.error('Streaming error:', error);
+				// keep whatever was streamed so far; the message stays in memory and in the DB
+				await this.savePartialResponseIfNeeded(convId);
 				cleanupStreamingState();
 				this.clearPendingMessage(convId);
-				const idx = conversationsStore.findMessageIndex(assistantMessage.id);
-				if (idx !== -1) {
-					const failedMessage = conversationsStore.removeMessageAtIndex(idx);
-					if (failedMessage) DatabaseService.deleteMessage(failedMessage.id).catch(console.error);
-				}
 				const contextInfo = (
 					error as Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }
 				).contextInfo;
@@ -1389,9 +1386,17 @@ class ChatStore {
 						}
 
 						console.error('Continue generation error:', error);
-						conversationsStore.updateMessageAtIndex(idx, { content: originalContent });
-
-						await DatabaseService.updateMessage(msg.id, { content: originalContent });
+						// keep whatever was appended so far; the message stays in memory and in the DB
+						await DatabaseService.updateMessage(msg.id, {
+							content: originalContent + appendedContent,
+							reasoningContent: originalReasoning + appendedReasoning || undefined,
+							timestamp: Date.now()
+						});
+						conversationsStore.updateMessageAtIndex(idx, {
+							content: originalContent + appendedContent,
+							reasoningContent: originalReasoning + appendedReasoning || undefined,
+							timestamp: Date.now()
+						});
 
 						this.setChatLoading(msg.convId, false);
 						this.clearChatStreaming(msg.convId);