From 7155a49771279063a8b9b647d33347811abc64c3 Mon Sep 17 00:00:00 2001
From: KITAITI Makoto <KitaitiMakoto@gmail.com>
Date: Fri, 15 May 2026 14:41:24 +0900
Subject: [PATCH 1/3] readme : update bindings (#23063)

---
 README.md | 1 +
 1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index de93f17eb77..a0c14b9d7f0 100644
--- a/README.md
+++ b/README.md
@@ -172,6 +172,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm)
 - Typescript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama)
 - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
+- Ruby: [docusealco/rllama](https://github.com/docusealco/rllama)
 - Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs)
 - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
 - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs)

From 91e84fed64329cd96202d68220724a1d92f5ec1f Mon Sep 17 00:00:00 2001
From: Sid Shaytay <2595088+SidShaytay@users.noreply.github.com>
Date: Fri, 15 May 2026 00:03:24 -0700
Subject: [PATCH 2/3] Support for Codex CLI by skipping unsupported Responses
 tools (#23041)

* Support for Codex CLI by skipping unsupported Responses tools

* Warn on skipped Responses tools and preserve gpt-oss apply_patch rejection

* Revert gpt-oss apply_patch special handling
---
 tests/test-chat.cpp          | 77 ++++++++++++++++++++++++++++++++++++
 tools/server/server-chat.cpp | 11 ++++--
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index ea9d87ebed2..05c60d29743 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1664,6 +1664,83 @@ static void test_convert_responses_to_chatcmpl() {
         assert_equals(false, result.contains("max_output_tokens"));
         assert_equals(100, result.at("max_tokens").get<int>());
     }
+
+    // Test mixed Responses tools: convert only function tools
+    {
+        json input = json::parse(R"({
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [
+                {
+                    "type": "web_search"
+                },
+                {
+                    "type": "function",
+                    "name": "get_weather",
+                    "description": "Get weather for a location",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string"
+                            }
+                        },
+                        "required": ["location"]
+                    }
+                },
+                {
+                    "type": "image_generation"
+                },
+                {
+                    "type": "mcp",
+                    "server_label": "test-server"
+                },
+                {
+                    "type": "namespace",
+                    "name": "browser"
+                }
+            ]
+        })");
+
+        json result = server_chat_convert_responses_to_chatcmpl(input);
+
+        assert_equals(true, result.contains("tools"));
+        assert_equals(true, result.at("tools").is_array());
+        assert_equals((size_t)1, result.at("tools").size());
+
+        const auto & tool = result.at("tools")[0];
+        assert_equals(std::string("function"), tool.at("type").get<std::string>());
+        assert_equals(std::string("get_weather"), tool.at("function").at("name").get<std::string>());
+        assert_equals(true, tool.at("function").at("strict").get<bool>());
+    }
+
+    // Test non-function Responses tools are ignored
+    {
+        json input = json::parse(R"({
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [
+                {
+                    "type": "web_search"
+                },
+                {
+                    "type": "image_generation"
+                },
+                {
+                    "type": "mcp",
+                    "server_label": "test-server"
+                },
+                {
+                    "type": "namespace",
+                    "name": "browser"
+                }
+            ]
+        })");
+
+        json result = server_chat_convert_responses_to_chatcmpl(input);
+
+        assert_equals(false, result.contains("tools"));
+    }
 }
 
 static void test_template_output_peg_parsers(bool detailed_debug) {
diff --git a/tools/server/server-chat.cpp b/tools/server/server-chat.cpp
index f276f8da58f..02858a2a028 100644
--- a/tools/server/server-chat.cpp
+++ b/tools/server/server-chat.cpp
@@ -257,8 +257,11 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) {
         for (json resp_tool : response_body.at("tools")) {
             json chatcmpl_tool;
 
-            if (json_value(resp_tool, "type", std::string()) != "function") {
-                throw std::invalid_argument("'type' of tool must be 'function'");
+            const std::string type = json_value(resp_tool, "type", std::string());
+            if (type != "function") {
+                // Non-function Responses tools have no Chat Completions equivalent.
+                SRV_WRN("unsupported Responses tool type '%s' skipped\n", type.c_str());
+                continue;
             }
             resp_tool.erase("type");
             chatcmpl_tool["type"] = "function";
@@ -270,7 +273,9 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) {
             chatcmpl_tools.push_back(chatcmpl_tool);
         }
         chatcmpl_body.erase("tools");
-        chatcmpl_body["tools"] = chatcmpl_tools;
+        if (!chatcmpl_tools.empty()) {
+            chatcmpl_body["tools"] = chatcmpl_tools;
+        }
     }
 
     if (response_body.contains("max_output_tokens")) {

From d5284445802b5af6bff3240329f2558f9c35b5f5 Mon Sep 17 00:00:00 2001
From: Pascal <admin@serveurperso.com>
Date: Fri, 15 May 2026 11:18:11 +0200
Subject: [PATCH 3/3] webui: preserve partial response on streaming error
 (#23090)

---
 .../server/webui/src/lib/constants/agentic.ts |  3 ---
 .../webui/src/lib/stores/agentic.svelte.ts    | 12 +++-------
 .../webui/src/lib/stores/chat.svelte.ts       | 23 +++++++++++--------
 3 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/tools/server/webui/src/lib/constants/agentic.ts b/tools/server/webui/src/lib/constants/agentic.ts
index 4fe6da61c38..c0575163efc 100644
--- a/tools/server/webui/src/lib/constants/agentic.ts
+++ b/tools/server/webui/src/lib/constants/agentic.ts
@@ -4,9 +4,6 @@ export const ATTACHMENT_SAVED_REGEX = /\[Attachment saved: ([^\]]+)\]/;
 
 export const NEWLINE_SEPARATOR = '\n';
 
-export const LLM_ERROR_BLOCK_START = '\n\n```\nUpstream LLM error:\n';
-export const LLM_ERROR_BLOCK_END = '\n```\n';
-
 export const DEFAULT_AGENTIC_CONFIG: AgenticConfig = {
 	enabled: true,
 	maxTurns: 100,
diff --git a/tools/server/webui/src/lib/stores/agentic.svelte.ts b/tools/server/webui/src/lib/stores/agentic.svelte.ts
index 3334f6c111a..1f1f05c4539 100644
--- a/tools/server/webui/src/lib/stores/agentic.svelte.ts
+++ b/tools/server/webui/src/lib/stores/agentic.svelte.ts
@@ -30,12 +30,7 @@ import { ToolSource, ToolPermissionDecision } from '$lib/enums';
 import { SvelteMap } from 'svelte/reactivity';
 import { ToolsService } from '$lib/services/tools.service';
 import { isAbortError } from '$lib/utils';
-import {
-	DEFAULT_AGENTIC_CONFIG,
-	NEWLINE_SEPARATOR,
-	LLM_ERROR_BLOCK_START,
-	LLM_ERROR_BLOCK_END
-} from '$lib/constants';
+import { DEFAULT_AGENTIC_CONFIG, NEWLINE_SEPARATOR } from '$lib/constants';
 import {
 	IMAGE_MIME_TO_EXTENSION,
 	DATA_URI_BASE64_REGEX,
@@ -640,10 +635,9 @@ class AgenticStore {
 					return;
 				}
 				const normalizedError = error instanceof Error ? error : new Error('LLM stream error');
-				// Save error as content in the current turn
-				onChunk?.(`${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`);
+				// Preserve the partial output as is; the outer error dialog informs the user separately.
 				await onAssistantTurnComplete?.(
-					turnContent + `${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`,
+					turnContent,
 					turnReasoningContent || undefined,
 					this.buildFinalTimings(capturedTimings, agenticTimings),
 					undefined
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 7c34579ca56..04a735eec91 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -814,7 +814,7 @@ class ChatStore {
 					);
 				}
 			},
-			onError: (error: Error) => {
+			onError: async (error: Error) => {
 				this.setStreamingActive(false);
 				if (isAbortError(error)) {
 					cleanupStreamingState();
@@ -826,13 +826,10 @@ class ChatStore {
 					return;
 				}
 				console.error('Streaming error:', error);
+				// Keep whatever was streamed so far: the message stays both in memory and in the DB.
+				await this.savePartialResponseIfNeeded(convId);
 				cleanupStreamingState();
 				this.clearPendingMessage(convId);
-				const idx = conversationsStore.findMessageIndex(assistantMessage.id);
-				if (idx !== -1) {
-					const failedMessage = conversationsStore.removeMessageAtIndex(idx);
-					if (failedMessage) DatabaseService.deleteMessage(failedMessage.id).catch(console.error);
-				}
 				const contextInfo = (
 					error as Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }
 				).contextInfo;
@@ -1389,9 +1386,17 @@ class ChatStore {
 						}
 
 						console.error('Continue generation error:', error);
-						conversationsStore.updateMessageAtIndex(idx, { content: originalContent });
-
-						await DatabaseService.updateMessage(msg.id, { content: originalContent });
+						// Keep whatever was appended so far: the message stays both in memory and in the DB.
+						await DatabaseService.updateMessage(msg.id, {
+							content: originalContent + appendedContent,
+							reasoningContent: originalReasoning + appendedReasoning || undefined,
+							timestamp: Date.now()
+						});
+						conversationsStore.updateMessageAtIndex(idx, {
+							content: originalContent + appendedContent,
+							reasoningContent: originalReasoning + appendedReasoning || undefined,
+							timestamp: Date.now()
+						});
 
 						this.setChatLoading(msg.convId, false);
 						this.clearChatStreaming(msg.convId);