From 2bf4cb2271280c63d8c0b852d9f6471355cf060e Mon Sep 17 00:00:00 2001 From: wsp1911 Date: Thu, 9 Apr 2026 21:01:01 +0800 Subject: [PATCH 1/3] refactor(thinking): remove preserved thinking trimming and simplify reasoning replay - remove preserved_thinking config and related UI/installer wiring - stop trimming historical reasoning_content before model requests - separate request token estimation from send-path replay behavior - count reasoning tokens only for the latest live turn during local estimation - document OpenAI chat/completions reasoning replay compatibility behavior - clean up obsolete preserved thinking locale strings and add coverage tests --- .../src-tauri/src/connection_test/client.rs | 5 - .../src/connection_test/types/config.rs | 1 - .../src-tauri/src/installer/ai_config.rs | 1 - .../src-tauri/src/installer/commands.rs | 1 - src/crates/core/src/agentic/core/message.rs | 19 +- .../core/src/agentic/core/messages_helper.rs | 206 ++++++++++++------ src/crates/core/src/agentic/core/mod.rs | 2 +- .../src/agentic/execution/execution_engine.rs | 47 ++-- .../core/src/infrastructure/ai/client.rs | 5 - .../ai/providers/openai/message_converter.rs | 4 + src/crates/core/src/service/config/types.rs | 6 - src/crates/core/src/util/types/config.rs | 2 - .../config/components/AIModelConfig.tsx | 3 - .../src/infrastructure/config/types/index.ts | 1 - src/web-ui/src/locales/en-US/settings.json | 4 +- .../src/locales/en-US/settings/ai-model.json | 4 +- src/web-ui/src/locales/zh-CN/settings.json | 4 +- .../src/locales/zh-CN/settings/ai-model.json | 4 +- 18 files changed, 182 insertions(+), 137 deletions(-) diff --git a/BitFun-Installer/src-tauri/src/connection_test/client.rs b/BitFun-Installer/src-tauri/src/connection_test/client.rs index 345035579..3be23fca3 100644 --- a/BitFun-Installer/src-tauri/src/connection_test/client.rs +++ b/BitFun-Installer/src-tauri/src/connection_test/client.rs @@ -2097,7 +2097,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: false, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2121,7 +2120,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: false, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2150,7 +2148,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: false, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2180,7 +2177,6 @@ mod tests { temperature: Some(0.2), top_p: Some(0.8), enable_thinking_process: true, - support_preserved_thinking: true, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2259,7 +2255,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: true, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, diff --git a/BitFun-Installer/src-tauri/src/connection_test/types/config.rs b/BitFun-Installer/src-tauri/src/connection_test/types/config.rs index ab45cbcb3..87ec6cbe9 100644 --- a/BitFun-Installer/src-tauri/src/connection_test/types/config.rs +++ b/BitFun-Installer/src-tauri/src/connection_test/types/config.rs @@ -82,7 +82,6 @@ pub struct AIConfig { pub temperature: Option, pub top_p: Option, pub enable_thinking_process: bool, - pub support_preserved_thinking: bool, pub inline_think_in_text: bool, pub custom_headers: Option>, /// "replace" (default) or "merge" (defaults first, then custom) diff --git a/BitFun-Installer/src-tauri/src/installer/ai_config.rs b/BitFun-Installer/src-tauri/src/installer/ai_config.rs index 38c6719ba..63730fad1 100644 --- a/BitFun-Installer/src-tauri/src/installer/ai_config.rs +++ b/BitFun-Installer/src-tauri/src/installer/ai_config.rs @@ -48,7 +48,6 @@ pub fn ai_config_from_installer_model(m: &ModelConfig) -> Result Result<(), String> { model_map.insert("recommended_for".to_string(), Value::Array(Vec::new())); model_map.insert("metadata".to_string(), Value::Null); model_map.insert("enable_thinking_process".to_string(), Value::Bool(false)); - model_map.insert("support_preserved_thinking".to_string(), Value::Bool(false)); model_map.insert("inline_think_in_text".to_string(), Value::Bool(false)); if let Some(skip_ssl_verify) = model.skip_ssl_verify { diff --git a/src/crates/core/src/agentic/core/message.rs b/src/crates/core/src/agentic/core/message.rs index 955f5a1ea..5655f1d35 100644 --- a/src/crates/core/src/agentic/core/message.rs +++ b/src/crates/core/src/agentic/core/message.rs @@ -56,8 +56,6 @@ pub struct MessageMetadata { pub turn_id: Option, pub round_id: Option, pub tokens: Option, - #[serde(skip)] // Not serialized, auxiliary field for runtime use only - pub keep_thinking: bool, /// Anthropic extended thinking signature (for passing back in multi-turn conversations) #[serde(skip_serializing_if = "Option::is_none")] pub thinking_signature: Option, @@ -168,7 +166,6 @@ impl From for AIMessage { MessageRole::Tool => "tool", MessageRole::System => "system", }; - let keep_thinking = msg.metadata.keep_thinking; let thinking_signature = msg.metadata.thinking_signature.clone(); match msg.content { @@ -273,16 +270,10 @@ impl From for AIMessage { }; // Reasoning content (interleaved thinking mode) - let reasoning = if keep_thinking { - reasoning_content.filter(|r| !r.is_empty()) - } else { - None - }; - Self { role: "assistant".to_string(), content, - reasoning_content: reasoning, + reasoning_content: reasoning_content.filter(|r| !r.is_empty()), thinking_signature: thinking_signature.clone(), tool_calls: converted_tool_calls, tool_call_id: None, @@ -506,7 +497,7 @@ impl Message { 50 + tiles * 200 } - pub fn estimate_tokens(&self) -> usize { + pub fn estimate_tokens_with_reasoning(&self, include_reasoning: bool) -> usize { let mut total = 0usize; total += 4; @@ -525,7 +516,7 @@ impl Message { text, tool_calls, } => { - if self.metadata.keep_thinking { + if include_reasoning { if let Some(reasoning) = reasoning_content.as_ref() { total += TokenCounter::estimate_tokens(reasoning); } @@ -564,6 +555,10 @@ impl Message { total } + + fn estimate_tokens(&self) -> usize { + self.estimate_tokens_with_reasoning(true) + } } impl Display for MessageContent { diff --git a/src/crates/core/src/agentic/core/messages_helper.rs b/src/crates/core/src/agentic/core/messages_helper.rs index 281af38ed..371101da4 100644 --- a/src/crates/core/src/agentic/core/messages_helper.rs +++ b/src/crates/core/src/agentic/core/messages_helper.rs @@ -1,75 +1,71 @@ use super::{CompressedTodoItem, CompressedTodoSnapshot, Message, MessageContent, MessageRole}; +use crate::util::token_counter::TokenCounter; use crate::util::types::Message as AIMessage; -use log::warn; +use crate::util::types::ToolDefinition; pub struct MessageHelper; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RequestReasoningTokenPolicy { + FullHistory, + LatestTurnOnly, + SkipAll, +} + impl MessageHelper { - pub fn compute_keep_thinking_flags( - messages: &mut [Message], - enable_thinking: bool, - support_preserved_thinking: bool, - ) { + pub fn convert_messages(messages: &[Message]) -> Vec { + messages.iter().map(AIMessage::from).collect() + } + + pub fn estimate_request_tokens( + messages: &[Message], + tools: Option<&[ToolDefinition]>, + reasoning_policy: RequestReasoningTokenPolicy, + ) -> usize { + let reasoning_frontier_start = match reasoning_policy { + RequestReasoningTokenPolicy::FullHistory => Some(0), + RequestReasoningTokenPolicy::LatestTurnOnly => { + Some(Self::find_reasoning_frontier_start(messages)) + } + RequestReasoningTokenPolicy::SkipAll => None, + }; + + let mut total = messages + .iter() + .enumerate() + .map(|(index, message)| { + let include_reasoning = + reasoning_frontier_start.is_some_and(|frontier_start| index >= frontier_start); + message.estimate_tokens_with_reasoning(include_reasoning) + }) + .sum::(); + + total += 3; + + if let Some(tool_defs) = tools { + total += TokenCounter::estimate_tool_definitions_tokens(tool_defs); + } + + total + } + + fn find_reasoning_frontier_start(messages: &[Message]) -> usize { if messages.is_empty() { - return; + return 0; } - if !enable_thinking { - messages.iter_mut().for_each(|m| { - if m.metadata.keep_thinking { - m.metadata.keep_thinking = false; - m.metadata.tokens = None; - } - }); - } else if support_preserved_thinking { - messages.iter_mut().for_each(|m| { - if !m.metadata.keep_thinking { - m.metadata.keep_thinking = true; - m.metadata.tokens = None; - } - }); - } else { - let last_message_turn_id = messages.last().and_then(|m| m.metadata.turn_id.clone()); - if let Some(last_turn_id) = last_message_turn_id { - messages.iter_mut().for_each(|m| { - let keep_thinking = m - .metadata - .turn_id - .as_ref() - .is_some_and(|cur_turn_id| cur_turn_id == &last_turn_id); - if m.metadata.keep_thinking != keep_thinking { - m.metadata.keep_thinking = keep_thinking; - m.metadata.tokens = None; - } - }) - } else { - // Find the last actual user-turn boundary from back to front. - let last_user_message_index = - messages.iter().rposition(|m| m.is_actual_user_message()); - if let Some(last_user_message_index) = last_user_message_index { - // Messages from the last user message onwards are messages for this turn - messages.iter_mut().enumerate().for_each(|(index, m)| { - let keep_thinking = index >= last_user_message_index; - if m.metadata.keep_thinking != keep_thinking { - m.metadata.keep_thinking = keep_thinking; - m.metadata.tokens = None; - } - }) - } else { - // No user message found, should not reach here in practice - warn!("compute_keep_thinking_flags: no user message found"); - - messages.iter_mut().for_each(|m| { - if m.metadata.keep_thinking { - m.metadata.keep_thinking = false; - m.metadata.tokens = None; - } - }); - } + + if let Some(last_turn_id) = messages.last().and_then(|m| m.metadata.turn_id.as_deref()) { + if let Some(frontier_start) = messages + .iter() + .position(|m| m.metadata.turn_id.as_deref() == Some(last_turn_id)) + { + return frontier_start; } } - } - pub fn convert_messages(messages: &[Message]) -> Vec { - messages.iter().map(AIMessage::from).collect() + messages + .iter() + .rposition(Message::is_actual_user_message) + .unwrap_or(messages.len().saturating_sub(1)) } pub fn group_messages_by_turns(mut messages: Vec) -> Vec> { @@ -194,3 +190,89 @@ impl MessageHelper { None } } + +#[cfg(test)] +mod tests { + use super::{MessageHelper, RequestReasoningTokenPolicy}; + use crate::agentic::core::Message; + use crate::util::token_counter::TokenCounter; + + #[test] + fn latest_turn_reasoning_policy_uses_turn_id_boundary() { + let messages = vec![ + Message::user("old user".to_string()).with_turn_id("turn-1".to_string()), + Message::assistant_with_reasoning( + Some("old reasoning".to_string()), + "old answer".to_string(), + Vec::new(), + ) + .with_turn_id("turn-1".to_string()), + Message::user("new user".to_string()).with_turn_id("turn-2".to_string()), + Message::assistant_with_reasoning( + Some("new reasoning".to_string()), + "new answer".to_string(), + Vec::new(), + ) + .with_turn_id("turn-2".to_string()), + ]; + + let full = MessageHelper::estimate_request_tokens( + &messages, + None, + RequestReasoningTokenPolicy::FullHistory, + ); + let latest = MessageHelper::estimate_request_tokens( + &messages, + None, + RequestReasoningTokenPolicy::LatestTurnOnly, + ); + let skip_all = MessageHelper::estimate_request_tokens( + &messages, + None, + RequestReasoningTokenPolicy::SkipAll, + ); + + assert_eq!( + full - latest, + TokenCounter::estimate_tokens("old reasoning") + ); + assert_eq!( + latest - skip_all, + TokenCounter::estimate_tokens("new reasoning") + ); + } + + #[test] + fn latest_turn_reasoning_policy_falls_back_to_last_actual_user_message() { + let messages = vec![ + Message::user("old user".to_string()), + Message::assistant_with_reasoning( + Some("old reasoning".to_string()), + "old answer".to_string(), + Vec::new(), + ), + Message::user("new user".to_string()), + Message::assistant_with_reasoning( + Some("new reasoning".to_string()), + "new answer".to_string(), + Vec::new(), + ), + ]; + + let latest = MessageHelper::estimate_request_tokens( + &messages, + None, + RequestReasoningTokenPolicy::LatestTurnOnly, + ); + let skip_all = MessageHelper::estimate_request_tokens( + &messages, + None, + RequestReasoningTokenPolicy::SkipAll, + ); + + assert_eq!( + latest - skip_all, + TokenCounter::estimate_tokens("new reasoning") + ); + } +} diff --git a/src/crates/core/src/agentic/core/mod.rs b/src/crates/core/src/agentic/core/mod.rs index ad8b6a552..7d46528b5 100644 --- a/src/crates/core/src/agentic/core/mod.rs +++ b/src/crates/core/src/agentic/core/mod.rs @@ -15,7 +15,7 @@ pub use message::{ CompressedToolCall, CompressionEntry, CompressionPayload, Message, MessageContent, MessageRole, MessageSemanticKind, ToolCall, ToolResult, }; -pub use messages_helper::MessageHelper; +pub use messages_helper::{MessageHelper, RequestReasoningTokenPolicy}; pub use model_round::ModelRound; pub use prompt_markup::{ has_prompt_markup, is_system_reminder_only, render_system_reminder, render_user_query, diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index 35e463115..8340c4128 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -5,7 +5,10 @@ use super::round_executor::RoundExecutor; use super::types::{ExecutionContext, ExecutionResult, RoundContext}; use crate::agentic::agents::{get_agent_registry, PromptBuilderContext, RemoteExecutionHints}; -use crate::agentic::core::{Message, MessageContent, MessageHelper, MessageSemanticKind, Session}; +use crate::agentic::core::{ + Message, MessageContent, MessageHelper, MessageSemanticKind, RequestReasoningTokenPolicy, + Session, +}; use crate::agentic::events::{AgenticEvent, EventPriority, EventQueue}; use crate::agentic::image_analysis::{ build_multimodal_message_with_images, process_image_contexts_for_provider, ImageContextData, @@ -81,17 +84,14 @@ impl ExecutionEngine { } fn estimate_request_tokens_internal( - messages: &mut [Message], + messages: &[Message], tools: Option<&[ToolDefinition]>, ) -> usize { - let mut total: usize = messages.iter_mut().map(|m| m.get_tokens()).sum(); - total += 3; - - if let Some(tool_defs) = tools { - total += TokenCounter::estimate_tool_definitions_tokens(tool_defs); - } - - total + MessageHelper::estimate_request_tokens( + messages, + tools, + RequestReasoningTokenPolicy::LatestTurnOnly, + ) } /// Emergency truncation: drop oldest API rounds (assistant+tool pairs) @@ -152,7 +152,10 @@ impl ExecutionEngine { let tool_tokens = tools .map(TokenCounter::estimate_tool_definitions_tokens) .unwrap_or(0); - let preserved_tokens: usize = preserved.iter().map(|m| m.estimate_tokens()).sum::() + let preserved_tokens: usize = preserved + .iter() + .map(|m| m.estimate_tokens_with_reasoning(true)) + .sum::() + tool_tokens + 3; @@ -161,11 +164,14 @@ impl ExecutionEngine { + rounds .iter() .flat_map(|r| r.iter()) - .map(|m| m.estimate_tokens()) + .map(|m| m.estimate_tokens_with_reasoning(true)) .sum::(); while total_tokens > context_window && kept_start < rounds.len() { - let round_tokens: usize = rounds[kept_start].iter().map(|m| m.estimate_tokens()).sum(); + let round_tokens: usize = rounds[kept_start] + .iter() + .map(|m| m.estimate_tokens_with_reasoning(true)) + .sum(); total_tokens -= round_tokens; kept_start += 1; } @@ -1003,9 +1009,6 @@ impl ExecutionEngine { } }; - // Get configuration for whether to support preserving historical thinking content - let enable_thinking = ai_client.config.enable_thinking_process; - let support_preserved_thinking = ai_client.config.support_preserved_thinking; let model_context_window = ai_client.config.context_window as usize; let session_max_tokens = session.config.max_context_tokens; let context_window = model_context_window.min(session_max_tokens); @@ -1228,15 +1231,9 @@ impl ExecutionEngine { break; } - MessageHelper::compute_keep_thinking_flags( - &mut messages, - enable_thinking, - support_preserved_thinking, - ); - // Check and compress before sending AI request let mut current_tokens = - Self::estimate_request_tokens_internal(&mut messages, tool_definitions.as_deref()); + Self::estimate_request_tokens_internal(&messages, tool_definitions.as_deref()); debug!( "Round {} token usage before send: {} / {} tokens ({:.1}%)", round_index, @@ -1347,7 +1344,7 @@ impl ExecutionEngine { // L2: Emergency truncation — if tokens still exceed context_window // after all compression layers, drop oldest API rounds until we fit. let post_compress_tokens = Self::estimate_request_tokens_internal( - &mut messages, + &messages, tool_definitions.as_deref(), ); if post_compress_tokens > context_window { @@ -1361,7 +1358,7 @@ impl ExecutionEngine { tool_definitions.as_deref(), ); let after_truncate = Self::estimate_request_tokens_internal( - &mut messages, + &messages, tool_definitions.as_deref(), ); info!( diff --git a/src/crates/core/src/infrastructure/ai/client.rs b/src/crates/core/src/infrastructure/ai/client.rs index b36c87810..aa668ecbb 100644 --- a/src/crates/core/src/infrastructure/ai/client.rs +++ b/src/crates/core/src/infrastructure/ai/client.rs @@ -2137,7 +2137,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: false, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2161,7 +2160,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: false, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2190,7 +2188,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: false, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2220,7 +2217,6 @@ mod tests { temperature: Some(0.2), top_p: Some(0.8), enable_thinking_process: true, - support_preserved_thinking: true, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, @@ -2299,7 +2295,6 @@ mod tests { temperature: None, top_p: None, enable_thinking_process: false, - support_preserved_thinking: true, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, diff --git a/src/crates/core/src/infrastructure/ai/providers/openai/message_converter.rs b/src/crates/core/src/infrastructure/ai/providers/openai/message_converter.rs index d0760a257..8f9a5312d 100644 --- a/src/crates/core/src/infrastructure/ai/providers/openai/message_converter.rs +++ b/src/crates/core/src/infrastructure/ai/providers/openai/message_converter.rs @@ -309,6 +309,10 @@ impl OpenAIMessageConverter { if let Some(reasoning) = msg.reasoning_content { if !reasoning.is_empty() { + // Official OpenAI Chat Completions may ignore replayed reasoning_content, but + // many OpenAI-compatible providers require it to continue interleaved thinking. + // Replaying it here is therefore the compatibility default; at worst this only + // adds transport cost for providers that ignore the field. openai_msg["reasoning_content"] = Value::String(reasoning); } } diff --git a/src/crates/core/src/service/config/types.rs b/src/crates/core/src/service/config/types.rs index f75129e97..d808083a9 100644 --- a/src/crates/core/src/service/config/types.rs +++ b/src/crates/core/src/service/config/types.rs @@ -840,11 +840,6 @@ pub struct AIModelConfig { #[serde(default)] pub enable_thinking_process: bool, - /// Whether preserved thinking is supported (Preserved Thinking). - /// If false, `reasoning_content` from previous turns is ignored when sending messages. - #[serde(default)] - pub support_preserved_thinking: bool, - /// Whether to parse OpenAI-compatible text chunks containing `...` into /// streaming reasoning content. #[serde(default)] @@ -1255,7 +1250,6 @@ impl Default for AIModelConfig { recommended_for: vec![], metadata: None, enable_thinking_process: false, - support_preserved_thinking: false, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, diff --git a/src/crates/core/src/util/types/config.rs b/src/crates/core/src/util/types/config.rs index 076cfc7d2..11896b7a3 100644 --- a/src/crates/core/src/util/types/config.rs +++ b/src/crates/core/src/util/types/config.rs @@ -81,7 +81,6 @@ pub struct AIConfig { pub temperature: Option, pub top_p: Option, pub enable_thinking_process: bool, - pub support_preserved_thinking: bool, pub inline_think_in_text: bool, pub custom_headers: Option>, /// "replace" (default) or "merge" (defaults first, then custom) @@ -205,7 +204,6 @@ impl TryFrom for AIConfig { temperature: other.temperature, top_p: other.top_p, enable_thinking_process: other.enable_thinking_process, - support_preserved_thinking: other.support_preserved_thinking, inline_think_in_text: other.inline_think_in_text, custom_headers: other.custom_headers, custom_headers_mode: other.custom_headers_mode, diff --git a/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx b/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx index a471dca77..0aeef7556 100644 --- a/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx +++ b/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx @@ -556,7 +556,6 @@ const AIModelConfig: React.FC = () => { recommended_for: config.recommended_for || [], metadata: config.metadata || {}, enable_thinking_process: config.enable_thinking_process ?? false, - support_preserved_thinking: config.support_preserved_thinking ?? false, inline_think_in_text: config.inline_think_in_text ?? false, reasoning_effort: config.reasoning_effort, custom_headers: config.custom_headers, @@ -750,7 +749,6 @@ const AIModelConfig: React.FC = () => { recommended_for: config.recommended_for || [], metadata: config.metadata || {}, enable_thinking_process: config.enable_thinking_process ?? false, - support_preserved_thinking: config.support_preserved_thinking ?? false, inline_think_in_text: config.inline_think_in_text ?? false, reasoning_effort: config.reasoning_effort, custom_headers: config.custom_headers, @@ -841,7 +839,6 @@ const AIModelConfig: React.FC = () => { recommended_for: editingConfig.recommended_for || [], metadata: editingConfig.metadata, enable_thinking_process: draft.enableThinking, - support_preserved_thinking: editingConfig.support_preserved_thinking ?? false, inline_think_in_text: editingConfig.inline_think_in_text ?? false, reasoning_effort: editingConfig.reasoning_effort, custom_headers: editingConfig.custom_headers, diff --git a/src/web-ui/src/infrastructure/config/types/index.ts b/src/web-ui/src/infrastructure/config/types/index.ts index 25ef27131..2dd33c976 100644 --- a/src/web-ui/src/infrastructure/config/types/index.ts +++ b/src/web-ui/src/infrastructure/config/types/index.ts @@ -121,7 +121,6 @@ export interface AIModelConfig { recommended_for?: string[]; metadata?: Record; enable_thinking_process?: boolean; - support_preserved_thinking?: boolean; /** Parse `...` text chunks into streaming reasoning content. */ inline_think_in_text?: boolean; /** Reasoning effort for OpenAI Responses API ("low" | "medium" | "high" | "xhigh") */ diff --git a/src/web-ui/src/locales/en-US/settings.json b/src/web-ui/src/locales/en-US/settings.json index 1b0d12312..fd0e2c4a3 100644 --- a/src/web-ui/src/locales/en-US/settings.json +++ b/src/web-ui/src/locales/en-US/settings.json @@ -324,9 +324,7 @@ "modelNameHint": "GLM-ASR uses glm-asr, other speech models use their corresponding model names", "thinking": { "enable": "Enable Thinking", - "enableHint": "Requires model to support thinking mode", - "preserve": "Preserve Historical Thinking", - "preserveHint": "When enabled, preserves thinking content from previous conversation turns (requires model to support Preserved Thinking)" + "enableHint": "Requires model to support thinking mode" }, "details": { "basicInfo": "Basic Information", diff --git a/src/web-ui/src/locales/en-US/settings/ai-model.json b/src/web-ui/src/locales/en-US/settings/ai-model.json index cfb100323..808acf2f3 100644 --- a/src/web-ui/src/locales/en-US/settings/ai-model.json +++ b/src/web-ui/src/locales/en-US/settings/ai-model.json @@ -156,9 +156,7 @@ "summaryOn": "thinking on", "summaryOff": "thinking off", "optionEnabled": "Enabled", - "optionDisabled": "Disabled", - "preserve": "Preserve Historical Thinking", - "preserveHint": "When enabled, preserves thinking content from past conversations" + "optionDisabled": "Disabled" }, "reasoningEffort": { "label": "Reasoning Effort", diff --git a/src/web-ui/src/locales/zh-CN/settings.json b/src/web-ui/src/locales/zh-CN/settings.json index 0f2354455..a58d26a1d 100644 --- a/src/web-ui/src/locales/zh-CN/settings.json +++ b/src/web-ui/src/locales/zh-CN/settings.json @@ -324,9 +324,7 @@ "modelNameHint": "智谱 GLM-ASR 使用 glm-asr,其他语音模型填写对应的模型名称", "thinking": { "enable": "启用思考", - "enableHint": "需要模型支持思考模式", - "preserve": "保留历史思考", - "preserveHint": "开启后将保留之前对话轮的思考内容(需模型支持 Preserved Thinking)" + "enableHint": "需要模型支持思考模式" }, "details": { "basicInfo": "基本信息", diff --git a/src/web-ui/src/locales/zh-CN/settings/ai-model.json b/src/web-ui/src/locales/zh-CN/settings/ai-model.json index 702668791..977fd3af8 100644 --- a/src/web-ui/src/locales/zh-CN/settings/ai-model.json +++ b/src/web-ui/src/locales/zh-CN/settings/ai-model.json @@ -156,9 +156,7 @@ "summaryOn": "思考开", "summaryOff": "思考关", "optionEnabled": "启用", - "optionDisabled": "禁用", - "preserve": "保留历史思考", - "preserveHint": "开启后将保留历史对话的思考内容" + "optionDisabled": "禁用" }, "reasoningEffort": { "label": "推理深度", From 33ce996876cad903106aa4673d81cddbbdb22443 Mon Sep 17 00:00:00 2001 From: wsp1911 Date: Thu, 9 Apr 2026 21:01:01 +0800 Subject: [PATCH 2/3] refactor(thinking): unify reasoning configuration across providers - replace legacy thinking flag with reasoning mode and keep backward-compatible config migration - normalize provider-specific reasoning payloads for OpenAI-compatible, Anthropic, and Gemini models - expose reasoning mode, effort, and thinking budget controls in model config UI and schema --- .../prompt_builder/prompt_builder_impl.rs | 2 +- .../src/agentic/session/session_manager.rs | 4 +- .../tools/implementations/skills/builtin.rs | 5 +- .../core/src/agentic/tools/workspace_paths.rs | 5 +- .../core/src/infrastructure/ai/client.rs | 353 +++++++++++++++--- src/crates/core/src/service/config/types.rs | 192 +++++++++- .../core/src/service/filesystem/listing.rs | 10 +- .../src/service/remote_connect/bot/mod.rs | 5 +- .../service/remote_connect/remote_server.rs | 63 ++-- .../src/service/remote_ssh/workspace_state.rs | 3 +- src/crates/core/src/util/types/config.rs | 151 +++++--- src/mobile-web/src/pages/ChatPage.tsx | 14 +- .../src/services/RemoteSessionManager.ts | 3 +- .../flow_chat/components/ModelSelector.tsx | 7 +- .../config/components/AIModelConfig.tsx | 229 +++++++++--- .../config/components/DefaultModelConfig.tsx | 3 +- .../config/schemas/ai-models.json | 60 ++- .../src/infrastructure/config/types/index.ts | 13 +- .../infrastructure/config/utils/reasoning.ts | 32 ++ .../src/locales/en-US/settings/ai-model.json | 14 +- .../src/locales/zh-CN/settings/ai-model.json | 14 +- 21 files changed, 959 insertions(+), 223 deletions(-) create mode 100644 src/web-ui/src/infrastructure/config/utils/reasoning.ts diff --git a/src/crates/core/src/agentic/agents/prompt_builder/prompt_builder_impl.rs b/src/crates/core/src/agentic/agents/prompt_builder/prompt_builder_impl.rs index 82410bc0e..5b92cff84 100644 --- a/src/crates/core/src/agentic/agents/prompt_builder/prompt_builder_impl.rs +++ b/src/crates/core/src/agentic/agents/prompt_builder/prompt_builder_impl.rs @@ -5,8 +5,8 @@ use crate::service::ai_memory::AIMemoryManager; use crate::service::ai_rules::get_global_ai_rules_service; use crate::service::bootstrap::build_workspace_persona_prompt; use crate::service::config::get_app_language_code; -use crate::service::filesystem::get_formatted_directory_listing; use crate::service::config::global::GlobalConfigManager; +use crate::service::filesystem::get_formatted_directory_listing; use crate::service::project_context::ProjectContextService; use crate::util::errors::{BitFunError, BitFunResult}; use log::{debug, warn}; diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index 5f93472da..8521fc939 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -668,7 +668,9 @@ impl SessionManager { "Session evicted from memory, restoring for model update: session_id={}", session_id ); - let _ = self.restore_session(&workspace_path.clone(), session_id).await; + let _ = self + .restore_session(&workspace_path.clone(), session_id) + .await; } } diff --git a/src/crates/core/src/agentic/tools/implementations/skills/builtin.rs b/src/crates/core/src/agentic/tools/implementations/skills/builtin.rs index d4b2ce46d..9996e7ecb 100644 --- a/src/crates/core/src/agentic/tools/implementations/skills/builtin.rs +++ b/src/crates/core/src/agentic/tools/implementations/skills/builtin.rs @@ -173,7 +173,10 @@ mod tests { assert_eq!(builtin_skill_group_key("xlsx"), Some("office")); assert_eq!(builtin_skill_group_key("find-skills"), Some("meta")); assert_eq!(builtin_skill_group_key("writing-skills"), Some("meta")); - assert_eq!(builtin_skill_group_key("agent-browser"), Some("computer-use")); + assert_eq!( + builtin_skill_group_key("agent-browser"), + Some("computer-use") + ); assert_eq!( builtin_skill_group_key("test-driven-development"), Some("superpowers") diff --git a/src/crates/core/src/agentic/tools/workspace_paths.rs b/src/crates/core/src/agentic/tools/workspace_paths.rs index 63fc93a74..017328fe1 100644 --- a/src/crates/core/src/agentic/tools/workspace_paths.rs +++ b/src/crates/core/src/agentic/tools/workspace_paths.rs @@ -133,7 +133,10 @@ mod tests { let resolved = resolve_path_with_workspace("src/main.rs", Some(Path::new("/repo"))) .expect("path should resolve"); - assert_eq!(PathBuf::from(resolved), Path::new("/repo").join("src/main.rs")); + assert_eq!( + PathBuf::from(resolved), + Path::new("/repo").join("src/main.rs") + ); } #[test] diff --git a/src/crates/core/src/infrastructure/ai/client.rs b/src/crates/core/src/infrastructure/ai/client.rs index aa668ecbb..5cbed9a8a 100644 --- a/src/crates/core/src/infrastructure/ai/client.rs +++ b/src/crates/core/src/infrastructure/ai/client.rs @@ -6,6 +6,7 @@ use crate::infrastructure::ai::providers::anthropic::AnthropicMessageConverter; use crate::infrastructure::ai::providers::gemini::GeminiMessageConverter; use crate::infrastructure::ai::providers::openai::OpenAIMessageConverter; use crate::infrastructure::ai::tool_call_accumulator::{PendingToolCall, ToolCallBoundary}; +use crate::service::config::types::ReasoningMode; use crate::service::config::ProxyConfig; use crate::util::types::*; use ai_stream_handlers::{ @@ -400,6 +401,12 @@ impl AIClient { url.contains("dashscope.aliyuncs.com") } + /// Whether the URL is SiliconFlow's OpenAI-compatible endpoint. + /// SiliconFlow uses the same `enable_thinking` boolean switch as DashScope. + fn is_siliconflow_url(url: &str) -> bool { + url.contains("api.siliconflow.cn") + } + /// Whether the URL is MiniMax API. /// MiniMax (api.minimaxi.com) uses `reasoning_split=true` to enable streamed thinking content /// delivered via `delta.reasoning_details` rather than the standard `reasoning_content` field. @@ -407,52 +414,133 @@ impl AIClient { url.contains("api.minimaxi.com") } - /// Apply thinking-related fields onto the request body (mutates `request_body`). - /// - /// * `enable` - whether thinking process is enabled - /// * `url` - request URL - /// * `model_name` - model name (e.g. for Claude budget_tokens in Anthropic format) - /// * `api_format` - "openai" or "anthropic" - /// * `max_tokens` - optional max_tokens (for Anthropic Claude budget_tokens) - fn apply_thinking_fields( + fn anthropic_supports_adaptive_reasoning(model_name: &str) -> bool { + matches!( + model_name, + name if name.starts_with("claude-opus-4-6") + || name.starts_with("claude-sonnet-4-6") + || name.starts_with("claude-mythos") + ) + } + + fn anthropic_supports_thinking_budget(model_name: &str) -> bool { + model_name.starts_with("claude") + } + + fn default_anthropic_budget_tokens(max_tokens: Option) -> Option { + max_tokens.map(|value| 10_000u32.min(value.saturating_mul(3) / 4)) + } + + fn apply_openai_compatible_reasoning_fields( request_body: &mut serde_json::Value, - enable: bool, + mode: ReasoningMode, url: &str, - model_name: &str, - api_format: &str, - max_tokens: Option, ) { - if Self::is_dashscope_url(url) && api_format.eq_ignore_ascii_case("openai") { - request_body["enable_thinking"] = serde_json::json!(enable); + let normalized_mode = if mode == ReasoningMode::Adaptive { + ReasoningMode::Enabled + } else { + mode + }; + + if Self::is_dashscope_url(url) || Self::is_siliconflow_url(url) { + if normalized_mode != ReasoningMode::Default { + request_body["enable_thinking"] = + serde_json::json!(normalized_mode == ReasoningMode::Enabled); + } return; } - if Self::is_minimax_url(url) && api_format.eq_ignore_ascii_case("openai") { - if enable { + + if Self::is_minimax_url(url) { + if normalized_mode == ReasoningMode::Enabled { request_body["reasoning_split"] = serde_json::json!(true); } return; } - let thinking_value = if enable { - if api_format.eq_ignore_ascii_case("anthropic") && model_name.starts_with("claude") { - let mut obj = serde_json::map::Map::new(); - obj.insert( - "type".to_string(), - serde_json::Value::String("enabled".to_string()), - ); - if let Some(m) = max_tokens { - obj.insert( - "budget_tokens".to_string(), - serde_json::json!(10000u32.min(m * 3 / 4)), + + match normalized_mode { + ReasoningMode::Default => {} + ReasoningMode::Enabled => { + request_body["thinking"] = serde_json::json!({ "type": "enabled" }); + } + ReasoningMode::Disabled => { + request_body["thinking"] = serde_json::json!({ "type": "disabled" }); + } + ReasoningMode::Adaptive => unreachable!("adaptive mode is normalized above"), + } + } + + fn apply_anthropic_reasoning_fields( + request_body: &mut serde_json::Value, + mode: ReasoningMode, + model_name: &str, + max_tokens: Option, + reasoning_effort: Option<&str>, + thinking_budget_tokens: Option, + ) { + match mode { + ReasoningMode::Default => {} + ReasoningMode::Disabled => { + request_body["thinking"] = serde_json::json!({ "type": "disabled" }); + } + ReasoningMode::Enabled => { + let mut thinking = serde_json::json!({ "type": "enabled" }); + if Self::anthropic_supports_thinking_budget(model_name) { + if let Some(budget_tokens) = thinking_budget_tokens + .or_else(|| Self::default_anthropic_budget_tokens(max_tokens)) + { + thinking["budget_tokens"] = serde_json::json!(budget_tokens); + } + } + request_body["thinking"] = thinking; + } + ReasoningMode::Adaptive => { + if Self::anthropic_supports_adaptive_reasoning(model_name) { + request_body["thinking"] = serde_json::json!({ "type": "adaptive" }); + if let Some(effort) = reasoning_effort.filter(|value| !value.trim().is_empty()) + { + request_body["output_config"] = serde_json::json!({ + "effort": effort + }); + } + } else { + warn!( + target: "ai::anthropic_stream_request", + "Model {} does not advertise Anthropic adaptive reasoning support; falling back to manual thinking", + model_name + ); + Self::apply_anthropic_reasoning_fields( + request_body, + ReasoningMode::Enabled, + model_name, + max_tokens, + None, + thinking_budget_tokens, ); } - serde_json::Value::Object(obj) - } else { - serde_json::json!({ "type": "enabled" }) } - } else { - serde_json::json!({ "type": "disabled" }) - }; - request_body["thinking"] = thinking_value; + } + + if mode != ReasoningMode::Adaptive + && reasoning_effort.is_some_and(|value| !value.trim().is_empty()) + { + warn!( + target: "ai::anthropic_stream_request", + "Ignoring reasoning_effort for Anthropic model {} because effort currently applies only to adaptive reasoning mode", + model_name + ); + } + } + + fn apply_gemini_reasoning_fields(request_body: &mut serde_json::Value, mode: ReasoningMode) { + if matches!(mode, ReasoningMode::Enabled | ReasoningMode::Adaptive) { + Self::insert_gemini_generation_field( + request_body, + "thinkingConfig", + serde_json::json!({ + "includeThoughts": true, + }), + ); + } } /// Whether to append the `tool_stream` request field. @@ -829,13 +917,10 @@ impl AIClient { request_body["tool_stream"] = serde_json::Value::Bool(true); } - Self::apply_thinking_fields( + Self::apply_openai_compatible_reasoning_fields( &mut request_body, - self.config.enable_thinking_process, + self.config.reasoning_mode, url, - &model_name, - "openai", - self.config.max_tokens, ); if let Some(max_tokens) = self.config.max_tokens { @@ -911,10 +996,23 @@ impl AIClient { request_body["max_output_tokens"] = serde_json::json!(max_tokens); } - if let Some(ref effort) = self.config.reasoning_effort { + let responses_effort = self + .config + .reasoning_effort + .as_deref() + .filter(|value| !value.trim().is_empty()) + .map(str::to_string) + .or_else(|| { + if self.config.reasoning_mode == ReasoningMode::Disabled { + Some("none".to_string()) + } else { + None + } + }); + + if let Some(effort) = responses_effort { request_body["reasoning"] = serde_json::json!({ - "effort": effort, - "summary": "auto" + "effort": effort }); } @@ -984,13 +1082,13 @@ impl AIClient { request_body["tool_stream"] = serde_json::Value::Bool(true); } - Self::apply_thinking_fields( + Self::apply_anthropic_reasoning_fields( &mut request_body, - self.config.enable_thinking_process, - url, + self.config.reasoning_mode, &model_name, - "anthropic", Some(max_tokens), + self.config.reasoning_effort.as_deref(), + self.config.thinking_budget_tokens, ); if let Some(system) = system_message { @@ -1065,15 +1163,7 @@ impl AIClient { ); } - if self.config.enable_thinking_process { - Self::insert_gemini_generation_field( - &mut request_body, - "thinkingConfig", - serde_json::json!({ - "includeThoughts": true, - }), - ); - } + Self::apply_gemini_reasoning_fields(&mut request_body, self.config.reasoning_mode); if let Some(tools) = gemini_tools { let tool_names = tools @@ -2121,6 +2211,7 @@ impl AIClient { mod tests { use super::AIClient; use crate::infrastructure::ai::providers::gemini::GeminiMessageConverter; + use crate::service::config::types::ReasoningMode; use crate::util::types::{AIConfig, ToolDefinition}; use serde_json::json; @@ -2136,12 +2227,13 @@ mod tests { max_tokens: Some(8192), temperature: None, top_p: None, - enable_thinking_process: false, + reasoning_mode: ReasoningMode::Default, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, skip_ssl_verify: false, reasoning_effort: None, + thinking_budget_tokens: None, custom_request_body, }) } @@ -2159,12 +2251,13 @@ mod tests { max_tokens: Some(8192), temperature: None, top_p: None, - enable_thinking_process: false, + reasoning_mode: ReasoningMode::Default, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, skip_ssl_verify: false, reasoning_effort: None, + thinking_budget_tokens: None, custom_request_body: None, }); @@ -2187,12 +2280,13 @@ mod tests { max_tokens: Some(8192), temperature: None, top_p: None, - enable_thinking_process: false, + reasoning_mode: ReasoningMode::Default, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, skip_ssl_verify: false, reasoning_effort: None, + thinking_budget_tokens: None, custom_request_body: None, }); @@ -2216,12 +2310,13 @@ mod tests { max_tokens: Some(4096), temperature: Some(0.2), top_p: Some(0.8), - enable_thinking_process: true, + reasoning_mode: ReasoningMode::Enabled, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, skip_ssl_verify: false, reasoning_effort: None, + thinking_budget_tokens: None, custom_request_body: None, }); @@ -2294,12 +2389,13 @@ mod tests { max_tokens: Some(4096), temperature: None, top_p: None, - enable_thinking_process: false, + reasoning_mode: ReasoningMode::Default, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, skip_ssl_verify: false, reasoning_effort: None, + thinking_budget_tokens: None, custom_request_body: None, }); @@ -2328,6 +2424,143 @@ mod tests { assert!(request_body.get("toolConfig").is_none()); } + #[test] + fn build_openai_request_body_uses_generic_thinking_object_when_enabled() { + let client = AIClient::new(AIConfig { + name: "openai-compatible".to_string(), + base_url: "https://example.com/v1".to_string(), + request_url: "https://example.com/v1/chat/completions".to_string(), + api_key: "test-key".to_string(), + model: "test-model".to_string(), + format: "openai".to_string(), + context_window: 128000, + max_tokens: Some(4096), + temperature: None, + top_p: None, + reasoning_mode: ReasoningMode::Enabled, + inline_think_in_text: false, + custom_headers: None, + custom_headers_mode: None, + skip_ssl_verify: false, + reasoning_effort: None, + thinking_budget_tokens: None, + custom_request_body: None, + }); + + let request_body = client.build_openai_request_body( + &client.config.request_url, + vec![json!({ "role": "user", "content": "hello" })], + None, + None, + ); + + assert_eq!(request_body["thinking"]["type"], "enabled"); + assert!(request_body.get("enable_thinking").is_none()); + assert!(request_body.get("reasoning_split").is_none()); + } + + #[test] + fn build_openai_request_body_uses_enable_thinking_for_siliconflow() { + let client = AIClient::new(AIConfig { + name: "siliconflow".to_string(), + base_url: "https://api.siliconflow.cn/v1".to_string(), + request_url: "https://api.siliconflow.cn/v1/chat/completions".to_string(), + api_key: "test-key".to_string(), + model: "Qwen/Qwen3-Coder-480B-A35B-Instruct".to_string(), + format: "openai".to_string(), + context_window: 128000, + max_tokens: Some(4096), + temperature: None, + top_p: None, + reasoning_mode: ReasoningMode::Enabled, + inline_think_in_text: false, + custom_headers: None, + custom_headers_mode: None, + skip_ssl_verify: false, + reasoning_effort: None, + thinking_budget_tokens: None, + custom_request_body: None, + }); + + let request_body = client.build_openai_request_body( + &client.config.request_url, + vec![json!({ "role": "user", "content": "hello" })], + None, + None, + ); + + assert_eq!(request_body["enable_thinking"], true); + assert!(request_body.get("thinking").is_none()); + } + + #[test] + fn build_responses_request_body_maps_disabled_mode_to_none_effort() { + let client = AIClient::new(AIConfig { + name: "responses".to_string(), + base_url: "https://api.openai.com/v1".to_string(), + request_url: "https://api.openai.com/v1/responses".to_string(), + api_key: "test-key".to_string(), + model: "gpt-5".to_string(), + format: "responses".to_string(), + context_window: 128000, + max_tokens: Some(4096), + temperature: None, + top_p: None, + reasoning_mode: ReasoningMode::Disabled, + inline_think_in_text: false, + custom_headers: None, + custom_headers_mode: None, + skip_ssl_verify: false, + reasoning_effort: None, + thinking_budget_tokens: None, + custom_request_body: None, + }); + + let request_body = client.build_responses_request_body( + Some("Be concise".to_string()), + vec![json!({ "role": "user", "content": [{ "type": "input_text", "text": "hello" }] })], + None, + None, + ); + + assert_eq!(request_body["reasoning"]["effort"], "none"); + } + + #[test] + fn build_anthropic_request_body_uses_adaptive_reasoning_and_effort() { + let client = AIClient::new(AIConfig { + name: "anthropic".to_string(), + base_url: "https://api.anthropic.com".to_string(), + request_url: "https://api.anthropic.com/v1/messages".to_string(), + api_key: "test-key".to_string(), + model: "claude-sonnet-4-6".to_string(), + format: "anthropic".to_string(), + context_window: 200000, + max_tokens: Some(8192), + temperature: None, + top_p: None, + reasoning_mode: ReasoningMode::Adaptive, + inline_think_in_text: false, + custom_headers: None, + custom_headers_mode: None, + skip_ssl_verify: false, + reasoning_effort: Some("high".to_string()), + thinking_budget_tokens: None, + custom_request_body: None, + }); + + let request_body = client.build_anthropic_request_body( + &client.config.request_url, + None, + vec![json!({ "role": "user", "content": [{ "type": "text", "text": "hello" }] })], + None, + None, + ); + + assert_eq!(request_body["thinking"]["type"], "adaptive"); + assert_eq!(request_body["output_config"]["effort"], "high"); + } + #[test] fn streaming_http_client_does_not_apply_global_request_timeout() { let client = make_test_client("openai", None); diff --git a/src/crates/core/src/service/config/types.rs b/src/crates/core/src/service/config/types.rs index d808083a9..16d2f42db 100644 --- a/src/crates/core/src/service/config/types.rs +++ b/src/crates/core/src/service/config/types.rs @@ -366,6 +366,22 @@ pub enum ModelCategory { SpeechRecognition, } +/// Provider-agnostic reasoning mode. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +#[derive(Default)] +pub enum ReasoningMode { + /// Omit provider-specific reasoning fields and use the upstream API default behavior. + #[default] + Default, + /// Explicitly enable reasoning / thinking output when the provider supports it. + Enabled, + /// Explicitly disable reasoning / thinking output when the provider supports it. + Disabled, + /// Use provider-native adaptive reasoning when supported. + Adaptive, +} + /// Default model configuration. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default)] @@ -803,7 +819,7 @@ impl Default for SubAgentConfig { } #[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(default)] +#[serde(default, from = "AIModelConfigCompat")] pub struct AIModelConfig { pub id: String, pub name: String, @@ -836,10 +852,18 @@ pub struct AIModelConfig { /// Additional metadata (JSON, for extensibility). pub metadata: Option, - /// Whether to display the thinking process (for hybrid/thinking models such as o1). - #[serde(default)] + /// Compatibility-only input field for older saved configs. + /// + /// New code should use `reasoning_mode`. This field is deserialized for migration and + /// compatibility, then omitted from future saves. When `reasoning_mode` is absent, `true` + /// maps to `enabled` and `false` maps to `default`. + #[serde(default, skip_serializing)] pub enable_thinking_process: bool, + /// Provider-agnostic reasoning mode. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub reasoning_mode: Option, + /// Whether to parse OpenAI-compatible text chunks containing `...` into /// streaming reasoning content. #[serde(default)] @@ -858,16 +882,108 @@ pub struct AIModelConfig { #[serde(default)] pub skip_ssl_verify: bool, - /// Reasoning effort level for OpenAI Responses API (o-series / GPT-5+). - /// Valid values: "low", "medium", "high", "xhigh". None = use API default. + /// Reasoning effort level for providers that support explicit effort controls. + /// Valid values are provider-specific. None = use API default. #[serde(default, skip_serializing_if = "Option::is_none")] pub reasoning_effort: Option, + /// Optional Anthropic manual thinking token budget. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub thinking_budget_tokens: Option, + /// Custom request body (JSON string, used to override default request body fields). #[serde(default)] pub custom_request_body: Option, } +#[derive(Debug, Clone, Deserialize, Default)] +#[serde(default)] +struct AIModelConfigCompat { + id: String, + name: String, + provider: String, + model_name: String, + base_url: String, + request_url: Option, + api_key: String, + context_window: Option, + max_tokens: Option, + temperature: Option, + top_p: Option, + frequency_penalty: Option, + presence_penalty: Option, + enabled: bool, + category: ModelCategory, + capabilities: Vec, + recommended_for: Vec, + metadata: Option, + enable_thinking_process: Option, + reasoning_mode: Option, + inline_think_in_text: bool, + custom_headers: Option>, + custom_headers_mode: Option, + skip_ssl_verify: bool, + reasoning_effort: Option, + thinking_budget_tokens: Option, + custom_request_body: Option, +} + +impl From for AIModelConfig { + fn from(value: AIModelConfigCompat) -> Self { + let reasoning_mode = value.reasoning_mode.or_else(|| { + value.enable_thinking_process.map(|enabled| { + if enabled { + ReasoningMode::Enabled + } else { + ReasoningMode::Default + } + }) + }); + + Self { + id: value.id, + name: value.name, + provider: value.provider, + model_name: value.model_name, + base_url: value.base_url, + request_url: value.request_url, + api_key: value.api_key, + context_window: value.context_window, + max_tokens: value.max_tokens, + temperature: value.temperature, + top_p: value.top_p, + frequency_penalty: value.frequency_penalty, + presence_penalty: value.presence_penalty, + enabled: value.enabled, + category: value.category, + capabilities: value.capabilities, + recommended_for: value.recommended_for, + metadata: value.metadata, + enable_thinking_process: value.enable_thinking_process.unwrap_or(false), + reasoning_mode, + inline_think_in_text: value.inline_think_in_text, + custom_headers: value.custom_headers, + custom_headers_mode: value.custom_headers_mode, + skip_ssl_verify: value.skip_ssl_verify, + reasoning_effort: value.reasoning_effort, + thinking_budget_tokens: value.thinking_budget_tokens, + custom_request_body: value.custom_request_body, + } + } +} + +impl AIModelConfig { + pub fn effective_reasoning_mode(&self) -> ReasoningMode { + self.reasoning_mode.unwrap_or({ + if self.enable_thinking_process { + ReasoningMode::Enabled + } else { + ReasoningMode::Default + } + }) + } +} + /// Proxy configuration. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default)] @@ -1250,11 +1366,13 @@ impl Default for AIModelConfig { recommended_for: vec![], metadata: None, enable_thinking_process: false, + reasoning_mode: None, inline_think_in_text: false, custom_headers: None, custom_headers_mode: None, skip_ssl_verify: false, reasoning_effort: None, + thinking_budget_tokens: None, custom_request_body: None, } } @@ -1423,3 +1541,67 @@ impl AIModelConfig { } } } + +#[cfg(test)] +mod tests { + use super::{AIModelConfig, ReasoningMode}; + + #[test] + fn deserializes_compatibility_thinking_flag_into_reasoning_mode() { + let config: AIModelConfig = serde_json::from_value(serde_json::json!({ + "id": "model_1", + "name": "Provider", + "provider": "openai", + "model_name": "test-model", + "base_url": "https://example.com/v1", + "api_key": "key", + "enabled": true, + "enable_thinking_process": true + })) + .expect("legacy config should deserialize"); + + assert_eq!(config.reasoning_mode, Some(ReasoningMode::Enabled)); + assert!(config.enable_thinking_process); + } + + #[test] + fn deserializes_compatibility_false_thinking_flag_into_default_reasoning_mode() { + let config: AIModelConfig = serde_json::from_value(serde_json::json!({ + "id": "model_1", + "name": "Provider", + "provider": "openai", + "model_name": "test-model", + "base_url": "https://example.com/v1", + "api_key": "key", + "enabled": true, + "enable_thinking_process": false + })) + .expect("legacy config should deserialize"); + + assert_eq!(config.reasoning_mode, Some(ReasoningMode::Default)); + assert!(!config.enable_thinking_process); + } + + #[test] + fn serialization_omits_compatibility_thinking_flag() { + let config: AIModelConfig = serde_json::from_value(serde_json::json!({ + "id": "model_1", + "name": "Provider", + "provider": "openai", + "model_name": "test-model", + "base_url": "https://example.com/v1", + "api_key": "key", + "enabled": true, + "enable_thinking_process": true + })) + .expect("legacy config should deserialize"); + + let value = serde_json::to_value(&config).expect("config should serialize"); + + assert!(value.get("enable_thinking_process").is_none()); + assert_eq!( + value.get("reasoning_mode").and_then(|v| v.as_str()), + Some("enabled") + ); + } +} diff --git a/src/crates/core/src/service/filesystem/listing.rs b/src/crates/core/src/service/filesystem/listing.rs index b29216049..fa4a7be74 100644 --- a/src/crates/core/src/service/filesystem/listing.rs +++ b/src/crates/core/src/service/filesystem/listing.rs @@ -26,7 +26,10 @@ struct TreeEntry { modified_time: SystemTime, } -pub fn list_directory_entries(dir_path: &str, limit: usize) -> BitFunResult> { +pub fn list_directory_entries( + dir_path: &str, + limit: usize, +) -> BitFunResult> { let path = Path::new(dir_path); if !path.exists() { return Err(BitFunError::service(format!( @@ -239,7 +242,10 @@ pub fn format_directory_listing(entries: &[DirectoryListingEntry], dir_path: &st "/".to_string() } } else if parts_for_parent.len() > 1 { - format!("{}/", parts_for_parent[..parts_for_parent.len() - 1].join("/")) + format!( + "{}/", + parts_for_parent[..parts_for_parent.len() - 1].join("/") + ) } else { "/".to_string() }; diff --git a/src/crates/core/src/service/remote_connect/bot/mod.rs b/src/crates/core/src/service/remote_connect/bot/mod.rs index 2ee11f238..0c0921c9a 100644 --- a/src/crates/core/src/service/remote_connect/bot/mod.rs +++ b/src/crates/core/src/service/remote_connect/bot/mod.rs @@ -646,9 +646,8 @@ mod tests { assert_eq!(paths.len(), 1); assert!(std::path::Path::new(&paths[0]).is_absolute()); - assert!(std::path::Path::new(&paths[0]).ends_with( - std::path::Path::new("artifacts").join("report.pptx") - )); + assert!(std::path::Path::new(&paths[0]) + .ends_with(std::path::Path::new("artifacts").join("report.pptx"))); assert!(std::path::Path::new(&paths[0]).exists()); let _ = std::fs::remove_dir_all(base); } diff --git a/src/crates/core/src/service/remote_connect/remote_server.rs b/src/crates/core/src/service/remote_connect/remote_server.rs index c626a03c1..51b003ef7 100644 --- a/src/crates/core/src/service/remote_connect/remote_server.rs +++ b/src/crates/core/src/service/remote_connect/remote_server.rs @@ -144,7 +144,11 @@ pub struct RemoteModelConfig { pub capabilities: Vec, pub enable_thinking_process: bool, #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning_mode: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub reasoning_effort: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub thinking_budget_tokens: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -173,22 +177,26 @@ async fn load_remote_model_catalog( .map_err(|e| format!("Failed to load global config: {e}"))?; let ai_config: AIConfig = global_config.ai; - let models: Vec = ai_config - .models - .into_iter() - .map(|model| RemoteModelConfig { - id: model.id, - name: model.name, - provider: model.provider, - base_url: model.base_url, - model_name: model.model_name, - context_window: model.context_window, - enabled: model.enabled, - capabilities: model - .capabilities - .into_iter() - .map(|capability| { - match capability { + let models: Vec = + ai_config + .models + .into_iter() + .map(|model| { + let reasoning_mode = model.effective_reasoning_mode(); + + RemoteModelConfig { + id: model.id, + name: model.name, + provider: model.provider, + base_url: model.base_url, + model_name: model.model_name, + context_window: model.context_window, + enabled: model.enabled, + capabilities: model + .capabilities + .into_iter() + .map(|capability| { + match capability { crate::service::config::types::ModelCapability::TextChat => "text_chat", crate::service::config::types::ModelCapability::ImageUnderstanding => { "image_understanding" @@ -209,12 +217,23 @@ async fn load_remote_model_catalog( } } .to_string() - }) - .collect(), - enable_thinking_process: model.enable_thinking_process, - reasoning_effort: model.reasoning_effort, - }) - .collect(); + }) + .collect(), + enable_thinking_process: model.enable_thinking_process, + reasoning_mode: Some( + match reasoning_mode { + crate::service::config::types::ReasoningMode::Default => "default", + crate::service::config::types::ReasoningMode::Enabled => "enabled", + crate::service::config::types::ReasoningMode::Disabled => "disabled", + crate::service::config::types::ReasoningMode::Adaptive => "adaptive", + } + .to_string(), + ), + reasoning_effort: model.reasoning_effort, + thinking_budget_tokens: model.thinking_budget_tokens, + } + }) + .collect(); let session_model_id = if let Some(session_id) = session_id { resolve_session_model_id(session_id).await diff --git a/src/crates/core/src/service/remote_ssh/workspace_state.rs b/src/crates/core/src/service/remote_ssh/workspace_state.rs index c38385674..7e97d648b 100644 --- a/src/crates/core/src/service/remote_ssh/workspace_state.rs +++ b/src/crates/core/src/service/remote_ssh/workspace_state.rs @@ -422,7 +422,8 @@ impl RemoteWorkspaceStateManager { // Assistant sessions use client-local paths under ~/.bitfun/personal_assistant. // A registered remote root of `/` matches every absolute path; without an explicit // `remote_connection_id`, those paths must not be treated as SSH workspaces. - let is_local_assistant_path = get_path_manager_arc().is_local_assistant_workspace_path(path); + let is_local_assistant_path = + get_path_manager_arc().is_local_assistant_workspace_path(path); if is_local_assistant_path { let preferred_connection_id = preferred_connection_id?; let guard = self.registrations.read().await; diff --git a/src/crates/core/src/util/types/config.rs b/src/crates/core/src/util/types/config.rs index 11896b7a3..7e8c1606c 100644 --- a/src/crates/core/src/util/types/config.rs +++ b/src/crates/core/src/util/types/config.rs @@ -1,4 +1,4 @@ -use crate::service::config::types::AIModelConfig; +use crate::service::config::types::{AIModelConfig, ReasoningMode}; use log::warn; use serde::{Deserialize, Serialize}; @@ -80,21 +80,78 @@ pub struct AIConfig { pub max_tokens: Option, pub temperature: Option, pub top_p: Option, - pub enable_thinking_process: bool, + pub reasoning_mode: ReasoningMode, pub inline_think_in_text: bool, pub custom_headers: Option>, /// "replace" (default) or "merge" (defaults first, then custom) pub custom_headers_mode: Option, pub skip_ssl_verify: bool, - /// Reasoning effort for OpenAI Responses API ("low", "medium", "high", "xhigh") + /// Provider-specific reasoning effort. pub reasoning_effort: Option, + /// Optional Anthropic manual thinking budget. + pub thinking_budget_tokens: Option, /// Custom JSON overriding default request body fields pub custom_request_body: Option, } +impl TryFrom for AIConfig { + type Error = String; + fn try_from(other: AIModelConfig) -> Result>::Error> { + let reasoning_mode = other.effective_reasoning_mode(); + + // Parse custom request body (convert JSON string to serde_json::Value) + let custom_request_body = if let Some(body_str) = &other.custom_request_body { + match serde_json::from_str::(body_str) { + Ok(value) => Some(value), + Err(e) => { + warn!( + "Failed to parse custom_request_body: {}, config: {}", + e, other.name + ); + None + } + } + } else { + None + }; + + // Use stored request_url if present; otherwise derive from base_url + provider for legacy configs. + let request_url = other + .request_url + .clone() + .filter(|u| !u.is_empty()) + .unwrap_or_else(|| { + resolve_request_url(&other.base_url, &other.provider, &other.model_name) + }); + + Ok(AIConfig { + name: other.name.clone(), + base_url: other.base_url.clone(), + request_url, + api_key: other.api_key.clone(), + model: other.model_name.clone(), + format: other.provider.clone(), + context_window: other.context_window.unwrap_or(128128), + max_tokens: other.max_tokens, + temperature: other.temperature, + top_p: other.top_p, + reasoning_mode, + inline_think_in_text: other.inline_think_in_text, + custom_headers: other.custom_headers, + custom_headers_mode: other.custom_headers_mode, + skip_ssl_verify: other.skip_ssl_verify, + reasoning_effort: other.reasoning_effort, + thinking_budget_tokens: other.thinking_budget_tokens, + custom_request_body, + }) + } +} + #[cfg(test)] mod tests { use super::resolve_request_url; + use super::AIConfig; + use crate::service::config::types::{AIModelConfig, ModelCategory, ReasoningMode}; #[test] fn resolves_openai_request_url() { @@ -163,53 +220,51 @@ mod tests { "https://openrouter.ai/api/v1/chat/completions" ); } -} -impl TryFrom for AIConfig { - type Error = String; - fn try_from(other: AIModelConfig) -> Result>::Error> { - // Parse custom request body (convert JSON string to serde_json::Value) - let custom_request_body = if let Some(body_str) = &other.custom_request_body { - match serde_json::from_str::(body_str) { - Ok(value) => Some(value), - Err(e) => { - warn!( - "Failed to parse custom_request_body: {}, config: {}", - e, other.name - ); - None - } - } - } else { - None - }; + fn base_model_config() -> AIModelConfig { + AIModelConfig { + id: "model_1".to_string(), + name: "Provider".to_string(), + provider: "openai".to_string(), + model_name: "test-model".to_string(), + base_url: "https://example.com/v1".to_string(), + request_url: Some("https://example.com/v1/chat/completions".to_string()), + api_key: "key".to_string(), + context_window: Some(128000), + max_tokens: Some(4096), + temperature: None, + top_p: None, + frequency_penalty: None, + presence_penalty: None, + enabled: true, + category: ModelCategory::GeneralChat, + capabilities: vec![], + recommended_for: vec![], + metadata: None, + enable_thinking_process: false, + reasoning_mode: None, + inline_think_in_text: false, + custom_headers: None, + custom_headers_mode: None, + skip_ssl_verify: false, + reasoning_effort: None, + thinking_budget_tokens: None, + custom_request_body: None, + } + } - // Use stored request_url if present; otherwise derive from base_url + provider for legacy configs. - let request_url = other - .request_url - .filter(|u| !u.is_empty()) - .unwrap_or_else(|| { - resolve_request_url(&other.base_url, &other.provider, &other.model_name) - }); + #[test] + fn compatibility_false_thinking_maps_to_default_mode() { + let config = AIConfig::try_from(base_model_config()).expect("conversion should succeed"); + assert_eq!(config.reasoning_mode, ReasoningMode::Default); + } - Ok(AIConfig { - name: other.name.clone(), - base_url: other.base_url.clone(), - request_url, - api_key: other.api_key.clone(), - model: other.model_name.clone(), - format: other.provider.clone(), - context_window: other.context_window.unwrap_or(128128), - max_tokens: other.max_tokens, - temperature: other.temperature, - top_p: other.top_p, - enable_thinking_process: other.enable_thinking_process, - inline_think_in_text: other.inline_think_in_text, - custom_headers: other.custom_headers, - custom_headers_mode: other.custom_headers_mode, - skip_ssl_verify: other.skip_ssl_verify, - reasoning_effort: other.reasoning_effort, - custom_request_body, - }) + #[test] + fn compatibility_true_thinking_maps_to_enabled_mode() { + let mut model = base_model_config(); + model.enable_thinking_process = true; + + let config = AIConfig::try_from(model).expect("conversion should succeed"); + assert_eq!(config.reasoning_mode, ReasoningMode::Enabled); } } diff --git a/src/mobile-web/src/pages/ChatPage.tsx b/src/mobile-web/src/pages/ChatPage.tsx index e7830e38b..aab74c189 100644 --- a/src/mobile-web/src/pages/ChatPage.tsx +++ b/src/mobile-web/src/pages/ChatPage.tsx @@ -1719,6 +1719,14 @@ function getModelDisplayName(model: RemoteModelConfig | null): string { return model.model_name || model.name || ''; } +function isReasoningEnabled(model: RemoteModelConfig | null): boolean { + if (!model) return false; + if (model.reasoning_mode) { + return model.reasoning_mode === 'enabled' || model.reasoning_mode === 'adaptive'; + } + return !!model.enable_thinking_process; +} + function getSelectedModelInfo( selectedModelId: string, catalog: RemoteModelCatalog | null, @@ -1747,7 +1755,7 @@ function getSelectedModelInfo( ? (selectedModelId === 'primary' ? t('chat.modelPrimary') : t('chat.modelFast')) : t('chat.modelAuto'), meta: buildModelProviderMeta(resolved) || t('chat.modelAutoDesc'), - enableThinking: !!resolved?.enable_thinking_process, + enableThinking: isReasoningEnabled(resolved), reasoningEffort: resolved?.reasoning_effort, }; } @@ -1764,7 +1772,7 @@ function getSelectedModelInfo( return { label: getModelDisplayName(resolved), meta: buildModelProviderMeta(resolved), - enableThinking: resolved.enable_thinking_process, + enableThinking: isReasoningEnabled(resolved), reasoningEffort: resolved.reasoning_effort, }; } @@ -1914,7 +1922,7 @@ const ModelSelectorPill: React.FC<{ {getModelDisplayName(model)} - {model.enable_thinking_process && ( + {isReasoningEnabled(model) && ( )} diff --git a/src/mobile-web/src/services/RemoteSessionManager.ts b/src/mobile-web/src/services/RemoteSessionManager.ts index a5eb4fc0e..47d8d928e 100644 --- a/src/mobile-web/src/services/RemoteSessionManager.ts +++ b/src/mobile-web/src/services/RemoteSessionManager.ts @@ -53,7 +53,8 @@ export interface RemoteModelConfig { context_window?: number; enabled: boolean; capabilities: string[]; - enable_thinking_process: boolean; + enable_thinking_process?: boolean; + reasoning_mode?: 'default' | 'enabled' | 'disabled' | 'adaptive'; reasoning_effort?: string; } diff --git a/src/web-ui/src/flow_chat/components/ModelSelector.tsx b/src/web-ui/src/flow_chat/components/ModelSelector.tsx index 490942f2e..31cb8dc4f 100644 --- a/src/web-ui/src/flow_chat/components/ModelSelector.tsx +++ b/src/web-ui/src/flow_chat/components/ModelSelector.tsx @@ -13,6 +13,7 @@ import { useTranslation } from 'react-i18next'; import { configManager } from '@/infrastructure/config/services/ConfigManager'; import { agentAPI } from '@/infrastructure/api/service-api/AgentAPI'; import { getProviderDisplayName } from '@/infrastructure/config/services/modelConfigs'; +import { getEffectiveReasoningMode, isReasoningVisiblyEnabled } from '@/infrastructure/config/utils/reasoning'; import { globalEventBus } from '@/infrastructure/event-bus'; import type { AIModelConfig } from '@/infrastructure/config/types'; import { Tooltip } from '@/component-library'; @@ -223,7 +224,7 @@ export const ModelSelector: React.FC = ({ providerName: getProviderDisplayName(model), provider: model.provider, contextWindow: model.context_window, - enableThinking: model.enable_thinking_process, + enableThinking: isReasoningVisiblyEnabled(getEffectiveReasoningMode(model)), reasoningEffort: model.reasoning_effort, }; } @@ -238,7 +239,7 @@ export const ModelSelector: React.FC = ({ providerName: getProviderDisplayName(model), provider: model.provider, contextWindow: model.context_window, - enableThinking: model.enable_thinking_process, + enableThinking: isReasoningVisiblyEnabled(getEffectiveReasoningMode(model)), reasoningEffort: model.reasoning_effort, }; }, [getCurrentModelId, allModels, defaultModels, t]); @@ -258,7 +259,7 @@ export const ModelSelector: React.FC = ({ providerName: getProviderDisplayName(m), provider: m.provider, contextWindow: m.context_window, - enableThinking: m.enable_thinking_process, + enableThinking: isReasoningVisiblyEnabled(getEffectiveReasoningMode(m)), reasoningEffort: m.reasoning_effort, })); }, [allModels]); diff --git a/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx b/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx index 0aeef7556..d7b0a3f23 100644 --- a/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx +++ b/src/web-ui/src/infrastructure/config/components/AIModelConfig.tsx @@ -6,10 +6,12 @@ import { AIModelConfig as AIModelConfigType, ProxyConfig, ModelCategory, - ModelCapability + ModelCapability, + ReasoningMode } from '../types'; import { configManager } from '../services/ConfigManager'; import { PROVIDER_TEMPLATES, getModelDisplayName, getProviderDisplayName, getProviderTemplateId } from '../services/modelConfigs'; +import { DEFAULT_REASONING_MODE, getEffectiveReasoningMode, supportsAnthropicAdaptive, supportsAnthropicReasoning, supportsAnthropicThinkingBudget, supportsResponsesReasoning } from '../utils/reasoning'; import { aiApi, systemAPI } from '@/infrastructure/api'; import { useNotification } from '@/shared/notification-system'; import { ConfigPageHeader, ConfigPageLayout, ConfigPageContent, ConfigPageSection, ConfigPageRow, ConfigCollectionItem } from './common'; @@ -32,7 +34,9 @@ interface SelectedModelDraft { category: ModelCategory; contextWindow: number; maxTokens: number; - enableThinking: boolean; + reasoningMode: ReasoningMode; + reasoningEffort?: string; + thinkingBudgetTokens?: number; } interface ProviderGroup { @@ -42,7 +46,7 @@ interface ProviderGroup { } function isResponsesProvider(provider?: string): boolean { - return provider === 'response' || provider === 'responses'; + return supportsResponsesReasoning(provider); } function createModelDraft( @@ -59,7 +63,39 @@ function createModelDraft( category: overrides?.category ?? baseConfig?.category ?? 'general_chat', contextWindow: overrides?.contextWindow ?? baseConfig?.context_window ?? 128000, maxTokens: overrides?.maxTokens ?? baseConfig?.max_tokens ?? 8192, - enableThinking: overrides?.enableThinking ?? baseConfig?.enable_thinking_process ?? false, + reasoningMode: overrides?.reasoningMode ?? getEffectiveReasoningMode(baseConfig), + reasoningEffort: overrides?.reasoningEffort ?? baseConfig?.reasoning_effort, + thinkingBudgetTokens: overrides?.thinkingBudgetTokens ?? baseConfig?.thinking_budget_tokens, + }; +} + +function normalizeDraftReasoningForProvider( + draft: SelectedModelDraft, + provider?: string +): SelectedModelDraft { + let reasoningMode = draft.reasoningMode; + + if (supportsResponsesReasoning(provider)) { + reasoningMode = DEFAULT_REASONING_MODE; + } else if (!supportsAnthropicReasoning(provider) && reasoningMode === 'adaptive') { + reasoningMode = 'enabled'; + } else if (supportsAnthropicReasoning(provider) + && reasoningMode === 'adaptive' + && !supportsAnthropicAdaptive(draft.modelName)) { + reasoningMode = 'enabled'; + } + + const keepReasoningEffort = supportsResponsesReasoning(provider) + || (supportsAnthropicReasoning(provider) && reasoningMode === 'adaptive'); + const keepThinkingBudget = supportsAnthropicReasoning(provider) + && reasoningMode === 'enabled' + && supportsAnthropicThinkingBudget(draft.modelName); + + return { + ...draft, + reasoningMode, + reasoningEffort: keepReasoningEffort ? draft.reasoningEffort : undefined, + thinkingBudgetTokens: keepThinkingBudget ? draft.thinkingBudgetTokens : undefined, }; } @@ -257,8 +293,10 @@ const AIModelConfig: React.FC = () => { [requestFormatOptions] ); - const reasoningEffortOptions = useMemo( + const responsesReasoningEffortOptions = useMemo( () => [ + { label: 'None', value: 'none' }, + { label: 'Minimal', value: 'minimal' }, { label: 'Low', value: 'low' }, { label: 'Medium', value: 'medium' }, { label: 'High', value: 'high' }, @@ -267,14 +305,33 @@ const AIModelConfig: React.FC = () => { [] ); - const thinkingModeOptions = useMemo( + const anthropicReasoningEffortOptions = useMemo( () => [ - { label: t('thinking.optionEnabled'), value: 'enabled' }, - { label: t('thinking.optionDisabled'), value: 'disabled' }, + { label: 'Low', value: 'low' }, + { label: 'Medium', value: 'medium' }, + { label: 'High', value: 'high' }, + { label: 'Max', value: 'max' }, ], - [t] + [] ); + const buildReasoningModeOptions = useCallback((provider?: string, modelName?: string, currentMode?: ReasoningMode): SelectOption[] => { + const options: SelectOption[] = [ + { label: t('thinking.optionDefault'), value: DEFAULT_REASONING_MODE }, + { label: t('thinking.optionEnabled'), value: 'enabled' }, + { label: t('thinking.optionDisabled'), value: 'disabled' }, + ]; + + if ( + supportsAnthropicReasoning(provider) + && (supportsAnthropicAdaptive(modelName) || currentMode === 'adaptive') + ) { + options.push({ label: t('thinking.optionAdaptive'), value: 'adaptive' }); + } + + return options; + }, [t]); + const categoryOptions = useMemo( () => [ { label: t('category.general_chat'), value: 'general_chat' }, @@ -362,7 +419,9 @@ const AIModelConfig: React.FC = () => { configId: config.id, contextWindow: config.context_window || 128000, maxTokens: config.max_tokens || 8192, - enableThinking: config.enable_thinking_process ?? false, + reasoningMode: getEffectiveReasoningMode(config), + reasoningEffort: config.reasoning_effort, + thinkingBudgetTokens: config.thinking_budget_tokens, })) ); @@ -545,7 +604,6 @@ const AIModelConfig: React.FC = () => { base_url: resolvedBaseUrl, request_url: config.request_url || resolveRequestUrl(resolvedBaseUrl, resolvedProvider, resolvedModelName), model_name: resolvedModelName, - description: config.description, context_window: config.context_window || 128000, max_tokens: config.max_tokens || 8192, temperature: config.temperature, @@ -555,9 +613,10 @@ const AIModelConfig: React.FC = () => { capabilities: config.capabilities || ['text_chat'], recommended_for: config.recommended_for || [], metadata: config.metadata || {}, - enable_thinking_process: config.enable_thinking_process ?? false, + reasoning_mode: config.reasoning_mode ?? getEffectiveReasoningMode(config), inline_think_in_text: config.inline_think_in_text ?? false, reasoning_effort: config.reasoning_effort, + thinking_budget_tokens: config.thinking_budget_tokens, custom_headers: config.custom_headers, custom_headers_mode: config.custom_headers_mode, skip_ssl_verify: config.skip_ssl_verify ?? false, @@ -687,7 +746,7 @@ const AIModelConfig: React.FC = () => { : (defaultModel ? [createModelDraft(defaultModel, { context_window: 128000, max_tokens: 8192, - enable_thinking_process: false, + reasoning_mode: DEFAULT_REASONING_MODE, })] : []) ); setShowAdvancedSettings(false); @@ -741,16 +800,13 @@ const AIModelConfig: React.FC = () => { model_name: '', provider: config.provider, enabled: true, - description: config.description, context_window: config.context_window || 128000, max_tokens: config.max_tokens || 8192, category: config.category || 'general_chat', capabilities: config.capabilities || getCapabilitiesByCategory(config.category || 'general_chat'), recommended_for: config.recommended_for || [], metadata: config.metadata || {}, - enable_thinking_process: config.enable_thinking_process ?? false, inline_think_in_text: config.inline_think_in_text ?? false, - reasoning_effort: config.reasoning_effort, custom_headers: config.custom_headers, custom_headers_mode: config.custom_headers_mode, skip_ssl_verify: config.skip_ssl_verify ?? false, @@ -776,7 +832,9 @@ const AIModelConfig: React.FC = () => { createModelDraft(config.model_name, config, { contextWindow: config.context_window || 128000, maxTokens: config.max_tokens || 8192, - enableThinking: config.enable_thinking_process ?? false, + reasoningMode: getEffectiveReasoningMode(config), + reasoningEffort: config.reasoning_effort, + thinkingBudgetTokens: config.thinking_budget_tokens, }) ]); @@ -831,16 +889,16 @@ const AIModelConfig: React.FC = () => { model_name: draft.modelName, provider: editingConfig.provider || 'openai', enabled: editingConfig.enabled ?? true, - description: editingConfig.description, context_window: draft.contextWindow, max_tokens: draft.maxTokens, category: draft.category, capabilities: getCapabilitiesByCategory(draft.category), recommended_for: editingConfig.recommended_for || [], metadata: editingConfig.metadata, - enable_thinking_process: draft.enableThinking, + reasoning_mode: draft.reasoningMode, inline_think_in_text: editingConfig.inline_think_in_text ?? false, - reasoning_effort: editingConfig.reasoning_effort, + reasoning_effort: draft.reasoningEffort, + thinking_budget_tokens: draft.thinkingBudgetTokens, custom_headers: editingConfig.custom_headers, custom_headers_mode: editingConfig.custom_headers_mode, skip_ssl_verify: editingConfig.skip_ssl_verify ?? false, @@ -1261,6 +1319,43 @@ const AIModelConfig: React.FC = () => { ); + const formatReasoningSummary = (draft: SelectedModelDraft) => { + const parts: string[] = []; + + switch (draft.reasoningMode) { + case 'enabled': + parts.push(t('thinking.summaryEnabled')); + break; + case 'disabled': + parts.push(t('thinking.summaryDisabled')); + break; + case 'adaptive': + parts.push(t('thinking.summaryAdaptive')); + break; + default: + parts.push(t('thinking.summaryDefault')); + break; + } + + if (draft.reasoningEffort) { + parts.push(draft.reasoningEffort); + } + + return parts.join(' · '); + }; + + const getDraftReasoningEffortOptions = (provider?: string) => { + if (supportsResponsesReasoning(provider)) { + return responsesReasoningEffortOptions; + } + + if (supportsAnthropicReasoning(provider)) { + return anthropicReasoningEffortOptions; + } + + return []; + }; + const renderSelectedModelRows = () => { if (selectedModelDrafts.length === 0) { return ( @@ -1277,6 +1372,19 @@ const AIModelConfig: React.FC = () => { const categoryLabel = categoryCompactLabels[draft.category] ?? draft.category; const canToggleExpand = selectedModelDrafts.length > 1; const modelDisplayName = draft.modelName; + const reasoningModeOptions = buildReasoningModeOptions(editingConfig.provider, draft.modelName, draft.reasoningMode); + const reasoningEffortOptions = getDraftReasoningEffortOptions(editingConfig.provider); + const showReasoningModeControl = !supportsResponsesReasoning(editingConfig.provider); + const showReasoningEffortControl = reasoningEffortOptions.length > 0 + && ( + supportsResponsesReasoning(editingConfig.provider) + || (supportsAnthropicReasoning(editingConfig.provider) && draft.reasoningMode === 'adaptive') + ); + const showThinkingBudgetControl = supportsAnthropicReasoning(editingConfig.provider) + && draft.reasoningMode === 'enabled' + && supportsAnthropicThinkingBudget(draft.modelName); + const displayedThinkingBudget = draft.thinkingBudgetTokens + ?? Math.min(Math.floor(draft.maxTokens * 0.75), 10000); return (
@@ -1332,7 +1440,7 @@ const AIModelConfig: React.FC = () => { {' · '} {formatTokenCountShort(draft.maxTokens)} out {' · '} - {draft.enableThinking ? t('thinking.summaryOn') : t('thinking.summaryOff')} + {formatReasoningSummary(draft)}
)} @@ -1386,15 +1494,43 @@ const AIModelConfig: React.FC = () => { disableWheel /> -
- {t('thinking.enable')} - updateModelDraft(draft.modelName, { reasoningMode: value as ReasoningMode })} + options={reasoningModeOptions} + size="small" + /> +
+ )} + {showReasoningEffortControl && ( +
+ {t('reasoningEffort.label')} + { + const provider = value as string; resetRemoteModelDiscovery(); + setSelectedModelDrafts(prevDrafts => + prevDrafts.map(draft => normalizeDraftReasoningForProvider(draft, provider)) + ); setEditingConfig(prev => ({ ...prev, - provider: value as string, - request_url: resolveRequestUrl(prev?.base_url || '', value as string, prev?.model_name || '') + provider, + request_url: resolveRequestUrl(prev?.base_url || '', provider, prev?.model_name || '') })); }} placeholder={t('form.providerPlaceholder')} @@ -1542,11 +1682,6 @@ const AIModelConfig: React.FC = () => { {renderSelectedModelRows()}
- {isResponsesProvider(editingConfig.provider) && ( - - { const provider = value as string; resetRemoteModelDiscovery(); + setSelectedModelDrafts(prevDrafts => + prevDrafts.map(draft => normalizeDraftReasoningForProvider(draft, provider)) + ); setEditingConfig(prev => ({ ...prev, provider, request_url: resolveRequestUrl(prev?.base_url || '', provider, prev?.model_name || ''), inline_think_in_text: provider === 'openai' ? (prev?.inline_think_in_text ?? false) : false, - reasoning_effort: isResponsesProvider(provider) ? (prev?.reasoning_effort || 'medium') : undefined, })); }} placeholder={t('form.providerPlaceholder')} options={requestFormatOptions} size="small" /> @@ -1665,14 +1802,6 @@ const AIModelConfig: React.FC = () => { {renderSelectedModelRows()} - {isResponsesProvider(editingConfig.provider) && ( - -