diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index e321d495..1fc90a6c 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -2015,6 +2015,105 @@ fn ensure_qa_volcengine_credentials() -> Result<(), String> { /// 润色文本;失败时返回原文 + 失败原因,调用方据此弹错误胶囊 + 写历史 error_code。 /// 之前固定返回 String,调用方拿不到失败信号 → 用户感知"为什么风格设置没生效"。issue #57。 +/// 流式润色的三态结果。让上层(dictation pipeline)能区分「已经流出去了」、 +/// 「降级到一次性」和「真失败了走 raw 兜底」三种 case。 +pub enum StreamingPolishOutcome { + /// 流式润色成功,`String` 是已经一边流一边交给 `on_delta` 的全部文本(用于写 + /// history、做词条命中统计)。调用方不应再 `inserter.insert(&text)`,因为字符 + /// 已经通过键盘事件落到光标处。 + Streamed(String), + /// 当前配置不支持流式:用户没开 streaming_insert / Gemini provider / Codex + /// provider / Raw 模式 / 翻译模式 / 不是 macOS。调用方应回到现有的 + /// `polish_or_passthrough` 一次性路径,跟历史行为完全一致。 + UnsupportedFallback, + /// 流式过程中失败(HTTP / 解析 / 空流等)。`String` 是失败原因,调用方应当 + /// 走 raw 兜底(同 `polish_or_passthrough` 失败分支的语义)。 + Failed(String), +} + +/// 流式润色入口。在不支持流式的所有 case 都返回 `UnsupportedFallback`,让调用方 +/// 透明降级。不修改任何持久化 / 焦点 / 光标状态。 +/// +/// `on_delta` 每收到一个 SSE chunk 就被调用一次(同步),调用方负责把 chunk 实际 +/// 模拟键盘事件落到光标 —— 见 `coordinator/dictation.rs` 的流式分支。 +/// `should_cancel` 用户取消时返回 true,立即 break SSE 读循环避免烧 quota。 +pub async fn polish_or_passthrough_streaming( + raw: &RawTranscript, + mode: PolishMode, + hotwords: &[String], + working_languages: &[String], + chinese_script_preference: ChineseScriptPreference, + output_language_preference: OutputLanguagePreference, + llm_thinking_enabled: bool, + front_app: Option<&str>, + prior_turns: &[(String, String)], + on_delta: F, + should_cancel: C, +) -> StreamingPolishOutcome +where + F: Fn(&str) + Send + Sync, + C: Fn() -> bool + Send + Sync, +{ + if mode == PolishMode::Raw { + log::info!("[coord] streaming polish skipped: mode=Raw, fall back to one-shot"); + return StreamingPolishOutcome::UnsupportedFallback; + } + let active_llm = 
CredentialsVault::get_active_llm(); + if active_llm == "gemini" { + log::info!( + "[coord] streaming polish skipped: active LLM provider=gemini (v1 not implemented), fall back to one-shot" + ); + return StreamingPolishOutcome::UnsupportedFallback; + } + let provider = match build_active_llm_provider(llm_thinking_enabled) { + Ok(p) => p, + Err(e) => { + log::error!("[coord] streaming polish: build provider failed: {e}"); + return StreamingPolishOutcome::Failed(e.to_string()); + } + }; + if !provider.supports_streaming_polish() { + log::info!( + "[coord] streaming polish skipped: provider does not support streaming (likely codex OAuth), fall back to one-shot" + ); + return StreamingPolishOutcome::UnsupportedFallback; + } + log::info!( + "[coord] streaming polish START: provider=openai-compatible mode={:?} raw_chars={} prior_turns={}", + mode, + raw.text.chars().count(), + prior_turns.len() + ); + match provider + .polish_streaming( + &raw.text, + mode, + hotwords, + working_languages, + chinese_script_preference, + output_language_preference, + front_app, + prior_turns, + on_delta, + should_cancel, + ) + .await + { + Ok(text) => { + log::info!( + "[coord] streaming polish OK: final_chars={}", + text.chars().count() + ); + StreamingPolishOutcome::Streamed(text) + } + Err(e) => { + let reason = e.to_string(); + log::error!("[coord] streaming polish FAILED: {reason}"); + StreamingPolishOutcome::Failed(reason) + } + } +} + async fn polish_or_passthrough( raw: &RawTranscript, mode: PolishMode, diff --git a/openless-all/app/src-tauri/src/coordinator/dictation.rs b/openless-all/app/src-tauri/src/coordinator/dictation.rs index 5ce913ae..58f6f44f 100644 --- a/openless-all/app/src-tauri/src/coordinator/dictation.rs +++ b/openless-all/app/src-tauri/src/coordinator/dictation.rs @@ -13,6 +13,264 @@ use super::*; /// 避免微动开关回弹 / 用户手抖双击造成的空转写报错和 ASR session 抢资源。 const HOTKEY_DEBOUNCE: std::time::Duration = std::time::Duration::from_millis(250); +/// 跑流式润色路径(opt-in,跨平台)。 +/// +/// 
平台差异: +/// - **macOS**:`switch_to_ascii` 切到 ABC 输入源(规避 CJK / 日文 IME 拦截 Unicode 事件), +/// session 结束 `restore_input_source` 切回。`type_unicode_chunk` 走 CGEvent FFI。 +/// - **Windows**:`switch_to_ascii` 是 no-op(SendInput Unicode 绕过 TSF); +/// `type_unicode_chunk` 走 `SendInput(KEYEVENTF_UNICODE)`。 +/// - **Linux(实验)**:`switch_to_ascii` 是 no-op;`type_unicode_chunk` 走 enigo +/// `Keyboard::text`。X11 / XTest 稳定,Wayland 看 compositor 给不给 libei 权限。 +/// +/// 通用流程: +/// 1. `switch_to_ascii`(macOS)/ no-op(其他);失败则降级回一次性 `polish_or_passthrough`。 +/// 2. 起一个 `spawn_blocking` 后台任务,从 mpsc 收 SSE delta,逐 delta 调 +/// `type_unicode_chunk` 模拟键盘事件落到光标处。串行有序,无竞态。 +/// 3. 调 `polish_or_passthrough_streaming`,`on_delta` 把 chunk 塞进 mpsc。 +/// 4. 流结束 / 失败 / 取消 → drop mpsc 发送端 → typer 任务 drain 完剩余 delta 退出 → +/// `restore_input_source` 恢复用户原输入源(macOS 才有意义,其他平台 no-op)。 +/// 5. 返回 `(polished, polish_error, already_streamed)`: +/// - 成功:`(text, None, true)` — 字符已经在屏幕上,调用方应当跳过 `inserter.insert` +/// - 失败:`(raw_text, Some(reason), false)` — 流式过程出错,调用方走 raw 一次性兜底 +/// - 不支持:`run_streaming_polish` 内部直接调 `polish_or_passthrough` 透明降级 +/// +/// **不在流式路径里做**:`apply_chinese_script_preference` / `apply_correction_rules` +/// 这两步在 v1 跳过 —— 字符已经一边流一边落出去了,不好回退。需要的话只能关 toggle 走 +/// 一次性路径。 +#[allow(clippy::too_many_arguments)] +async fn run_streaming_polish( + inner: &Arc, + raw: &RawTranscript, + mode: PolishMode, + hotwords: &[String], + working_languages: &[String], + chinese_script_preference: crate::types::ChineseScriptPreference, + output_language_preference: crate::types::OutputLanguagePreference, + llm_thinking_enabled: bool, + front_app: Option<&str>, + prior_turns: &[(String, String)], +) -> (String, Option, bool) { + log::info!( + "[coord] streaming_insert path ENTER (raw_chars={})", + raw.text.chars().count() + ); + + let app = inner.app.lock().clone(); + let Some(app) = app else { + log::warn!("[coord] streaming_insert: no AppHandle in Inner; fall back to one-shot"); + let (p, e) = 
polish_or_passthrough( + raw, + mode, + hotwords, + working_languages, + chinese_script_preference, + output_language_preference, + llm_thinking_enabled, + front_app, + prior_turns, + ) + .await; + return (p, e, false); + }; + + // 1. 切到 ABC 输入源。失败则降级 —— 流式路径上 CJK IME 拦截不是可恢复错误。 + log::info!("[coord] streaming_insert: switching input source to ABC"); + let prev_ime = match crate::unicode_keystroke::switch_to_ascii(&app).await { + Ok(prev) => { + log::info!( + "[coord] streaming_insert: switched to ABC (had_previous={})", + prev.is_some() + ); + prev + } + Err(e) => { + log::warn!( + "[coord] streaming_insert: switch_to_ascii failed: {e}; fall back to one-shot" + ); + let (p, err) = polish_or_passthrough( + raw, + mode, + hotwords, + working_languages, + chinese_script_preference, + output_language_preference, + llm_thinking_enabled, + front_app, + prior_turns, + ) + .await; + return (p, err, false); + } + }; + + // 2. 起 typer 后台任务:从 mpsc 收 delta,串行调 type_unicode_chunk。 + // 同时累积 typed_text:屏幕上真正落字的内容,用于(a)SSE 中途失败时让 history + // 与用户实际看到的内容一致;(b)pr-agent #412 反馈 \"saved output diverges + // from what the user actually sees\"。 + let (tx, rx) = tokio::sync::mpsc::unbounded_channel::(); + let typer_handle = tokio::task::spawn_blocking(move || { + let mut rx = rx; + let mut typed_text = String::new(); + let mut first_failure: Option = None; + while let Some(delta) = rx.blocking_recv() { + if first_failure.is_some() { + // 一旦类型链路出错(如 Secure Input 启用),后续 delta 全部丢弃,但仍 + // 把 mpsc drain 完,避免发送端阻塞。 + continue; + } + match crate::unicode_keystroke::type_unicode_chunk(&delta) { + Ok(()) => { + typed_text.push_str(&delta); + } + Err(e) => { + log::error!( + "[coord] streaming_insert: type_unicode_chunk failed at typed={} chars: {e}; \ + dropping remaining deltas", + typed_text.chars().count() + ); + first_failure = Some(e.to_string()); + } + } + } + (typed_text, first_failure) + }); + + // 3. 
调流式润色,on_delta 塞 mpsc;should_cancel 检查 dictation 取消旗。 + let inner_for_cancel = Arc::clone(inner); + let should_cancel = move || inner_for_cancel.state.lock().cancelled; + let outcome = super::polish_or_passthrough_streaming( + raw, + mode, + hotwords, + working_languages, + chinese_script_preference, + output_language_preference, + llm_thinking_enabled, + front_app, + prior_turns, + move |delta: &str| { + let _ = tx.send(delta.to_string()); + }, + should_cancel, + ) + .await; + // tx 已经被 move 进 on_delta 闭包;闭包随 polish_or_passthrough_streaming 返回 + // 而 drop,typer 那侧 blocking_recv 拿到 None 自然退出。 + + // 4. 等 typer 把缓冲 drain 完,拿到实际落字的全文 + 第一条失败原因。 + let (typed_text, typer_failure) = typer_handle.await.unwrap_or_else(|e| { + log::error!("[coord] streaming_insert: typer task join failed: {e}"); + (String::new(), Some(format!("typer join: {e}"))) + }); + let typed_chars = typed_text.chars().count(); + log::info!("[coord] streaming_insert: typer drained, typed {typed_chars} chars"); + + // 5. 无论流是否成功,都恢复用户原输入源。 + log::info!("[coord] streaming_insert: restoring input source"); + if let Err(e) = crate::unicode_keystroke::restore_input_source(&app, prev_ime).await { + log::warn!("[coord] streaming_insert: restore_input_source failed: {e}"); + } else { + log::info!("[coord] streaming_insert: input source restored"); + } + + // 6. 
把 outcome 翻译成 (polished, polish_error, already_streamed)。 + match outcome { + super::StreamingPolishOutcome::Streamed(text) => { + log::info!( + "[coord] streaming_insert SUCCESS: polished_chars={} typed_chars={} typer_err={:?}", + text.chars().count(), + typed_chars, + typer_failure + ); + // 边界 case:polish 成功但 typer 在第一字就失败(最常见:session 开始时 + // 已处于 Secure Input;或 SendInput / enigo 拒绝)。屏幕上一字未见, + // already_streamed=true 会让上层跳过 inserter,最终用户看不到任何内容。 + // 这里显式回退到一次性兜底,让正常 inserter 路径写出 polish 结果。 + // pr-agent #412 反馈 \"Missing fallback\"。 + if typed_chars == 0 { + if let Some(reason) = typer_failure { + log::warn!( + "[coord] streaming_insert: zero chars typed despite polish success ({reason}); falling back to one-shot inserter" + ); + return (text, Some(reason), false); + } + } + // 先确定 final_text —— typer 中途失败时屏幕只有 typed_text 这一段, + // history 记完整 polish 反而会让用户复盘困惑。让 history / clipboard / + // 后续逻辑统统用 final_text,三处保持一致。 + // pr-agent #412 反馈 \"Clipboard Mismatch\":之前先写 text 到剪贴板再 + // 决定 typer 是否中途失败,导致 Cmd+V 粘出用户屏幕上没见过的内容。 + let (final_text, polish_err) = match typer_failure { + Some(e) => ( + typed_text, + Some(format!("typing partially failed: {e}")), + ), + None => (text, None), + }; + // 把 final_text 写回剪贴板(默认 on,可关)。一次性路径天然走剪贴板, + // 开关默认对齐一次性行为,让 Cmd+V 重复粘贴可用。 + if inner.prefs.get().streaming_insert_save_clipboard { + match arboard::Clipboard::new() { + Ok(mut cb) => match cb.set_text(final_text.clone()) { + Ok(()) => log::info!( + "[coord] streaming_insert: final text written to clipboard ({} chars)", + final_text.chars().count() + ), + Err(e) => log::warn!( + "[coord] streaming_insert: clipboard set_text failed: {e}" + ), + }, + Err(e) => log::warn!( + "[coord] streaming_insert: clipboard handle init failed: {e}" + ), + } + } else { + log::info!( + "[coord] streaming_insert: clipboard save skipped (pref off)" + ); + } + (final_text, polish_err, true) + } + super::StreamingPolishOutcome::UnsupportedFallback => { + log::info!("[coord] streaming_insert: 
dispatch reported unsupported, fall back to one-shot"); + let (p, e) = polish_or_passthrough( + raw, + mode, + hotwords, + working_languages, + chinese_script_preference, + output_language_preference, + llm_thinking_enabled, + front_app, + prior_turns, + ) + .await; + (p, e, false) + } + super::StreamingPolishOutcome::Failed(reason) => { + log::warn!( + "[coord] streaming_insert FAILED: {reason}; typed {typed_chars} chars before failure" + ); + // 流式失败但已经流了一部分 chars:用户屏幕上有半截 polish。history 应当 + // 跟屏幕一致 —— 记 typed_text 而不是 raw.text,否则保存内容跟用户看见的 + // 内容会分叉(pr-agent #412 \"Wrong final text\" 反馈)。 + // 一字都没流时 typed_text 是空串,回到 raw 一次性兜底。 + if typed_chars > 0 { + ( + typed_text, + Some(format!( + "streaming polish failed mid-stream after {typed_chars} chars: {reason}" + )), + true, + ) + } else { + (raw.text.clone(), Some(reason), false) + } + } + } +} + pub(super) async fn handle_pressed_edge(inner: &Arc) { let was_held = inner.hotkey_trigger_held.swap(true, Ordering::SeqCst); if !was_held { @@ -990,14 +1248,22 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { } else { Vec::new() }; - let (polished, polish_error) = if translation_active { + // 流式插入 opt-in 路径:开关打开 + 非翻译 + 非 Raw 模式 → 进入流式分支。 + // 任何不满足都走原一次性 polish_or_passthrough 路径,行为跟历史完全一致。 + let streaming_eligible = + prefs.streaming_insert && !translation_active && mode != PolishMode::Raw; + log::info!( + "[coord] polish dispatch: translation={translation_active} mode={mode:?} streaming_eligible={streaming_eligible}" + ); + + let (polished, polish_error, already_streamed) = if translation_active { log::info!( "[coord] translation mode → target=\u{300C}{}\u{300D} working={:?} front_app={:?}", translation_target, working_languages, front_app ); - translate_or_passthrough( + let (p, e) = translate_or_passthrough( &raw, &translation_target, &working_languages, @@ -1006,9 +1272,24 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { llm_thinking_enabled, front_app.as_deref(), ) + 
.await; + (p, e, false) + } else if streaming_eligible { + run_streaming_polish( + inner, + &raw, + mode, + &hotword_strs, + &working_languages, + chinese_script_preference, + output_language_preference, + llm_thinking_enabled, + front_app.as_deref(), + &prior_turns, + ) .await } else { - polish_or_passthrough( + let (p, e) = polish_or_passthrough( &raw, mode, &hotword_strs, @@ -1019,7 +1300,8 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { front_app.as_deref(), &prior_turns, ) - .await + .await; + (p, e, false) }; // 仅在“ASR 直出文本”场景做强制简繁收敛,避免误伤成功的翻译/常规 LLM 输出: @@ -1053,9 +1335,13 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { // 在同一 lock 内决定「丢弃」还是「进入 Inserting」。一旦设到 Inserting, // cancel_session 就拒绝介入(Cmd+V 已发出,撤销不掉)。这是 audit HIGH #2 的修复, // 之前 check 与 inserter.insert 之间有窗口期。 + // + // 流式路径例外:`already_streamed = true` 表示字符已经一边流一边落到光标了, + // 撤销不掉。即使 cancel 旗在中途被立起来,也只能尊重「已经发生」的事实,进入 + // Inserting 状态完成 history / vocab 等收尾工作。 let proceed_to_insert = { let mut state = inner.state.lock(); - if state.cancelled { + if state.cancelled && !already_streamed { state.phase = SessionPhase::Idle; false } else { @@ -1078,7 +1364,15 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { let restore_clipboard = prefs.restore_clipboard_after_paste; let allow_non_tsf_insertion_fallback = prefs.allow_non_tsf_insertion_fallback; let paste_shortcut = prefs.paste_shortcut; - let status = if focus_ready_for_paste { + // 流式路径下,字符已经通过 Unicode keystroke 落到光标处,跳过 inserter.insert。 + let status = if already_streamed { + log::info!( + "[coord] insertion skipped: {} chars already streamed via unicode_keystroke (polish_error={:?})", + polished.chars().count(), + polish_error + ); + InsertStatus::Inserted + } else if focus_ready_for_paste { #[cfg(target_os = "windows")] { let ime_target = capture_ime_submit_target(); diff --git a/openless-all/app/src-tauri/src/lib.rs b/openless-all/app/src-tauri/src/lib.rs index 6b82383f..7ace76be 
100644 --- a/openless-all/app/src-tauri/src/lib.rs +++ b/openless-all/app/src-tauri/src/lib.rs @@ -29,6 +29,7 @@ mod recorder; mod selection; mod shortcut_binding; mod types; +mod unicode_keystroke; mod windows_ime_ipc; mod windows_ime_profile; mod windows_ime_protocol; diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs index 0943c46e..e42bdb53 100644 --- a/openless-all/app/src-tauri/src/polish.rs +++ b/openless-all/app/src-tauri/src/polish.rs @@ -88,6 +88,53 @@ pub enum ActiveLLMProvider { } impl ActiveLLMProvider { + /// v1 流式润色只在 OpenAI-compatible 走通;Codex 走 Responses API,shape 与 + /// chat completions SSE 不同,留给 v2。Gemini 在 coordinator.rs 路径上自己分流, + /// 不进 ActiveLLMProvider 枚举。 + pub fn supports_streaming_polish(&self) -> bool { + matches!(self, Self::OpenAI(_)) + } + + pub async fn polish_streaming( + &self, + raw_text: &str, + mode: PolishMode, + hotwords: &[String], + working_languages: &[String], + chinese_script_preference: ChineseScriptPreference, + output_language_preference: OutputLanguagePreference, + front_app: Option<&str>, + prior_turns: &[(String, String)], + on_delta: F, + should_cancel: C, + ) -> Result + where + F: Fn(&str) + Send + Sync, + C: Fn() -> bool + Send + Sync, + { + match self { + Self::OpenAI(provider) => { + provider + .polish_streaming( + raw_text, + mode, + hotwords, + working_languages, + chinese_script_preference, + output_language_preference, + front_app, + prior_turns, + on_delta, + should_cancel, + ) + .await + } + Self::Codex(_) => Err(LLMError::Network( + "streaming polish not implemented for codex provider (v1)".into(), + )), + } + } + pub async fn polish( &self, raw_text: &str, @@ -262,6 +309,49 @@ impl OpenAICompatibleLLMProvider { } } + /// 润色路径的**流式**变体。Prompts 与 `polish()` 完全同源(共用 `compose_polish_prompts` + /// + `build_polish_history_messages`),只是 body 开 `stream: true`,SSE 一帧一帧 + /// 喂给 `on_delta`。最终返回拼好的完整字符串供调用方写 history / 记词条命中。 + /// `should_cancel` 让上层在用户取消时立即 break 
SSE 读循环,避免烧 LLM quota。 + pub async fn polish_streaming( + &self, + raw_text: &str, + mode: PolishMode, + hotwords: &[String], + working_languages: &[String], + chinese_script_preference: ChineseScriptPreference, + output_language_preference: OutputLanguagePreference, + front_app: Option<&str>, + prior_turns: &[(String, String)], + on_delta: F, + should_cancel: C, + ) -> Result + where + F: Fn(&str) + Send + Sync, + C: Fn() -> bool + Send + Sync, + { + let (system_prompt, user_prompt) = compose_polish_prompts( + raw_text, + mode, + hotwords, + working_languages, + chinese_script_preference, + output_language_preference, + front_app, + !prior_turns.is_empty(), + ); + let messages = build_polish_history_messages(&system_prompt, prior_turns, &user_prompt); + log::info!( + "[llm] polish_streaming provider={} model={} prior_turns={} raw_chars={}", + self.config.provider_id, + self.config.model, + prior_turns.len(), + raw_text.chars().count() + ); + self.chat_completion_messages_streaming(messages, on_delta, should_cancel) + .await + } + /// 多轮划词追问,**流式**返回。`messages` 包含历史对话(user/assistant 交替), /// 最后一条必须是新一轮的 user 提问。第一条 user 消息里如果有选区,调用方应在 /// content 里就把选区原文注入。`on_delta` 在每个 SSE chunk 到达时被调;最终返回 @@ -561,6 +651,133 @@ impl OpenAICompatibleLLMProvider { } Ok(full_text) } + + /// 把已经构造好的 `messages` 列表(包含 system + 历史 + 当前 user)作为 + /// `stream: true` 的 body 发出去,SSE 一帧一帧解析。供 `polish_streaming` 复用, + /// 跟 `chat_completion_history_streaming` 的 SSE 解析逻辑同款 —— 后者多了一步从 + /// `QaChatMessage[]` 装配 messages 的工作。 + async fn chat_completion_messages_streaming( + &self, + messages: Vec, + on_delta: F, + should_cancel: C, + ) -> Result + where + F: Fn(&str) + Send + Sync, + C: Fn() -> bool + Send + Sync, + { + let url = chat_completions_url(&self.config.base_url); + let body = self.chat_body(true, messages); + + let mut request = self + .client + .post(&url) + .header("Content-Type", "application/json") + .header("Accept", "text/event-stream"); + if 
!self.config.api_key.trim().is_empty() { + request = request.header("Authorization", format!("Bearer {}", self.config.api_key)); + } + for (k, v) in &self.config.extra_headers { + request = request.header(k.as_str(), v.as_str()); + } + let request = request.json(&body); + + let response = match request.send().await { + Ok(r) => r, + Err(e) => { + if e.is_timeout() { + return Err(LLMError::Timeout); + } + return Err(LLMError::Network(e.to_string())); + } + }; + + let status = response.status(); + if !status.is_success() { + let body_text = response + .text() + .await + .map_err(|e| LLMError::Network(e.to_string()))?; + let preview_end = BODY_PREVIEW_LIMIT.min(body_text.len()); + let preview = safe_str_slice(&body_text, preview_end); + log::error!("[llm] streaming HTTP {} body={}", status.as_u16(), preview); + return Err(LLMError::InvalidResponse { + status: status.as_u16(), + body: preview.to_string(), + }); + } + + let mut response = response; + let mut buffer = String::new(); + let mut full_text = String::new(); + let mut delta_count: u64 = 0; + loop { + if should_cancel() { + log::info!( + "[llm] polish stream cancelled by caller after {} deltas ({} chars); breaking SSE loop", + delta_count, + full_text.chars().count() + ); + break; + } + let chunk_opt = response + .chunk() + .await + .map_err(|e| LLMError::Network(e.to_string()))?; + let Some(chunk) = chunk_opt else { break }; + let s = std::str::from_utf8(&chunk) + .map_err(|e| LLMError::Network(format!("non-utf8 SSE chunk: {e}")))?; + buffer.push_str(s); + + while let Some(idx) = buffer.find("\n\n") { + let event = buffer[..idx].to_string(); + buffer.drain(..idx + 2); + for line in event.lines() { + let Some(payload) = line + .strip_prefix("data: ") + .or_else(|| line.strip_prefix("data:")) + else { + continue; + }; + let payload = payload.trim(); + if payload.is_empty() || payload == "[DONE]" { + continue; + } + let v: Value = match serde_json::from_str(payload) { + Ok(v) => v, + Err(e) => { + log::warn!( + 
"[llm] polish SSE parse skip: {e}; payload preview: {}", + safe_str_slice(payload, 80) + ); + continue; + } + }; + if let Some(delta) = v["choices"][0]["delta"]["content"].as_str() { + if !delta.is_empty() { + full_text.push_str(delta); + delta_count += 1; + on_delta(delta); + } + } + } + } + } + + log::info!( + "[llm] polish stream done; total deltas={} chars={}", + delta_count, + full_text.chars().count() + ); + + if full_text.is_empty() { + return Err(LLMError::InvalidResponse { + status: 200, + body: "empty polish stream".to_string(), + }); + } + Ok(full_text) + } } #[derive(Clone, Debug)] diff --git a/openless-all/app/src-tauri/src/types.rs b/openless-all/app/src-tauri/src/types.rs index 3aac1cb5..c0bc5e37 100644 --- a/openless-all/app/src-tauri/src/types.rs +++ b/openless-all/app/src-tauri/src/types.rs @@ -268,6 +268,28 @@ pub struct UserPreferences { /// 用户改用托盘菜单访问主窗口。默认 false 跟历史行为一致。 #[serde(default)] pub start_minimized: bool, + /// 流式输入:润色 SSE 一边到达一边逐字模拟键盘事件输出到当前焦点。开启后用户感知到 + /// 的处理时延显著降低(润色 LLM 第一个 token 即开始落字)。 + /// + /// 平台原语: + /// - macOS:CGEvent Unicode FFI;CJK / 日文 IME 会拦截,session 期间临时切到 ABC + /// - Windows:SendInput Unicode(绕过 TSF);不需要切输入法 + /// - Linux(实验性):enigo `Keyboard::text`;X11 稳定,Wayland 看 compositor + /// + /// 限制: + /// - 不再走剪贴板路径,对 secure input 框(密码框 / 1Password)静默拒绝 + /// - 仅 OpenAI-compatible provider 实装(v1);Gemini / Codex provider 走原一次性 + /// 插入路径 + /// + /// 默认 false 与历史行为一致。 + #[serde(default)] + pub streaming_insert: bool, + /// 流式输入成功后是否把最终润色文本写回剪贴板。一次性路径天然走剪贴板,所以 + /// Cmd+V 可以重复粘贴;流式路径直接合成键盘事件、不动剪贴板,会让用户失去这层 + /// 兜底。开启后流式成功收尾时把 final text 写到系统剪贴板,跟一次性行为对齐。 + /// 默认 true(更接近用户习惯)。 + #[serde(default = "default_true")] + pub streaming_insert_save_clipboard: bool, } fn default_local_asr_model() -> String { @@ -363,6 +385,10 @@ struct UserPreferencesWire { polish_context_window_minutes: u32, #[serde(default)] start_minimized: bool, + #[serde(default)] + streaming_insert: bool, + #[serde(default = "default_true")] + 
streaming_insert_save_clipboard: bool, } impl Default for UserPreferencesWire { @@ -404,6 +430,8 @@ impl Default for UserPreferencesWire { history_retention_days: prefs.history_retention_days, polish_context_window_minutes: prefs.polish_context_window_minutes, start_minimized: prefs.start_minimized, + streaming_insert: prefs.streaming_insert, + streaming_insert_save_clipboard: prefs.streaming_insert_save_clipboard, } } } @@ -462,6 +490,8 @@ impl<'de> Deserialize<'de> for UserPreferences { history_retention_days: wire.history_retention_days, polish_context_window_minutes: wire.polish_context_window_minutes, start_minimized: wire.start_minimized, + streaming_insert: wire.streaming_insert, + streaming_insert_save_clipboard: wire.streaming_insert_save_clipboard, }) } } @@ -573,6 +603,8 @@ impl Default for UserPreferences { history_retention_days: default_history_retention_days(), polish_context_window_minutes: default_polish_context_window_minutes(), start_minimized: false, + streaming_insert: false, + streaming_insert_save_clipboard: true, } } } diff --git a/openless-all/app/src-tauri/src/unicode_keystroke.rs b/openless-all/app/src-tauri/src/unicode_keystroke.rs new file mode 100644 index 00000000..e9be34ba --- /dev/null +++ b/openless-all/app/src-tauri/src/unicode_keystroke.rs @@ -0,0 +1,413 @@ +//! 跨平台 Unicode keystroke 合成(流式输入用)。 +//! +//! 公开 API 三件套: +//! - `type_unicode_chunk(text)` —— 阻塞地把一段文字逐 codepoint 当作键盘事件发出去, +//! 不动剪贴板。各平台用各自的原语。 +//! - `switch_to_ascii(app)` —— 仅 macOS 有效;切到 ABC 输入源以绕过 CJK / 日文 IME +//! 对 Unicode 字符串事件的拦截。Windows / Linux 上是 no-op。 +//! - `restore_input_source(app, prev)` —— 配对调用,恢复 macOS 上的原输入源。 +//! +//! ## 平台差异 +//! +//! - **macOS**:手写 CGEvent FFI(与 `insertion.rs::macos` 的 Cmd+V 同源)。 +//! `CGEventKeyboardSetUnicodeString` 在 CJK / 日文 IME 激活时被拦截 —— +//! 必须 `switch_to_ascii` 切到 ABC,session 结束再 `restore_input_source` 切回。 +//! - **Windows**:`SendInput(KEYEVENTF_UNICODE)` 直接发 UTF-16 scancode。TSF 不拦 +//! 
Unicode 事件(与 keyboard layout / IME 解耦),所以不需要切输入法。 +//! - **Linux**:enigo `Keyboard::text(...)`。X11 走 XTest 稳定;Wayland 看 compositor +//! 是否给 libei 权限,stock GNOME-Wayland 经常拒绝,调用方应当容忍失败回落到一次性。 +//! 不切输入法 —— Linux 的 fcitx / ibus 与 enigo 的交互非常碎,v1 不尝试。 +//! +//! ## 已知坑(macOS) +//! +//! - Secure Event Input(密码框、1Password 等)下 CGEventPost 静默失败; +//! `type_unicode_chunk` 开头先用 `IsSecureEventInputEnabled` 探测,命中即返 +//! `TypeError::SecureInputActive`。 +//! - Modifier 状态继承 —— 用户按着 Shift 不清零会被映射成大写,每个事件显式 +//! `CGEventSetFlags(_, 0)`。 +//! - Chromium / Electron / Tauri 自身在 keyDown/keyUp 之间无延迟时会丢字,每 codepoint +//! sleep 1ms。 +//! +//! ## 线程安全(macOS) +//! +//! - `type_unicode_chunk`(CGEventPost)任意线程可调,对齐 `insertion.rs::macos:: +//! simulate_paste` 现状。 +//! - TIS(`switch_to_ascii` / `restore_input_source`)调度到主线程,规避 macOS 14+ +//! 对 TSM/TIS 主线程的 `dispatch_assert_queue_fail` SIGTRAP(与 +//! `feedback_rdev_macos_trap.md` 同款风险类别)。 + +#[allow(unused_imports)] +use tauri::{AppHandle, Runtime}; + +#[derive(Debug, thiserror::Error)] +pub enum TypeError { + #[cfg(target_os = "macos")] + #[error("CGEventSourceCreate returned null")] + SourceAllocFailed, + #[cfg(target_os = "macos")] + #[error("CGEventCreateKeyboardEvent returned null")] + EventAllocFailed, + #[cfg(target_os = "macos")] + #[error("Secure Event Input is enabled — synthetic keystrokes will be silently dropped")] + SecureInputActive, + #[cfg(target_os = "windows")] + #[error("Windows SendInput failed: {0}")] + SendInputFailed(String), + #[cfg(target_os = "linux")] + #[error("enigo init failed: {0}")] + EnigoInit(String), + #[cfg(target_os = "linux")] + #[error("enigo text input failed: {0}")] + EnigoText(String), +} + +#[derive(Debug, thiserror::Error)] +pub enum TisError { + #[error("dispatch to main thread failed: {0}")] + MainThreadDispatch(String), + #[error("TISCopyInputSourceForLanguage(\"en\") returned null — ABC source not installed?")] + AbcSourceNotFound, + #[error("TISSelectInputSource failed: OSStatus={0}")] + 
SelectFailed(i32), +} + +// ═══════════════════════════════════════════════════════════════════════════ +// macOS 实现 +// ═══════════════════════════════════════════════════════════════════════════ +#[cfg(target_os = "macos")] +mod macos_impl { + use super::{TisError, TypeError}; + use std::ffi::c_void; + use std::time::Duration; + use tauri::{AppHandle, Runtime}; + + const INTER_KEYSTROKE_DELAY: Duration = Duration::from_millis(1); + + /// 之前激活的 input source 引用 token。携带 raw ptr 的 usize 表示,所有解引用都 + /// 通过 `restore_input_source` 调度到主线程执行;手动 `Send + Sync`。 + pub struct PreviousInputSource { + raw: usize, + } + unsafe impl Send for PreviousInputSource {} + unsafe impl Sync for PreviousInputSource {} + + pub fn type_unicode_chunk(text: &str) -> Result<(), TypeError> { + if text.is_empty() { + return Ok(()); + } + if is_secure_input_enabled() { + return Err(TypeError::SecureInputActive); + } + for ch in text.chars() { + send_one_codepoint(ch)?; + std::thread::sleep(INTER_KEYSTROKE_DELAY); + } + Ok(()) + } + + fn send_one_codepoint(ch: char) -> Result<(), TypeError> { + let mut buf = [0u16; 2]; + let utf16 = ch.encode_utf16(&mut buf); + let len = utf16.len(); + unsafe { + let src = CGEventSourceCreate(KCG_EVENT_SOURCE_STATE_HID_SYSTEM_STATE); + if src.is_null() { + return Err(TypeError::SourceAllocFailed); + } + let down = CGEventCreateKeyboardEvent(src, 0, true); + let up = CGEventCreateKeyboardEvent(src, 0, false); + if down.is_null() || up.is_null() { + if !down.is_null() { + CFRelease(down as _); + } + if !up.is_null() { + CFRelease(up as _); + } + CFRelease(src as _); + return Err(TypeError::EventAllocFailed); + } + CGEventSetFlags(down, 0); + CGEventSetFlags(up, 0); + CGEventKeyboardSetUnicodeString(down, len, utf16.as_ptr()); + CGEventKeyboardSetUnicodeString(up, len, utf16.as_ptr()); + CGEventPost(KCG_HID_EVENT_TAP, down); + CGEventPost(KCG_HID_EVENT_TAP, up); + CFRelease(down as _); + CFRelease(up as _); + CFRelease(src as _); + } + Ok(()) + } + + fn 
is_secure_input_enabled() -> bool { + unsafe { IsSecureEventInputEnabled() != 0 } + } + + pub async fn switch_to_ascii( + app: &AppHandle, + ) -> Result, TisError> { + let (tx, rx) = tokio::sync::oneshot::channel(); + app.run_on_main_thread(move || { + let result = unsafe { switch_to_ascii_on_main() }; + let _ = tx.send(result); + }) + .map_err(|e| TisError::MainThreadDispatch(e.to_string()))?; + rx.await + .map_err(|e| TisError::MainThreadDispatch(e.to_string()))? + } + + unsafe fn switch_to_ascii_on_main() -> Result, TisError> { + let prev = TISCopyCurrentKeyboardInputSource(); + let prev_token = if prev.is_null() { + None + } else { + Some(PreviousInputSource { raw: prev as usize }) + }; + let lang_bytes = b"en\0"; + let lang = CFStringCreateWithCString( + std::ptr::null(), + lang_bytes.as_ptr() as *const i8, + K_CF_STRING_ENCODING_ASCII, + ); + if lang.is_null() { + if let Some(p) = prev_token { + CFRelease(p.raw as *const _); + } + return Err(TisError::AbcSourceNotFound); + } + let abc = TISCopyInputSourceForLanguage(lang); + CFRelease(lang as _); + if abc.is_null() { + if let Some(p) = prev_token { + CFRelease(p.raw as *const _); + } + return Err(TisError::AbcSourceNotFound); + } + let status = TISSelectInputSource(abc); + CFRelease(abc as _); + if status != 0 { + if let Some(p) = prev_token { + CFRelease(p.raw as *const _); + } + return Err(TisError::SelectFailed(status)); + } + Ok(prev_token) + } + + pub async fn restore_input_source( + app: &AppHandle, + prev: Option, + ) -> Result<(), TisError> { + let Some(prev) = prev else { + return Ok(()); + }; + let (tx, rx) = tokio::sync::oneshot::channel(); + app.run_on_main_thread(move || { + let result = unsafe { restore_input_source_on_main(prev) }; + let _ = tx.send(result); + }) + .map_err(|e| TisError::MainThreadDispatch(e.to_string()))?; + rx.await + .map_err(|e| TisError::MainThreadDispatch(e.to_string()))? 
+ } + + unsafe fn restore_input_source_on_main(prev: PreviousInputSource) -> Result<(), TisError> { + let raw = prev.raw as *mut c_void; + let status = TISSelectInputSource(raw); + CFRelease(raw as _); + if status != 0 { + return Err(TisError::SelectFailed(status)); + } + Ok(()) + } + + // ─── FFI ─── + type CGEventTapLocation = u32; + type CGEventSourceStateID = i32; + type CGKeyCode = u16; + type CGEventFlags = u64; + type CFStringEncoding = u32; + type CFAllocatorRef = *const c_void; + type CFStringRef = *const c_void; + type TISInputSourceRef = *mut c_void; + + const KCG_HID_EVENT_TAP: CGEventTapLocation = 0; + const KCG_EVENT_SOURCE_STATE_HID_SYSTEM_STATE: CGEventSourceStateID = 1; + const K_CF_STRING_ENCODING_ASCII: CFStringEncoding = 0x0600; + + #[repr(C)] + struct OpaqueCGEvent(c_void); + type CGEventRef = *mut OpaqueCGEvent; + #[repr(C)] + struct OpaqueCGEventSource(c_void); + type CGEventSourceRef = *mut OpaqueCGEventSource; + + #[link(name = "CoreGraphics", kind = "framework")] + extern "C" { + fn CGEventSourceCreate(state_id: CGEventSourceStateID) -> CGEventSourceRef; + fn CGEventCreateKeyboardEvent( + source: CGEventSourceRef, + virtual_key: CGKeyCode, + key_down: bool, + ) -> CGEventRef; + fn CGEventSetFlags(event: CGEventRef, flags: CGEventFlags); + fn CGEventKeyboardSetUnicodeString( + event: CGEventRef, + string_length: usize, + unicode_string: *const u16, + ); + fn CGEventPost(tap: CGEventTapLocation, event: CGEventRef); + } + + #[link(name = "CoreFoundation", kind = "framework")] + extern "C" { + fn CFRelease(cf: *const c_void); + fn CFStringCreateWithCString( + alloc: CFAllocatorRef, + c_str: *const i8, + encoding: CFStringEncoding, + ) -> CFStringRef; + } + + #[link(name = "Carbon", kind = "framework")] + extern "C" { + fn IsSecureEventInputEnabled() -> i32; + fn TISCopyCurrentKeyboardInputSource() -> TISInputSourceRef; + fn TISCopyInputSourceForLanguage(lang: CFStringRef) -> TISInputSourceRef; + fn TISSelectInputSource(source: 
TISInputSourceRef) -> i32; + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Windows 实现 +// ═══════════════════════════════════════════════════════════════════════════ +#[cfg(target_os = "windows")] +mod windows_impl { + use super::{TisError, TypeError}; + use std::time::Duration; + use tauri::{AppHandle, Runtime}; + use windows::Win32::UI::Input::KeyboardAndMouse::{ + SendInput, INPUT, INPUT_0, INPUT_KEYBOARD, KEYBDINPUT, KEYBD_EVENT_FLAGS, KEYEVENTF_KEYUP, + KEYEVENTF_UNICODE, VIRTUAL_KEY, + }; + + /// Windows / Linux 上没有 input source 概念,token 留空。Send/Sync 自动派生。 + pub struct PreviousInputSource; + + /// 同一个会话内 keyDown/keyUp 之间的微延迟。Windows SendInput Unicode 在大多数 + /// 应用上不需要延迟,但 Chromium 系(Edge / VSCode)观察到偶尔丢字,保留 1ms + /// 兜底跟 macOS 对齐。 + const INTER_KEYSTROKE_DELAY: Duration = Duration::from_millis(1); + + pub fn type_unicode_chunk(text: &str) -> Result<(), TypeError> { + if text.is_empty() { + return Ok(()); + } + for unit in text.encode_utf16() { + send_utf16_unit(unit, false)?; + send_utf16_unit(unit, true)?; + std::thread::sleep(INTER_KEYSTROKE_DELAY); + } + Ok(()) + } + + fn send_utf16_unit(unit: u16, key_up: bool) -> Result<(), TypeError> { + let flags = if key_up { + KEYEVENTF_UNICODE | KEYEVENTF_KEYUP + } else { + KEYEVENTF_UNICODE + }; + let input = INPUT { + r#type: INPUT_KEYBOARD, + Anonymous: INPUT_0 { + ki: KEYBDINPUT { + wVk: VIRTUAL_KEY(0), + wScan: unit, + dwFlags: KEYBD_EVENT_FLAGS(flags.0), + time: 0, + dwExtraInfo: 0, + }, + }, + }; + let sent = unsafe { SendInput(&[input], std::mem::size_of::() as i32) }; + if sent == 1 { + Ok(()) + } else { + Err(TypeError::SendInputFailed( + std::io::Error::last_os_error().to_string(), + )) + } + } + + /// Windows SendInput Unicode 绕过 TSF 与 IME,无需切换输入法。返回 `Ok(None)`, + /// `restore_input_source` 也是 no-op。 + pub async fn switch_to_ascii( + _app: &AppHandle, + ) -> Result, TisError> { + Ok(None) + } + + pub async fn restore_input_source( + _app: &AppHandle, + _prev: 
Option, + ) -> Result<(), TisError> { + Ok(()) + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Linux 实现(实验性) +// ═══════════════════════════════════════════════════════════════════════════ +#[cfg(target_os = "linux")] +mod linux_impl { + use super::{TisError, TypeError}; + use enigo::{Enigo, Keyboard, Settings}; + use tauri::{AppHandle, Runtime}; + + pub struct PreviousInputSource; + + /// 用 enigo 一次 `text()` 把整段 chunk 发出去。X11 上走 XTest 稳定;Wayland 上 + /// 看 compositor 是否给 libei 权限,stock GNOME-Wayland 通常拒绝 —— 此处 + /// 失败返回 `EnigoInit` / `EnigoText`,调用方应回落到一次性。 + /// + /// 不处理 fcitx / ibus 输入法切换 —— Linux 输入法栈与 X11 合成事件的交互非常 + /// 碎片化,v1 实验阶段直接交给用户保证当前输入源是英文键盘。 + pub fn type_unicode_chunk(text: &str) -> Result<(), TypeError> { + if text.is_empty() { + return Ok(()); + } + let mut enigo = + Enigo::new(&Settings::default()).map_err(|e| TypeError::EnigoInit(e.to_string()))?; + enigo + .text(text) + .map_err(|e| TypeError::EnigoText(e.to_string())) + } + + pub async fn switch_to_ascii( + _app: &AppHandle, + ) -> Result, TisError> { + Ok(None) + } + + pub async fn restore_input_source( + _app: &AppHandle, + _prev: Option, + ) -> Result<(), TisError> { + Ok(()) + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// 公共导出(按 cfg 分发到对应实现) +// ═══════════════════════════════════════════════════════════════════════════ +#[cfg(target_os = "macos")] +#[allow(unused_imports)] +pub use macos_impl::{restore_input_source, switch_to_ascii, type_unicode_chunk, PreviousInputSource}; + +#[cfg(target_os = "windows")] +#[allow(unused_imports)] +pub use windows_impl::{restore_input_source, switch_to_ascii, type_unicode_chunk, PreviousInputSource}; + +#[cfg(target_os = "linux")] +#[allow(unused_imports)] +pub use linux_impl::{restore_input_source, switch_to_ascii, type_unicode_chunk, PreviousInputSource}; diff --git a/openless-all/app/src/i18n/en.ts b/openless-all/app/src/i18n/en.ts index 69e7c542..8cdb2016 
100644 --- a/openless-all/app/src/i18n/en.ts +++ b/openless-all/app/src/i18n/en.ts @@ -484,6 +484,19 @@ export const en: typeof zhCN = { }, }, advanced: { + streamingInsertTitle: 'Streaming insertion', + streamingInsertTitleLinux: 'Streaming insertion (experimental)', + streamingInsertDesc: + 'When on, polished output streams to the cursor character by character as the LLM SSE arrives — perceived latency drops noticeably. Requires an OpenAI-compatible LLM provider (Ark / DeepSeek / etc.), not Raw / translation mode, and focus that is not a Secure Input field (password / 1Password). When any condition fails the path transparently falls back to one-shot paste — identical to having the toggle off.', + streamingInsertLabel: 'Enable streaming insertion', + streamingInsertHintMac: + 'During streaming the input source is temporarily switched to ABC (so CJK / Japanese IMEs cannot intercept the Unicode keystrokes) and restored when the session ends.', + streamingInsertHintWindows: + 'Uses SendInput Unicode to type characters directly, bypassing TSF / IME — no input-method switching needed.', + streamingInsertHintLinux: + 'Experimental: uses enigo + XTest to synthesize keystrokes. Stable on X11; on Wayland it depends on whether the compositor grants libei access — failures fall back automatically to one-shot insertion.', + streamingInsertSaveClipboardLabel: 'Also copy to clipboard', + streamingInsertSaveClipboardHint: 'After a successful streaming insert, write the final text to the system clipboard so Cmd+V can paste it again. Off = streaming never touches the clipboard.', localAsrTitle: 'Local ASR models (experimental)', localAsrDesc: 'Move transcription from cloud ASR to on-device inference. 
Offline / privacy-sensitive use only.', localAsrWarningShort: 'Local inference is slower; under-spec hardware may drop words.', diff --git a/openless-all/app/src/i18n/ja.ts b/openless-all/app/src/i18n/ja.ts index 1d905381..ff51143d 100644 --- a/openless-all/app/src/i18n/ja.ts +++ b/openless-all/app/src/i18n/ja.ts @@ -486,6 +486,19 @@ export const ja: typeof zhCN = { }, }, advanced: { + streamingInsertTitle: 'ストリーミング入力', + streamingInsertTitleLinux: 'ストリーミング入力(実験的)', + streamingInsertDesc: + 'ON にすると、整形済みテキストが SSE の到着に合わせて 1 文字ずつカーソル位置に挿入され、体感遅延が大幅に短くなります。条件:OpenAI 互換 LLM プロバイダ(Ark / DeepSeek 等)+ Raw / 翻訳モード以外 + フォーカスがパスワード欄など Secure Input 制御でないこと。いずれかの条件を満たさない場合は自動で従来の一括貼り付けに戻り、OFF と同じ挙動になります。', + streamingInsertLabel: 'ストリーミング入力を有効化', + streamingInsertHintMac: + 'ストリーミング中は一時的に ABC 入力ソースへ切り替わります(中国語 / 日本語 IME による Unicode 文字傍受を回避)。セッション終了時に自動で元に戻ります。', + streamingInsertHintWindows: + 'SendInput Unicode で文字イベントを直接送信し、TSF / IME を迂回します。入力ソースの切り替えは不要です。', + streamingInsertHintLinux: + '実験的:enigo + XTest によるキーボードイベント合成。X11 では安定動作、Wayland では compositor が libei を許可するかどうかに依存し、失敗時は自動的に一括挿入にフォールバックします。', + streamingInsertSaveClipboardLabel: 'クリップボードにも保存', + streamingInsertSaveClipboardHint: 'ストリーミング入力成功後、最終テキストをシステムクリップボードに書き込んで Cmd+V で再貼り付けできるようにします。OFF にすると、ストリーミング処理はクリップボードに一切触れません。', localAsrTitle: 'ローカル ASR モデル(実験的)', localAsrDesc: '転写をクラウドから本機推論に切り替えます。オフライン/プライバシー重視向け。', localAsrWarningShort: 'ローカル推論は遅く、スペック不足では欠字の可能性があります。', diff --git a/openless-all/app/src/i18n/ko.ts b/openless-all/app/src/i18n/ko.ts index eacdcfae..35dfb31c 100644 --- a/openless-all/app/src/i18n/ko.ts +++ b/openless-all/app/src/i18n/ko.ts @@ -486,6 +486,19 @@ export const ko: typeof zhCN = { }, }, advanced: { + streamingInsertTitle: '스트리밍 입력', + streamingInsertTitleLinux: '스트리밍 입력 (실험적)', + streamingInsertDesc: + '활성화하면 정제된 텍스트가 SSE 도착에 맞춰 글자 단위로 커서에 입력되어 체감 지연이 크게 줄어듭니다. 조건: OpenAI 호환 LLM 제공자 (Ark / DeepSeek 등) + Raw / 번역 모드가 아님 + 포커스가 비밀번호 입력란 등 Secure Input 컨트롤이 아님. 
조건 중 하나라도 충족되지 않으면 기존 일괄 붙여넣기 경로로 자동 폴백되어, OFF 와 동일한 동작을 합니다.', + streamingInsertLabel: '스트리밍 입력 사용', + streamingInsertHintMac: + '스트리밍 중에는 입력 소스가 일시적으로 ABC 로 전환됩니다 (중국어 / 일본어 IME 의 Unicode 문자 가로채기 방지). 세션 종료 시 자동으로 원래 입력 소스로 복귀합니다.', + streamingInsertHintWindows: + 'SendInput Unicode 로 문자 이벤트를 직접 전송하여 TSF / IME 를 우회합니다. 입력 소스 전환이 필요하지 않습니다.', + streamingInsertHintLinux: + '실험적: enigo + XTest 로 키보드 이벤트를 합성합니다. X11 에서는 안정적이며, Wayland 에서는 compositor 가 libei 를 허용하는지에 따라 다르고, 실패 시 자동으로 일괄 삽입으로 폴백됩니다.', + streamingInsertSaveClipboardLabel: '클립보드에도 저장', + streamingInsertSaveClipboardHint: '스트리밍 입력 성공 후 최종 텍스트를 시스템 클립보드에 기록하여 Cmd+V 로 재붙여넣기를 할 수 있도록 합니다. 끄면 스트리밍 동안 클립보드를 일절 건드리지 않습니다.', localAsrTitle: '로컬 ASR 모델 (실험적)', localAsrDesc: '전사를 클라우드에서 로컬 추론으로 전환합니다. 오프라인 / 프라이버시용에만 권장됩니다.', localAsrWarningShort: '로컬 추론은 느리며, 사양 부족 시 글자 누락이 발생할 수 있습니다.', diff --git a/openless-all/app/src/i18n/zh-CN.ts b/openless-all/app/src/i18n/zh-CN.ts index 2d7d32c1..987e0a31 100644 --- a/openless-all/app/src/i18n/zh-CN.ts +++ b/openless-all/app/src/i18n/zh-CN.ts @@ -482,6 +482,19 @@ export const zhCN = { }, }, advanced: { + streamingInsertTitle: '流式输入', + streamingInsertTitleLinux: '流式输入(实验)', + streamingInsertDesc: + '开启后润色 SSE 边到达边逐字落到光标,感知延迟显著降低。需要满足:OpenAI 兼容 LLM 提供商(Ark / DeepSeek 等)+ 非 Raw / 翻译模式 + 焦点不是密码框等 Secure Input 控件。任何条件不满足都自动回落到原一次性粘贴路径,行为与关闭等价。', + streamingInsertLabel: '启用流式输入', + streamingInsertHintMac: + '流式期间临时切换到 ABC 输入源(规避中文 / 日文 IME 拦截 Unicode 字符),session 结束自动切回。', + streamingInsertHintWindows: + 'SendInput Unicode 直接发字符事件,绕过 TSF / IME,不切输入法。', + streamingInsertHintLinux: + '实验性:enigo + XTest 模拟键盘事件,X11 稳定;Wayland 看 compositor 是否允许 libei,失败自动回落到一次性插入。', + streamingInsertSaveClipboardLabel: '同步写入剪贴板', + streamingInsertSaveClipboardHint: '流式输入成功后把这次的最终文本写到系统剪贴板,方便 Cmd+V 再次粘贴。关闭后流式过程完全不动剪贴板。', localAsrTitle: '本地 ASR 模型(实验性)', localAsrDesc: '把转写从云端切到本机推理。仅推荐离线 / 隐私敏感场景。', localAsrWarningShort: '本地推理较慢,配置不足时可能吞字。', diff --git a/openless-all/app/src/i18n/zh-TW.ts 
b/openless-all/app/src/i18n/zh-TW.ts index 25762095..35e82844 100644 --- a/openless-all/app/src/i18n/zh-TW.ts +++ b/openless-all/app/src/i18n/zh-TW.ts @@ -484,6 +484,19 @@ export const zhTW: typeof zhCN = { }, }, advanced: { + streamingInsertTitle: '流式輸入', + streamingInsertTitleLinux: '流式輸入(實驗)', + streamingInsertDesc: + '開啟後潤色 SSE 邊到達邊逐字落到游標,感知延遲顯著降低。需要滿足:OpenAI 相容 LLM 提供商(Ark / DeepSeek 等)+ 非 Raw / 翻譯模式 + 焦點不是密碼框等 Secure Input 控件。任何條件不滿足都自動回落到原一次性貼上路徑,行為與關閉等價。', + streamingInsertLabel: '啟用流式輸入', + streamingInsertHintMac: + '流式期間臨時切換到 ABC 輸入源(規避中文 / 日文 IME 攔截 Unicode 字元),session 結束自動切回。', + streamingInsertHintWindows: + 'SendInput Unicode 直接發字元事件,繞過 TSF / IME,不切輸入法。', + streamingInsertHintLinux: + '實驗性:enigo + XTest 模擬鍵盤事件,X11 穩定;Wayland 看 compositor 是否允許 libei,失敗自動回落到一次性插入。', + streamingInsertSaveClipboardLabel: '同步寫入剪貼簿', + streamingInsertSaveClipboardHint: '流式輸入成功後把這次的最終文字寫到系統剪貼簿,方便 Cmd+V 再次貼上。關閉後流式過程完全不動剪貼簿。', localAsrTitle: '本地 ASR 模型(實驗性)', localAsrDesc: '把轉寫從雲端切到本機推理。僅推薦離線 / 隱私敏感場景。', localAsrWarningShort: '本地推理較慢,配置不足時可能吞字。', diff --git a/openless-all/app/src/lib/ipc.ts b/openless-all/app/src/lib/ipc.ts index e974e393..b8e14f7a 100644 --- a/openless-all/app/src/lib/ipc.ts +++ b/openless-all/app/src/lib/ipc.ts @@ -82,6 +82,8 @@ const mockSettings: UserPreferences = { polishContextWindowMinutes: 5, startMinimized: false, updateChannel: 'stable', + streamingInsert: false, + streamingInsertSaveClipboard: true, }; const mockHotkeyCapability: HotkeyCapability = { diff --git a/openless-all/app/src/lib/stylePrefs.test.ts b/openless-all/app/src/lib/stylePrefs.test.ts index 3767bfaf..74a044cb 100644 --- a/openless-all/app/src/lib/stylePrefs.test.ts +++ b/openless-all/app/src/lib/stylePrefs.test.ts @@ -51,6 +51,8 @@ const previousPrefs: UserPreferences = { polishContextWindowMinutes: 5, startMinimized: false, updateChannel: 'stable', + streamingInsert: false, + streamingInsertSaveClipboard: true, }; const nextPrefs: UserPreferences = { diff --git 
a/openless-all/app/src/lib/types.ts b/openless-all/app/src/lib/types.ts index b4e6e164..b45242fa 100644 --- a/openless-all/app/src/lib/types.ts +++ b/openless-all/app/src/lib/types.ts @@ -202,6 +202,13 @@ export interface UserPreferences { /** 自动更新渠道。'stable'(默认)= plugin-updater 仅检查正式版; * 'beta' = Settings → About 出现手动下载 Beta 的入口。 */ updateChannel: UpdateChannel; + /** 流式输入:润色 SSE 一边到达一边逐字模拟键盘事件输出到当前焦点。开启后用户感知到 + * 的处理时延显著降低。支持 macOS / Windows / Linux(实验),仅 OpenAI-compatible provider 实装, + * 其他配置自动回落到原一次性插入。默认 false 与历史行为一致。 */ + streamingInsert: boolean; + /** 流式输入成功后是否把最终润色文本写回剪贴板。开启后 Cmd+V 还能重复粘贴该次输出, + * 与一次性路径行为对齐。默认 true。 */ + streamingInsertSaveClipboard: boolean; } export interface MicrophoneDevice { diff --git a/openless-all/app/src/pages/Settings.tsx b/openless-all/app/src/pages/Settings.tsx index 90abd24a..d990a040 100644 --- a/openless-all/app/src/pages/Settings.tsx +++ b/openless-all/app/src/pages/Settings.tsx @@ -1630,6 +1630,7 @@ function AdvancedSection() { const os = detectOS(); const isMac = os === 'mac'; const isWin = os === 'win'; + const isLinux = os === 'linux'; const platformSupported = isMac || isWin; const switchSeqRef = useRef(0); const [busy, setBusy] = useState(false); @@ -1722,6 +1723,52 @@ function AdvancedSection() { )} + {/* ─── 流式输入(全平台 opt-in) ─────────────────────────────────── + 润色 SSE 一边到达一边逐字模拟键盘事件落到光标。开启后用户感知到的处理 + 时延显著降低,但有几个限制(不满足时自动回落原一次性插入路径): + - macOS:CGEvent Unicode + 临时切到 ABC 输入源(CJK / 日文 IME 拦截兜底) + - Windows:SendInput Unicode,绕过 TSF / IME,不需要切输入法 + - Linux(实验):enigo XTest;Wayland compositor 拒绝 libei 时失败回落 + - 仅 OpenAI-compatible provider 实装;Gemini / Codex 透明降级 + - 密码框 / 1Password / SSH prompt 等 Secure Input 框拒绝合成按键 → 失败回落 + 每个平台用各自的 hint key,互相不显示对方平台的细节。 */} +
+ {t(isLinux + ? 'settings.advanced.streamingInsertTitleLinux' + : 'settings.advanced.streamingInsertTitle')} +
+
+ {t('settings.advanced.streamingInsertDesc')} +
+ + { + if (prefs) void updatePrefs({ ...prefs, streamingInsert: next }); + }} + /> + + + { + if (prefs) void updatePrefs({ ...prefs, streamingInsertSaveClipboard: next }); + }} + /> + +
+ {/* 标题 + 右上角 inline 警告小字(替换原琥珀大警告条)。 */}