From 14813322dd82cdfa1718e673b8a9d63283a9f916 Mon Sep 17 00:00:00 2001 From: H-Chris233 Date: Sat, 9 May 2026 13:19:22 +0800 Subject: [PATCH] Reduce coordinator refactor risk with focused modules coordinator.rs had accumulated dictation lifecycle, QA panel state, and shared recording resources in one file. This split keeps behavior unchanged while moving the dictation edge/session helpers, QA panel state helpers, and resource ownership helpers into child modules under coordinator/. Constraint: Issue #296 asks for module boundaries after Rust state-machine coverage is in place Rejected: Split every helper into separate public modules | larger churn with no behavior benefit Confidence: high Scope-risk: moderate Directive: Keep cross-module wiring in coordinator; leaf modules should remain coordinator-private Tested: cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml Tested: cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml --lib Tested: cargo test --manifest-path openless-all/app/src-tauri/backend-tests/Cargo.toml Co-authored-by: OmX --- openless-all/app/src-tauri/src/coordinator.rs | 1364 +---------------- .../src-tauri/src/coordinator/dictation.rs | 1050 +++++++++++++ .../app/src-tauri/src/coordinator/qa.rs | 124 ++ .../src-tauri/src/coordinator/resources.rs | 191 +++ 4 files changed, 1394 insertions(+), 1335 deletions(-) create mode 100644 openless-all/app/src-tauri/src/coordinator/dictation.rs create mode 100644 openless-all/app/src-tauri/src/coordinator/qa.rs create mode 100644 openless-all/app/src-tauri/src/coordinator/resources.rs diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index 16944c47..2905d733 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -25,10 +25,9 @@ use crate::asr::{ use crate::combo_hotkey::{ComboHotkeyError, ComboHotkeyEvent, ComboHotkeyMonitor}; use crate::coordinator_state::{ begin_cancel_session_state, begin_recording_abort_before_restore, begin_session_state, - finish_cancel_session_state, finish_starting_session_state, initial_session_id, new_session_id, - publish_abort_idle_after_restore, request_stop_during_starting_state, - start_processing_if_listening, startup_race_status, BeginOutcome, SessionId, SessionPhase, - SessionState, StartupRaceStatus, + finish_cancel_session_state, finish_starting_session_state, new_session_id, + publish_abort_idle_after_restore, start_processing_if_listening, startup_race_status, + BeginOutcome, SessionId, SessionPhase, SessionState, StartupRaceStatus, }; use crate::hotkey::{HotkeyEvent, HotkeyMonitor}; use crate::insertion::TextInserter; @@ -39,17 +38,34 @@ use crate::persistence::{ use crate::polish::{OpenAICompatibleConfig, OpenAICompatibleLLMProvider}; use crate::qa_hotkey::{QaHotkeyError, QaHotkeyEvent, QaHotkeyMonitor}; use crate::recorder::{Recorder, RecorderError}; -use crate::selection::{capture_selection, SelectionContext}; +use crate::selection::capture_selection; use crate::types::{ CapsulePayload, CapsuleState, ChineseScriptPreference, DictationSession, HotkeyCapability, - HotkeyMode, HotkeyStatus, HotkeyStatusState, InsertStatus, OutputLanguagePreference, - PolishMode, + HotkeyStatus, HotkeyStatusState, InsertStatus, OutputLanguagePreference, PolishMode, }; #[cfg(target_os = "windows")] use crate::windows_ime_ipc::ImeSubmitTarget; #[cfg(target_os = "windows")] use crate::windows_ime_session::{PreparedWindowsImeSession, WindowsImeSessionController}; +mod dictation; +mod qa; +mod resources; + +#[cfg(test)] +use dictation::dictation_error_code; +use dictation::{ + begin_session, cancel_session, end_session, handle_pressed, handle_pressed_edge, + handle_released, handle_released_edge, request_stop_during_starting, +}; +use qa::{close_qa_panel, handle_qa_hotkey_pressed, QaPhase, QaSessionState}; +#[cfg(test)] +use resources::discard_startup_resources_for_session; +use resources::{ + acquire_recording_mute, release_recording_mute, selected_microphone_device_name, + stop_microphone_preview_monitor, stop_qa_recorder, SessionResource, SharedRecordingMuteState, +}; + enum ActiveAsr { Volcengine(Arc), Whisper(Arc), @@ -68,38 +84,6 @@ fn asr_transcribe_uses_global_timeout(asr: &ActiveAsr) -> bool { } } -struct SessionResource { - session_id: SessionId, - resource: T, -} - -impl SessionResource { - fn new(session_id: SessionId, resource: T) -> Self { - Self { - session_id, - resource, - } - } - - fn into_inner(self) -> T { - self.resource - } -} - -struct SharedRecordingMuteState { - guard: Option, - holders: u32, -} - -impl SharedRecordingMuteState { - fn new() -> Self { - Self { - guard: None, - holders: 0, - } - } -} - pub struct Coordinator { inner: Arc, } @@ -156,51 +140,12 @@ struct Inner { qa_stream_cancelled: Arc, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum QaPhase { - Idle, - Recording, - Processing, -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum ActionHotkeyKind { SwitchStyle, OpenApp, } -struct QaSessionState { - phase: QaPhase, - cancelled: bool, - selection: Option, - front_app: Option, - /// 用于忽略迟到的 RMS / runtime error。 - session_id: SessionId, - /// QA 浮窗是否被用户钉住(pinned)。pinned=true 时不自动隐藏。 - pinned: bool, - /// 浮窗是否对用户可见。Cmd+Shift+; 边沿 toggle 此 flag; - /// 主听写 hotkey(rightOption)边沿来时,看这个 flag 决定是走 QA 还是走 dictation。 - /// 详见 issue #118 v2。 - panel_visible: bool, - /// 多轮对话累积。每轮 user→assistant 加两条;关浮窗清空。 - messages: Vec, -} - -impl Default for QaSessionState { - fn default() -> Self { - Self { - phase: QaPhase::Idle, - cancelled: false, - selection: None, - front_app: None, - session_id: initial_session_id(), - pinned: false, - panel_visible: false, - messages: Vec::new(), - } - } -} - #[cfg(target_os = "windows")] #[derive(Debug)] struct PreparedWindowsImeSessionSlot { @@ -1438,82 +1383,6 @@ fn mark_translation_modifier_seen(inner: &Arc) { } } -async fn handle_qa_hotkey_pressed(inner: &Arc) { - // QA hotkey(默认 Cmd+Shift+;)现在只 toggle 浮窗可见性。 - // 浮窗内的录音 / 提问由 Option 边沿驱动(handle_pressed_edge → handle_qa_option_edge)。 - let visible = inner.qa_state.lock().panel_visible; - log::info!("[coord] QA hotkey edge (panel_visible={visible})"); - if visible { - close_qa_panel(inner); - } else { - open_qa_panel(inner); - } -} - -/// 浮窗可见时,主听写 hotkey(rightOption)边沿改打到这里: -/// Idle → 录音 / Recording → 停录音并提问。 -async fn handle_qa_option_edge(inner: &Arc) { - let phase = inner.qa_state.lock().phase; - log::info!("[coord] QA option edge (phase={phase:?})"); - match phase { - QaPhase::Idle => { - let _ = begin_qa_session(inner).await; - } - QaPhase::Recording => { - let _ = end_qa_session(inner).await; - } - // Processing 阶段再次按键忽略(避免与正在跑的 LLM 冲突)。 - QaPhase::Processing => {} - } -} - -fn open_qa_panel(inner: &Arc) { - { - let mut state = inner.qa_state.lock(); - state.panel_visible = true; - state.phase = QaPhase::Idle; - state.cancelled = false; - state.messages.clear(); - state.selection = None; - state.front_app = capture_frontmost_app(); - } - // 先把胶囊清干净,避免主听写上一次 Done 状态残留的 message/insertedChars - // 在 QA Done 阶段被 capsule UI 错误复用("已之一粘贴这个 0" 那种)。 - emit_capsule(inner, CapsuleState::Idle, 0.0, 0, None, None); - if let Some(app) = inner.app.lock().clone() { - crate::show_qa_window(&app, "idle"); - let _ = app.emit_to( - "qa", - "qa:state", - serde_json::json!({ - "kind": "idle", - "messages": Vec::::new(), - }), - ); - } - log::info!("[coord] QA panel opened (awaiting Option to record)"); -} - -fn close_qa_panel(inner: &Arc) { - cancel_qa_session(inner); - { - let mut state = inner.qa_state.lock(); - state.panel_visible = false; - state.pinned = false; - state.messages.clear(); - state.selection = None; - state.front_app = None; - state.phase = QaPhase::Idle; - state.cancelled = false; - } - if let Some(app) = inner.app.lock().clone() { - crate::hide_qa_window(&app); - } - // 胶囊一同收掉,避免浮窗关了胶囊还在显示。 - emit_capsule(inner, CapsuleState::Idle, 0.0, 0, None, None); - log::info!("[coord] QA panel closed, history cleared"); -} - fn hotkey_bridge_loop(inner: Arc, rx: mpsc::Receiver) { while let Ok(evt) = rx.recv() { if inner.shortcut_recording_active.load(Ordering::SeqCst) { @@ -1586,227 +1455,6 @@ fn reset_shortcut_held_state(inner: &Arc) { } } -async fn handle_pressed_edge(inner: &Arc) { - let was_held = inner.hotkey_trigger_held.swap(true, Ordering::SeqCst); - if !was_held { - // 路由:QA 浮窗可见时,rightOption 边沿走 QA;否则走主听写。详见 issue #118 v2。 - let panel_visible = inner.qa_state.lock().panel_visible; - if panel_visible { - handle_qa_option_edge(inner).await; - } else { - handle_pressed(inner).await; - } - } -} - -async fn handle_pressed(inner: &Arc) { - let mode = inner.prefs.get().hotkey.mode; - let phase = inner.state.lock().phase; - log::info!("[coord] hotkey pressed (mode={mode:?}, phase={phase:?})"); - match (mode, phase) { - (HotkeyMode::Toggle, SessionPhase::Idle) => { - let _ = begin_session(inner).await; - } - (HotkeyMode::Toggle, SessionPhase::Listening) => { - let _ = end_session(inner).await; - } - (HotkeyMode::Hold, SessionPhase::Idle) => { - let _ = begin_session(inner).await; - } - // Toggle 模式 Starting 阶段第二次按 → 用户想停。 - // 不能直接 end_session(ASR session 还没建好),存边沿,握手完成后立即触发。 - (HotkeyMode::Toggle, SessionPhase::Starting) => { - request_stop_during_starting(inner, "toggle stop edge"); - } - _ => {} - } -} - -async fn handle_released_edge(inner: &Arc) { - let was_held = inner.hotkey_trigger_held.swap(false, Ordering::SeqCst); - if was_held { - // QA 浮窗可见时,Option 行为是 press-toggle(不分 hold/release),release 边沿忽略。 - let panel_visible = inner.qa_state.lock().panel_visible; - if panel_visible { - return; - } - handle_released(inner).await; - } -} - -async fn handle_released(inner: &Arc) { - let mode = inner.prefs.get().hotkey.mode; - let phase = inner.state.lock().phase; - log::info!("[coord] hotkey released (mode={mode:?}, phase={phase:?})"); - if mode == HotkeyMode::Hold { - match phase { - SessionPhase::Listening => { - let _ = end_session(inner).await; - } - // Hold 模式 Starting 阶段松开 → 用户想停。同上:握手完成后再 end。 - SessionPhase::Starting => { - request_stop_during_starting(inner, "hold release edge"); - } - _ => {} - } - } -} - -fn request_stop_during_starting(inner: &Arc, reason: &str) { - { - let mut state = inner.state.lock(); - if !request_stop_during_starting_state(&mut state) { - return; - } - } - log::info!("[coord] {reason} during Starting — queued"); - stop_recorder_if_pending_start_stop(inner); -} - -fn take_session_resource( - slot: &mut Option>, - session_id: SessionId, -) -> Option { - if slot - .as_ref() - .map(|resource| resource.session_id == session_id) - .unwrap_or(false) - { - slot.take().map(SessionResource::into_inner) - } else { - None - } -} - -fn store_asr_for_session(inner: &Arc, session_id: SessionId, asr: ActiveAsr) { - *inner.asr.lock() = Some(SessionResource::new(session_id, asr)); -} - -fn take_asr_for_session(inner: &Arc, session_id: SessionId) -> Option { - let mut slot = inner.asr.lock(); - take_session_resource(&mut slot, session_id) -} - -fn cancel_active_asr(asr: ActiveAsr) { - match asr { - ActiveAsr::Volcengine(v) => v.cancel(), - ActiveAsr::Whisper(w) => w.cancel(), - #[cfg(target_os = "windows")] - ActiveAsr::FoundryLocalWhisper(local) => local.cancel(), - #[cfg(target_os = "macos")] - ActiveAsr::Local(local) => local.cancel(), - } -} - -fn cancel_asr_for_session(inner: &Arc, session_id: SessionId) { - if let Some(asr) = take_asr_for_session(inner, session_id) { - cancel_active_asr(asr); - } -} - -fn store_recorder_for_session(inner: &Arc, session_id: SessionId, recorder: Recorder) { - *inner.recorder.lock() = Some(SessionResource::new(session_id, recorder)); -} - -fn selected_microphone_device_name(inner: &Arc) -> Option { - let name = inner.prefs.get().microphone_device_name.trim().to_string(); - if name.is_empty() { - None - } else { - Some(name) - } -} - -fn stop_microphone_preview_monitor(inner: &Arc, owner: &str) { - let Some(app) = inner.app.lock().as_ref().cloned() else { - return; - }; - let state = app.state::(); - let recorder = state.lock().take(); - if let Some(recorder) = recorder { - log::info!("[recorder] stopping microphone preview monitor before {owner}"); - recorder.stop(); - } -} - -fn acquire_recording_mute(inner: &Arc, owner: &str) { - if !inner.prefs.get().mute_during_recording { - return; - } - let mut mute = inner.recording_mute.lock(); - if mute.holders == 0 { - match crate::audio_mute::AudioMuteGuard::activate() { - Ok(guard) => { - mute.guard = Some(guard); - log::info!("[audio-mute] system output muted for recording"); - } - Err(err) => { - log::warn!("[audio-mute] failed to mute output for {owner}: {err}"); - return; - } - } - } - mute.holders = mute.holders.saturating_add(1); - log::info!("[audio-mute] acquired by {owner}; holders={}", mute.holders); -} - -fn release_recording_mute(inner: &Arc, owner: &str) { - let mut mute = inner.recording_mute.lock(); - if mute.holders == 0 { - return; - } - mute.holders -= 1; - log::info!("[audio-mute] released by {owner}; holders={}", mute.holders); - if mute.holders == 0 { - mute.guard.take(); - log::info!("[audio-mute] system output mute restored after recording"); - } -} - -fn stop_qa_recorder(inner: &Arc) { - if let Some(rec) = inner.qa_recorder.lock().take() { - rec.stop(); - release_recording_mute(inner, "qa"); - } -} - -fn take_recorder_for_session(inner: &Arc, session_id: SessionId) -> Option { - let mut slot = inner.recorder.lock(); - take_session_resource(&mut slot, session_id) -} - -fn stop_recorder_for_session(inner: &Arc, session_id: SessionId) { - if let Some(recorder) = take_recorder_for_session(inner, session_id) { - recorder.stop(); - release_recording_mute(inner, "dictation"); - } -} - -fn discard_startup_resources_for_session(inner: &Arc, session_id: SessionId) { - stop_recorder_for_session(inner, session_id); - cancel_asr_for_session(inner, session_id); -} - -fn stop_recorder_if_pending_start_stop(inner: &Arc) { - let (should_stop, session_id) = { - let state = inner.state.lock(); - ( - state.phase == SessionPhase::Starting && state.pending_stop, - state.session_id, - ) - }; - if !should_stop { - return; - } - if let Some(rec) = take_recorder_for_session(inner, session_id) { - rec.stop(); - release_recording_mute(inner, "dictation"); - let elapsed = inner.state.lock().started_at.elapsed().as_millis() as u64; - emit_capsule(inner, CapsuleState::Transcribing, 0.0, elapsed, None, None); - log::info!("[coord] stopped recorder while ASR is still connecting"); - } -} - async fn handle_window_hotkey_event( inner: &Arc, event_type: String, @@ -1903,362 +1551,6 @@ fn window_key_matches_trigger(trigger: crate::types::HotkeyTrigger, key: &str, c // ─────────────────────────── session lifecycle ─────────────────────────── -async fn begin_session(inner: &Arc) -> Result<(), String> { - let current_session_id = { - let mut state = inner.state.lock(); - let Some(session_id) = - begin_session_state(&mut state, capture_focus_target(), capture_frontmost_app()) - else { - return Ok(()); - }; - if let Some(label) = state.front_app.as_deref() { - log::info!("[coord] front_app captured: {label}"); - } - session_id - }; - #[cfg(target_os = "windows")] - { - let prepared = inner.windows_ime.prepare_session(); - let mut slots = inner.prepared_windows_ime_session.lock(); - store_prepared_windows_ime_session(&mut slots, current_session_id, prepared); - } - // 翻译模式标志重置;hotkey 监听器在 Shift down 时再 set true。 - inner - .translation_modifier_seen - .store(false, Ordering::SeqCst); - - #[cfg(any(debug_assertions, test))] - if hotkey_injection_dry_run_enabled() { - emit_capsule(inner, CapsuleState::Recording, 0.0, 0, None, None); - inner.state.lock().phase = SessionPhase::Listening; - log::info!("[coord] session started (hotkey-injection dry-run)"); - return Ok(()); - } - - if let Err(message) = ensure_asr_credentials() { - log::warn!("[coord] ASR credential gate failed: {message}"); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - 0, - Some(message.clone()), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - return Err(message); - } - - let active_asr = CredentialsVault::get_active_asr(); - - if let Err(message) = ensure_microphone_permission(inner) { - log::warn!("[coord] microphone permission gate failed: {message}"); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - 0, - Some(message.clone()), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(message); - } - - // 不在这里 emit Recording capsule —— 让 start_recorder_for_starting 在 - // Recorder::start 成功后再发,确保「用户看到录音条」时 mic 已经在 capture。 - // 之前在这一行就 emit 会让用户看到录音条后立刻开口,但 mic 还在 cpal init - // 窗口(50-200ms)内 → 开头几个字物理上录不到。详见 issue 备注。 - #[cfg(target_os = "windows")] - if foundry::is_foundry_local_whisper(&active_asr) { - let prefs = inner.prefs.get(); - let model_alias = if foundry::model_alias_is_known(&prefs.foundry_local_asr_model) { - prefs.foundry_local_asr_model.clone() - } else { - foundry::DEFAULT_MODEL_ALIAS.to_string() - }; - let language_hint = prefs.foundry_local_asr_language_hint.trim().to_string(); - let language_hint = if language_hint.is_empty() { - None - } else { - Some(language_hint) - }; - let local = Arc::new(FoundryLocalWhisperAsr::new( - Arc::clone(&inner.foundry_local_runtime), - model_alias, - prefs.foundry_local_runtime_source.clone(), - language_hint, - )); - store_asr_for_session( - inner, - current_session_id, - ActiveAsr::FoundryLocalWhisper(Arc::clone(&local)), - ); - let consumer: Arc = local; - start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) - .await?; - return Ok(()); - } - - #[cfg(target_os = "macos")] - if crate::asr::local::is_local_qwen3(&active_asr) { - let local = match build_local_qwen3(inner).await { - Ok(l) => l, - Err(e) => { - log::error!("[coord] 本地 Qwen3-ASR 初始化失败: {e:#}"); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - 0, - Some(format!("本地模型初始化失败: {e}")), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(format!("local ASR init failed: {e}")); - } - }; - store_asr_for_session( - inner, - current_session_id, - ActiveAsr::Local(Arc::clone(&local)), - ); - let consumer: Arc = local; - start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) - .await?; - return Ok(()); - } - - if is_whisper_compatible_provider(&active_asr) { - let (api_key, base_url, model) = read_whisper_credentials(); - // 用户辞書の有効フレーズを Whisper の `prompt` に流し込む。固有名詞や - // 専門用語の同音・近形誤認識を ASR 段階で抑える。Polish LLM 側には - // 既に system prompt として注入済みだが、Whisper 出力が大きく崩れる - // と Polish でも救えない(特に CJK で顕著)。Volcengine ASR は元々 - // hotword を受け取っており、UI 説明文も「ASR ホットワードと後処理 - // モデルのコンテキスト両方に渡される」と明示しているので、Whisper - // 互換プロバイダにも揃えるのが筋。 - let whisper_prompt = - crate::asr::whisper::build_prompt_from_phrases(&enabled_phrases(inner)); - let whisper = Arc::new(WhisperBatchASR::new( - api_key, - base_url, - model, - whisper_prompt, - )); - store_asr_for_session( - inner, - current_session_id, - ActiveAsr::Whisper(Arc::clone(&whisper)), - ); - let consumer: Arc = whisper; - start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) - .await?; - } else { - let hotwords = enabled_hotwords(inner); - let creds = read_volc_credentials(); - let asr = Arc::new(VolcengineStreamingASR::new(creds, hotwords)); - let bridge = Arc::new(DeferredAsrBridge::new()); - let consumer: Arc = bridge.clone(); - store_asr_for_session( - inner, - current_session_id, - ActiveAsr::Volcengine(Arc::clone(&asr)), - ); - start_recorder_for_starting(inner, current_session_id, &active_asr, consumer)?; - - if let Err(e) = asr.open_session().await { - log::error!("[coord] open ASR session failed: {e}"); - match startup_race_status_for_starting(inner, current_session_id) { - StartupRaceStatus::StaleContinuation => { - log::info!( - "[coord] stale ASR open_session error from session {current_session_id} — ignoring" - ); - asr.cancel(); - discard_startup_resources_for_session(inner, current_session_id); - restore_prepared_windows_ime_session(inner, current_session_id); - return Ok(()); - } - StartupRaceStatus::CancelRaced => { - asr.cancel(); - discard_startup_resources_for_session(inner, current_session_id); - restore_prepared_windows_ime_session(inner, current_session_id); - set_phase_idle_if_session_matches(inner, current_session_id); - return Ok(()); - } - StartupRaceStatus::ActiveStarting => {} - } - discard_startup_resources_for_session(inner, current_session_id); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - 0, - Some(format!("ASR 连接失败: {e}")), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - set_phase_idle_if_session_matches(inner, current_session_id); - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(e.to_string()); - } - // open_session.await 期间用户可能按了 Esc / 改变心意。如果 cancel_session - // 已触发(cancelled=true 或 phase 被改回 Idle),别再装 ASR,直接善后。 - // audit HIGH #1。 - match startup_race_status_for_starting(inner, current_session_id) { - StartupRaceStatus::ActiveStarting => {} - StartupRaceStatus::CancelRaced => { - log::info!("[coord] cancel raced during ASR open_session — aborting begin"); - asr.cancel(); - discard_startup_resources_for_session(inner, current_session_id); - restore_prepared_windows_ime_session(inner, current_session_id); - set_phase_idle_if_session_matches(inner, current_session_id); - return Ok(()); - } - StartupRaceStatus::StaleContinuation => { - log::info!( - "[coord] stale ASR open_session continuation from session {current_session_id} — ignoring" - ); - asr.cancel(); - discard_startup_resources_for_session(inner, current_session_id); - restore_prepared_windows_ime_session(inner, current_session_id); - return Ok(()); - } - } - let target: Arc = asr; - let flushed_bytes = bridge.attach(target); - log::info!("[coord] ASR connected; flushed {flushed_bytes} deferred audio bytes"); - finish_starting_session(inner, current_session_id).await; - } - - Ok(()) -} - -fn start_recorder_for_starting( - inner: &Arc, - session_id: SessionId, - active_asr: &str, - consumer: Arc, -) -> Result<(), String> { - let inner_for_level = Arc::clone(inner); - // 节流:电平回调本身约 185 Hz(cpal 默认音频块),全部转发到前端会让 CSS - // transition 互相覆盖、视觉上"被平均"成静止。限制为 ~30 Hz(33ms 最少间隔), - // 配合 CSS 短 transition 让每次 emit 完整可见。 - let last_emit_at = Arc::new(Mutex::new(None::)); - const LEVEL_EMIT_MIN_INTERVAL_MS: u64 = 33; - let level_handler: Arc = Arc::new(move |level| { - let phase = inner_for_level.state.lock().phase; - if phase != SessionPhase::Listening && phase != SessionPhase::Starting { - return; - } - let now = Instant::now(); - { - let mut last = last_emit_at.lock(); - if let Some(prev) = *last { - if now.duration_since(prev).as_millis() < LEVEL_EMIT_MIN_INTERVAL_MS as u128 { - return; - } - } - *last = Some(now); - } - let elapsed = inner_for_level - .state - .lock() - .started_at - .elapsed() - .as_millis() as u64; - emit_capsule( - &inner_for_level, - CapsuleState::Recording, - level, - elapsed, - None, - None, - ); - }); - - let microphone_device_name = selected_microphone_device_name(inner); - stop_microphone_preview_monitor(inner, "dictation recorder"); - acquire_recording_mute(inner, "dictation"); - match Recorder::start(microphone_device_name, consumer, level_handler) { - Ok((rec, runtime_errors)) => { - store_recorder_for_session(inner, session_id, rec); - spawn_recorder_error_monitor(inner, runtime_errors); - // 不在这里 emit Recording capsule。 - // Recorder::start Ok 仅代表 cpal Stream::play 完成,不代表 audio - // 线程已经在向 consumer 推 PCM —— macOS CoreAudio AudioUnit 启动到 - // 第一帧 process_callback 中间有 50–200 ms 间隙(Windows 类似)。 - // 之前在这里立即 emit Recording 会让用户「看到录音条」就开口,但前几个 - // 字落在 cpal init 窗口里被吞,反映为短录音漏首字(用户报告)。 - // - // 现改为:level_handler 第一次被触发时才 emit Recording capsule。 - // recorder.rs::process_callback 的顺序是 consume_pcm_chunk → level_handler, - // 所以 level_handler 第一次执行 == PCM 已经真实流到 consumer。从这一刻 - // 起用户说什么都被录到。capsule 自然就晚 50–200 ms 出现,但出现 == - // mic 真的在录,匹配「麦先录、UI 再弹」的预期。 - // - // 原本的竞态保护交还给两条已有路径: - // - stop_recorder_if_pending_start_stop:短按时把 capsule 切到 - // Transcribing;recorder 已 stop,level_handler 不会再发火。 - // - level_handler 内部 phase 检查:cancel / 错误使 phase 不在 - // {Starting, Listening} 时直接 return,不会在错误状态上盖 - // Recording。 - stop_recorder_if_pending_start_stop(inner); - log::info!("[coord] recorder started (asr={active_asr}, phase=Starting)"); - } - Err(e) => { - log::error!("[coord] recorder start failed: {e}"); - cancel_asr_for_session(inner, session_id); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - 0, - Some(format!("录音启动失败: {e}")), - None, - ); - restore_prepared_windows_ime_session(inner, session_id); - release_recording_mute(inner, "dictation"); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(e.to_string()); - } - } - - Ok(()) -} - -fn spawn_recorder_error_monitor(inner: &Arc, rx: mpsc::Receiver) { - // 捕获当前 session_id:err 来时若 id 已经不一致说明是上一 session 的迟到事件, - // 不能去 abort 当前 active 的新 session(它录得好好的)。 - let captured_session_id = inner.state.lock().session_id; - let inner = Arc::clone(inner); - std::thread::Builder::new() - .name("openless-recorder-error-monitor".into()) - .spawn(move || { - if let Ok(err) = rx.recv() { - let current_session_id = inner.state.lock().session_id; - if captured_session_id != current_session_id { - log::warn!( - "[coord] recorder error from stale session {} dropped (current={}, err={})", - captured_session_id, - current_session_id, - err - ); - return; - } - log::error!("[coord] recorder runtime error: {err}"); - abort_recording_with_error(&inner, format!("录音中断: {err}")); - } - }) - .ok(); -} - /// QA 录音 runtime error 监听器。镜像 `spawn_recorder_error_monitor` 的语义但走 QA /// 收尾路径(`finish_qa_with_error` 替代 `abort_recording_with_error`)。 /// 用 qa_state.session_id 守卫 stale 事件。详见 issue #168。 @@ -2286,608 +1578,6 @@ fn spawn_qa_recorder_error_monitor(inner: &Arc, rx: mpsc::Receiver, message: String) { - let Some(abort) = ({ - let mut state = inner.state.lock(); - begin_recording_abort_before_restore(&mut state) - }) else { - return; - }; - - discard_startup_resources_for_session(inner, abort.session_id); - restore_prepared_windows_ime_session(inner, abort.session_id); - { - let mut state = inner.state.lock(); - publish_abort_idle_after_restore(&mut state, abort.session_id); - } - - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - abort.elapsed, - Some(message), - None, - ); - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); -} - -async fn start_recorder_and_enter_listening( - inner: &Arc, - session_id: SessionId, - active_asr: &str, - consumer: Arc, -) -> Result<(), String> { - start_recorder_for_starting(inner, session_id, active_asr, consumer)?; - finish_starting_session(inner, session_id).await; - Ok(()) -} - -async fn finish_starting_session(inner: &Arc, session_id: SessionId) { - // audit HIGH #1:转 Listening 之前在同一 lock 内检查 cancel race。 - // 之前是无条件 phase=Listening,会把 cancel_session 在 await 期间设的 Idle - // 反向覆盖回 Listening → 用户的 cancel 边沿被吞掉。 - let outcome = { - let mut state = inner.state.lock(); - finish_starting_session_state(&mut state, session_id) - }; - match outcome { - BeginOutcome::StaleContinuation => { - log::info!( - "[coord] stale recorder/ASR startup continuation from session {session_id} — ignoring" - ); - discard_startup_resources_for_session(inner, session_id); - restore_prepared_windows_ime_session(inner, session_id); - } - BeginOutcome::CancelRaced => { - log::info!("[coord] cancel raced during recorder/ASR startup — aborting begin"); - discard_startup_resources_for_session(inner, session_id); - restore_prepared_windows_ime_session(inner, session_id); - set_phase_idle_if_session_matches(inner, session_id); - } - BeginOutcome::Started | BeginOutcome::PendingStop => { - log::info!("[coord] session started"); - if matches!(outcome, BeginOutcome::PendingStop) { - log::info!("[coord] applying pending_stop edge → end_session immediately"); - let _ = end_session(inner).await; - } - } - } -} - -async fn end_session(inner: &Arc) -> Result<(), String> { - let current_session_id = { - let mut state = inner.state.lock(); - let Some(session_id) = start_processing_if_listening(&mut state) else { - return Ok(()); - }; - session_id - }; - - let elapsed = inner.state.lock().started_at.elapsed().as_millis() as u64; - emit_capsule(inner, CapsuleState::Transcribing, 0.0, elapsed, None, None); - - if let Some(rec) = take_recorder_for_session(inner, current_session_id) { - rec.stop(); - release_recording_mute(inner, "dictation"); - } - - let asr_opt = take_asr_for_session(inner, current_session_id); - let asr = match asr_opt { - Some(a) => a, - None => { - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - return Ok(()); - } - }; - - let uses_global_timeout = asr_transcribe_uses_global_timeout(&asr); - let raw = match asr { - ActiveAsr::Volcengine(asr) => { - debug_assert!(uses_global_timeout); - if let Err(e) = asr.send_last_frame().await { - log::error!("[coord] send last frame failed: {e}"); - } - // 添加全局超时保护:防止 await_final_result() 永远挂起 - let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); - match tokio::time::timeout(timeout_duration, asr.await_final_result()).await { - Ok(Ok(r)) => r, - Ok(Err(e)) => { - log::error!("[coord] await final failed: {e}"); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some(format!("识别失败: {e}")), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(e.to_string()); - } - Err(_) => { - // 全局超时:最后的防线 - log::error!( - "[coord] 全局超时 {} 秒 - 强制恢复", - COORDINATOR_GLOBAL_TIMEOUT_SECS - ); - // 清理 ASR session,避免资源泄漏 - asr.cancel(); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some("识别超时".to_string()), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err("global timeout".to_string()); - } - } - } - ActiveAsr::Whisper(w) => { - debug_assert!(uses_global_timeout); - // Whisper 也添加类似的超时保护 - let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); - match tokio::time::timeout(timeout_duration, w.transcribe()).await { - Ok(Ok(r)) => r, - Ok(Err(e)) => { - log::error!("[coord] whisper transcribe failed: {e}"); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some(format!("识别失败: {e}")), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(e.to_string()); - } - Err(_) => { - log::error!( - "[coord] whisper 全局超时 {} 秒", - COORDINATOR_GLOBAL_TIMEOUT_SECS - ); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some("识别超时".to_string()), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err("whisper global timeout".to_string()); - } - } - } - #[cfg(target_os = "windows")] - ActiveAsr::FoundryLocalWhisper(local) => { - debug_assert!(!uses_global_timeout); - match local - .transcribe(foundry_audio_transcribe_timeout_duration()) - .await - { - Ok(r) => { - schedule_foundry_local_asr_release(inner, current_session_id); - r - } - Err(e) => { - if inner.state.lock().cancelled { - log::info!( - "[coord] Foundry Local Whisper transcribe cancelled — discarding transcript" - ); - schedule_foundry_local_asr_release(inner, current_session_id); - restore_prepared_windows_ime_session(inner, current_session_id); - set_phase_idle_if_session_matches(inner, current_session_id); - return Ok(()); - } - log::error!("[coord] Foundry Local Whisper transcribe failed: {e:#}"); - schedule_foundry_local_asr_release(inner, current_session_id); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some(format!("本地识别失败: {e}")), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(e.to_string()); - } - } - } - #[cfg(target_os = "macos")] - ActiveAsr::Local(local) => { - debug_assert!(uses_global_timeout); - // 与 Volcengine/Whisper 一致包一层 global timeout(来自 origin/main)。 - // 注:缓存命中时 transcribe 不含 load 时间;冷启动 load 已在 build_local_qwen3 - // 提前完成,所以 15s 给 transcribe 本身足够。 - let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); - let result = tokio::time::timeout(timeout_duration, local.transcribe()).await; - inner.local_asr_cache.touch(); - schedule_local_asr_release(inner); - match result { - Ok(Ok(r)) => r, - Ok(Err(e)) => { - log::error!("[coord] local Qwen3-ASR transcribe failed: {e:#}"); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some(format!("本地识别失败: {e}")), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err(e.to_string()); - } - Err(_) => { - log::error!( - "[coord] local Qwen3-ASR 全局超时 {} 秒", - COORDINATOR_GLOBAL_TIMEOUT_SECS - ); - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some("识别超时".to_string()), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err("local global timeout".to_string()); - } - } - } - }; - - // ASR 完成后 cancel 检查:用户在 transcribe 进行中按 Esc 时,这里就会命中。 - // 优先级高于 empty 检查 — 用户取消 → 静默丢弃,不写失败历史也不弹错误胶囊。 - if inner.state.lock().cancelled { - log::info!("[coord] cancel detected after ASR — discarding transcript"); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - return Ok(()); - } - - // ASR 返回空转写护栏(来自 PR #66):写一条 emptyTranscript 失败历史 + 错误胶囊, - // 与 main 上其它 error 路径保持一致(带 schedule_capsule_idle 让胶囊自动消失)。 - let mut raw = raw; - - #[cfg(any(debug_assertions, test))] - if raw.text.trim().is_empty() { - if let Some(debug_text) = debug_transcript_override_text() { - log::info!( - "[coord] using debug transcript override (chars={})", - debug_text.chars().count() - ); - raw.text = debug_text; - } - } - - if raw.text.trim().is_empty() { - let session = DictationSession { - id: Uuid::new_v4().to_string(), - created_at: Utc::now().to_rfc3339(), - raw_transcript: raw.text.clone(), - final_text: String::new(), - mode: inner.prefs.get().default_mode, - app_bundle_id: None, - app_name: None, - insert_status: InsertStatus::Failed, - error_code: Some("emptyTranscript".to_string()), - duration_ms: Some(raw.duration_ms), - dictionary_entry_count: Some(enabled_phrases(inner).len() as u32), - }; - if let Err(e) = inner.history.append_with_retention(session, inner.prefs.get().history_retention_days) { - log::error!("[coord] history append failed: {e}"); - } - emit_capsule( - inner, - CapsuleState::Error, - 0.0, - elapsed, - Some("ASR returned empty transcript".to_string()), - None, - ); - restore_prepared_windows_ime_session(inner, current_session_id); - inner.state.lock().phase = SessionPhase::Idle; - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - return Err("ASR returned empty transcript".to_string()); - } - - emit_capsule(inner, CapsuleState::Polishing, 0.0, elapsed, None, None); - - let prefs = inner.prefs.get(); - let mode = prefs.default_mode; - let hotword_strs = enabled_phrases(inner); - let working_languages = prefs.working_languages.clone(); - let chinese_script_preference = prefs.chinese_script_preference; - let output_language_preference = prefs.output_language_preference; - let front_app = inner.state.lock().front_app.clone(); - let translation_target = prefs.translation_target_language.trim().to_string(); - let translation_active = - inner.translation_modifier_seen.load(Ordering::SeqCst) && !translation_target.is_empty(); - // 对话感知 polish:拉最近 N 分钟的会话作为 LLM 上下文。仅在非翻译路径且非 Raw mode - // 才有意义(Raw 不走 LLM、翻译走单轮独立 prompt)。窗口=0 时 prior_turns 是空 Vec, - // polish 路径自动退化成单轮单消息——跟历史行为一致。 - let polish_context_window_minutes = prefs.polish_context_window_minutes; - let prior_turns: Vec<(String, String)> = if !translation_active - && mode != PolishMode::Raw - && polish_context_window_minutes > 0 - { - match inner - .history - .recent_within_minutes(polish_context_window_minutes) - { - Ok(sessions) => sessions - .into_iter() - // 只取实际成功润色过的会话作为上下文:失败的会话 final_text 是 raw 兜底, - // 喂回 LLM 会让模型以为"上一轮我什么都没做"——没意义且占 token。 - .filter(|s| s.error_code.is_none() && !s.final_text.trim().is_empty()) - .map(|s| (s.raw_transcript, s.final_text)) - .collect(), - Err(e) => { - log::warn!("[coord] fetch polish context failed: {e}; fall back to single-turn"); - Vec::new() - } - } - } else { - Vec::new() - }; - let (polished, polish_error) = if translation_active { - log::info!( - "[coord] translation mode → target=\u{300C}{}\u{300D} working={:?} front_app={:?}", - translation_target, - working_languages, - front_app - ); - translate_or_passthrough( - &raw, - &translation_target, - &working_languages, - chinese_script_preference, - output_language_preference, - front_app.as_deref(), - ) - .await - } else { - polish_or_passthrough( - &raw, - mode, - &hotword_strs, - &working_languages, - chinese_script_preference, - output_language_preference, - front_app.as_deref(), - &prior_turns, - ) - .await - }; - - // 仅在“ASR 直出文本”场景做强制简繁收敛,避免误伤成功的翻译/常规 LLM 输出: - // - 非翻译模式:mode=Raw(本来就不走润色)或润色失败回退 raw - // - 翻译模式:仅翻译失败回退 raw 时才收敛 - let should_force_script = if translation_active { - polish_error.is_some() - } else { - mode == PolishMode::Raw || polish_error.is_some() - }; - let polished = if should_force_script { - apply_chinese_script_preference(&polished, chinese_script_preference) - } else { - polished - }; - - // 原子化最后一次 cancel 检查 + 转 Inserting: - // 在同一 lock 内决定「丢弃」还是「进入 Inserting」。一旦设到 Inserting, - // cancel_session 就拒绝介入(Cmd+V 已发出,撤销不掉)。这是 audit HIGH #2 的修复, - // 之前 check 与 inserter.insert 之间有窗口期。 - let proceed_to_insert = { - let mut state = inner.state.lock(); - if state.cancelled { - state.phase = SessionPhase::Idle; - false - } else { - state.phase = SessionPhase::Inserting; - true - } - }; - if !proceed_to_insert { - log::info!( - "[coord] cancel detected before insert — discarding output (chars={})", - polished.chars().count() - ); - restore_prepared_windows_ime_session(inner, current_session_id); - return Ok(()); - } - - let focus_target = inner.state.lock().focus_target; - let focus_ready_for_paste = restore_focus_target_if_possible(focus_target); - let prefs = inner.prefs.get(); - let restore_clipboard = prefs.restore_clipboard_after_paste; - let allow_non_tsf_insertion_fallback = prefs.allow_non_tsf_insertion_fallback; - let status = if focus_ready_for_paste { - #[cfg(target_os = "windows")] - { - let ime_target = capture_ime_submit_target(); - insert_with_windows_ime_first( - inner, - current_session_id, - &polished, - restore_clipboard, - allow_non_tsf_insertion_fallback, - ime_target, - ) - .await - } - #[cfg(not(target_os = "windows"))] - { - inner.inserter.insert(&polished, restore_clipboard) - } - } else { - log::warn!( - "[coord] original insertion target is not foreground; copied output without paste" - ); - if allow_non_tsf_insertion_fallback { - inner.inserter.copy_fallback(&polished) - } else { - InsertStatus::Failed - } - }; - restore_prepared_windows_ime_session(inner, current_session_id); - let inserted_chars = polished.chars().count() as u32; - - // 累计每条 enabled 词条在最终文本中的命中次数。 - // 用 polished(最终插入的文本)扫描,与用户实际看到的输出一致。 - let total_hits: u64 = match inner.vocab.record_hits(&polished) { - Ok(n) => n, - Err(e) => { - log::error!("[coord] record_hits failed: {e}"); - 0 - } - }; - // 词汇本页面在打开时通常需要立即看到 hits 增长,否则用户得手动切走再切回来才刷新。 - // 命中数 > 0 时通知前端:Vocab 页面订阅 vocab:updated 即时 listVocab() 重新加载。 - if total_hits > 0 { - if let Some(app) = inner.app.lock().clone() { - let _ = app.emit("vocab:updated", total_hits); - } - } - - // polish 失败时在 history 里标记 polishFailed,让用户能在历史详情看到为什么这次输出 - // 不是预期的 mode 风格。即使失败也不丢词 — final_text 仍是原文(保留"用户的话不丢"语义)。 - let error_code = dictation_error_code( - status, - polish_error.is_some(), - focus_ready_for_paste, - allow_non_tsf_insertion_fallback, - ) - .map(str::to_string); - let tsf_required_insert_failed = error_code.as_deref() == Some("windowsImeTsfRequired"); - - let session = DictationSession { - id: Uuid::new_v4().to_string(), - created_at: Utc::now().to_rfc3339(), - raw_transcript: raw.text.clone(), - final_text: polished.clone(), - mode, - app_bundle_id: None, - app_name: None, - insert_status: status, - error_code, - duration_ms: Some(raw.duration_ms), - // 历史详情页的"X 个热词"显示:用本次实际命中次数(每个匹配实例算一次), - // 比"启用词条总数"更能反映本段口述命中了多少。u64 → u32 截断对单段听写足够。 - dictionary_entry_count: Some(total_hits.min(u32::MAX as u64) as u32), - }; - if let Err(e) = inner.history.append_with_retention(session, inner.prefs.get().history_retention_days) { - log::error!("[coord] history append failed: {e}"); - } - - let done_message = if tsf_required_insert_failed { - Some("TSF 未上屏,已禁止非 TSF 兜底".to_string()) - } else if polish_error.is_some() { - // polish 失败优先告知用户,即使 insert 成功也要让用户知道这版是原文 - Some("润色失败,已插入原文".to_string()) - } else { - match status { - InsertStatus::Inserted => None, - InsertStatus::PasteSent => Some("已尝试粘贴".to_string()), - InsertStatus::CopiedFallback => Some(if cfg!(target_os = "windows") { - "已复制,请 Ctrl+V".to_string() - } else { - "已复制,请粘贴".to_string() - }), - InsertStatus::Failed => Some("插入失败".to_string()), - } - }; - - emit_capsule( - inner, - CapsuleState::Done, - 0.0, - elapsed, - done_message, - Some(inserted_chars), - ); - - { - let mut state = inner.state.lock(); - state.phase = SessionPhase::Idle; - state.focus_target = None; - } - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); - - Ok(()) -} - -fn dictation_error_code( - status: InsertStatus, - polish_failed: bool, - focus_ready_for_paste: bool, - allow_non_tsf_insertion_fallback: bool, -) -> Option<&'static str> { - if !focus_ready_for_paste && status == InsertStatus::Failed { - Some("focusRestoreFailed") - } else if cfg!(target_os = "windows") - && focus_ready_for_paste - && !allow_non_tsf_insertion_fallback - && status == InsertStatus::Failed - { - Some("windowsImeTsfRequired") - } else if polish_failed { - Some("polishFailed") - } else { - None - } -} - -fn cancel_session(inner: &Arc) { - let Some(decision) = ({ - let mut state = inner.state.lock(); - let phase = state.phase; - let decision = begin_cancel_session_state(&mut state); - if phase == SessionPhase::Inserting { - log::info!("[coord] cancel ignored — already in Inserting phase, can't undo paste"); - } - decision - }) else { - return; - }; - - stop_recorder_for_session(inner, decision.session_id); - cancel_asr_for_session(inner, decision.session_id); - restore_prepared_windows_ime_session(inner, decision.session_id); - // Processing 阶段保持 phase=Processing 让 end_session 自己走完检查 + 收尾; - // 其他阶段直接转 Idle。 - if decision.phase != SessionPhase::Processing { - let mut state = inner.state.lock(); - finish_cancel_session_state(&mut state, decision); - } - emit_capsule(inner, CapsuleState::Cancelled, 0.0, 0, None, None); - log::info!("[coord] session cancelled (was {:?})", decision.phase); - schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); -} - #[cfg(target_os = "windows")] fn store_prepared_windows_ime_session( slots: &mut Vec, @@ -3822,7 +2512,10 @@ async fn end_qa_session(inner: &Arc) -> Result<(), String> { duration_ms: Some(raw.duration_ms), dictionary_entry_count: None, }; - if let Err(e) = inner.history.append_with_retention(session, inner.prefs.get().history_retention_days) { + if let Err(e) = inner + .history + .append_with_retention(session, inner.prefs.get().history_retention_days) + { log::error!("[coord] QA history append failed: {e}"); } } @@ -3953,8 +2646,9 @@ fn resolve_ark_endpoint_with_policy( #[cfg(test)] mod tests { + use super::dictation::abort_recording_with_error; use super::*; - use crate::types::HotkeyTrigger; + use crate::types::{HotkeyMode, HotkeyTrigger}; use once_cell::sync::Lazy; static ENV_LOCK: Lazy> = Lazy::new(|| tokio::sync::Mutex::new(())); diff --git a/openless-all/app/src-tauri/src/coordinator/dictation.rs b/openless-all/app/src-tauri/src/coordinator/dictation.rs new file mode 100644 index 00000000..a45a0c5f --- /dev/null +++ b/openless-all/app/src-tauri/src/coordinator/dictation.rs @@ -0,0 +1,1050 @@ +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use crate::coordinator_state::request_stop_during_starting_state; +use crate::types::HotkeyMode; + +use super::qa::handle_qa_option_edge; +use super::resources::*; +use super::*; + +pub(super) async fn handle_pressed_edge(inner: &Arc) { + let was_held = inner.hotkey_trigger_held.swap(true, Ordering::SeqCst); + if !was_held { + // 路由:QA 浮窗可见时,rightOption 边沿走 QA;否则走主听写。详见 issue #118 v2。 + let panel_visible = inner.qa_state.lock().panel_visible; + if panel_visible { + handle_qa_option_edge(inner).await; + } else { + handle_pressed(inner).await; + } + } +} + +pub(super) async fn handle_pressed(inner: &Arc) { + let mode = inner.prefs.get().hotkey.mode; + let phase = inner.state.lock().phase; + log::info!("[coord] hotkey pressed (mode={mode:?}, phase={phase:?})"); + match (mode, phase) { + (HotkeyMode::Toggle, SessionPhase::Idle) => { + let _ = begin_session(inner).await; + } + (HotkeyMode::Toggle, SessionPhase::Listening) => { + let _ = end_session(inner).await; + } + (HotkeyMode::Hold, SessionPhase::Idle) => { + let _ = begin_session(inner).await; + } + // Toggle 模式 Starting 阶段第二次按 → 用户想停。 + // 不能直接 end_session(ASR session 还没建好),存边沿,握手完成后立即触发。 + (HotkeyMode::Toggle, SessionPhase::Starting) => { + request_stop_during_starting(inner, "toggle stop edge"); + } + _ => {} + } +} + +pub(super) async fn handle_released_edge(inner: &Arc) { + let was_held = inner.hotkey_trigger_held.swap(false, Ordering::SeqCst); + if was_held { + // QA 浮窗可见时,Option 行为是 press-toggle(不分 hold/release),release 边沿忽略。 + let panel_visible = inner.qa_state.lock().panel_visible; + if panel_visible { + return; + } + handle_released(inner).await; + } +} + +pub(super) async fn handle_released(inner: &Arc) { + let mode = inner.prefs.get().hotkey.mode; + let phase = inner.state.lock().phase; + log::info!("[coord] hotkey released (mode={mode:?}, phase={phase:?})"); + if mode == HotkeyMode::Hold { + match phase { + SessionPhase::Listening => { + let _ = end_session(inner).await; + } + // Hold 模式 Starting 阶段松开 → 用户想停。同上:握手完成后再 end。 + SessionPhase::Starting => { + request_stop_during_starting(inner, "hold release edge"); + } + _ => {} + } + } +} + +pub(super) fn request_stop_during_starting(inner: &Arc, reason: &str) { + { + let mut state = inner.state.lock(); + if !request_stop_during_starting_state(&mut state) { + return; + } + } + log::info!("[coord] {reason} during Starting — queued"); + stop_recorder_if_pending_start_stop(inner); +} + +pub(super) async fn begin_session(inner: &Arc) -> Result<(), String> { + let current_session_id = { + let mut state = inner.state.lock(); + let Some(session_id) = + begin_session_state(&mut state, capture_focus_target(), capture_frontmost_app()) + else { + return Ok(()); + }; + if let Some(label) = state.front_app.as_deref() { + log::info!("[coord] front_app captured: {label}"); + } + session_id + }; + #[cfg(target_os = "windows")] + { + let prepared = inner.windows_ime.prepare_session(); + let mut slots = inner.prepared_windows_ime_session.lock(); + store_prepared_windows_ime_session(&mut slots, current_session_id, prepared); + } + // 翻译模式标志重置;hotkey 监听器在 Shift down 时再 set true。 + inner + .translation_modifier_seen + .store(false, Ordering::SeqCst); + + #[cfg(any(debug_assertions, test))] + if hotkey_injection_dry_run_enabled() { + emit_capsule(inner, CapsuleState::Recording, 0.0, 0, None, None); + inner.state.lock().phase = SessionPhase::Listening; + log::info!("[coord] session started (hotkey-injection dry-run)"); + return Ok(()); + } + + if let Err(message) = ensure_asr_credentials() { + log::warn!("[coord] ASR credential gate failed: {message}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + 0, + Some(message.clone()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + return Err(message); + } + + let active_asr = CredentialsVault::get_active_asr(); + + if let Err(message) = ensure_microphone_permission(inner) { + log::warn!("[coord] microphone permission gate failed: {message}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + 0, + Some(message.clone()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(message); + } + + // 不在这里 emit Recording capsule —— 让 start_recorder_for_starting 在 + // Recorder::start 成功后再发,确保「用户看到录音条」时 mic 已经在 capture。 + // 之前在这一行就 emit 会让用户看到录音条后立刻开口,但 mic 还在 cpal init + // 窗口(50-200ms)内 → 开头几个字物理上录不到。详见 issue 备注。 + #[cfg(target_os = "windows")] + if foundry::is_foundry_local_whisper(&active_asr) { + let prefs = inner.prefs.get(); + let model_alias = if foundry::model_alias_is_known(&prefs.foundry_local_asr_model) { + prefs.foundry_local_asr_model.clone() + } else { + foundry::DEFAULT_MODEL_ALIAS.to_string() + }; + let language_hint = prefs.foundry_local_asr_language_hint.trim().to_string(); + let language_hint = if language_hint.is_empty() { + None + } else { + Some(language_hint) + }; + let local = Arc::new(FoundryLocalWhisperAsr::new( + Arc::clone(&inner.foundry_local_runtime), + model_alias, + prefs.foundry_local_runtime_source.clone(), + language_hint, + )); + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::FoundryLocalWhisper(Arc::clone(&local)), + ); + let consumer: Arc = local; + start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) + .await?; + return Ok(()); + } + + #[cfg(target_os = "macos")] + if crate::asr::local::is_local_qwen3(&active_asr) { + let local = match build_local_qwen3(inner).await { + Ok(l) => l, + Err(e) => { + log::error!("[coord] 本地 Qwen3-ASR 初始化失败: {e:#}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + 0, + Some(format!("本地模型初始化失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(format!("local ASR init failed: {e}")); + } + }; + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::Local(Arc::clone(&local)), + ); + let consumer: Arc = local; + start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) + .await?; + return Ok(()); + } + + if is_whisper_compatible_provider(&active_asr) { + let (api_key, base_url, model) = read_whisper_credentials(); + // 用户辞書の有効フレーズを Whisper の `prompt` に流し込む。固有名詞や + // 専門用語の同音・近形誤認識を ASR 段階で抑える。Polish LLM 側には + // 既に system prompt として注入済みだが、Whisper 出力が大きく崩れる + // と Polish でも救えない(特に CJK で顕著)。Volcengine ASR は元々 + // hotword を受け取っており、UI 説明文も「ASR ホットワードと後処理 + // モデルのコンテキスト両方に渡される」と明示しているので、Whisper + // 互換プロバイダにも揃えるのが筋。 + let whisper_prompt = + crate::asr::whisper::build_prompt_from_phrases(&enabled_phrases(inner)); + let whisper = Arc::new(WhisperBatchASR::new( + api_key, + base_url, + model, + whisper_prompt, + )); + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::Whisper(Arc::clone(&whisper)), + ); + let consumer: Arc = whisper; + start_recorder_and_enter_listening(inner, current_session_id, &active_asr, consumer) + .await?; + } else { + let hotwords = enabled_hotwords(inner); + let creds = read_volc_credentials(); + let asr = Arc::new(VolcengineStreamingASR::new(creds, hotwords)); + let bridge = Arc::new(DeferredAsrBridge::new()); + let consumer: Arc = bridge.clone(); + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::Volcengine(Arc::clone(&asr)), + ); + start_recorder_for_starting(inner, current_session_id, &active_asr, consumer)?; + + if let Err(e) = asr.open_session().await { + log::error!("[coord] open ASR session failed: {e}"); + match startup_race_status_for_starting(inner, current_session_id) { + StartupRaceStatus::StaleContinuation => { + log::info!( + "[coord] stale ASR open_session error from session {current_session_id} — ignoring" + ); + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + return Ok(()); + } + StartupRaceStatus::CancelRaced => { + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + return Ok(()); + } + StartupRaceStatus::ActiveStarting => {} + } + discard_startup_resources_for_session(inner, current_session_id); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + 0, + Some(format!("ASR 连接失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + // open_session.await 期间用户可能按了 Esc / 改变心意。如果 cancel_session + // 已触发(cancelled=true 或 phase 被改回 Idle),别再装 ASR,直接善后。 + // audit HIGH #1。 + match startup_race_status_for_starting(inner, current_session_id) { + StartupRaceStatus::ActiveStarting => {} + StartupRaceStatus::CancelRaced => { + log::info!("[coord] cancel raced during ASR open_session — aborting begin"); + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + return Ok(()); + } + StartupRaceStatus::StaleContinuation => { + log::info!( + "[coord] stale ASR open_session continuation from session {current_session_id} — ignoring" + ); + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + return Ok(()); + } + } + let target: Arc = asr; + let flushed_bytes = bridge.attach(target); + log::info!("[coord] ASR connected; flushed {flushed_bytes} deferred audio bytes"); + finish_starting_session(inner, current_session_id).await; + } + + Ok(()) +} + +pub(super) fn start_recorder_for_starting( + inner: &Arc, + session_id: SessionId, + active_asr: &str, + consumer: Arc, +) -> Result<(), String> { + let inner_for_level = Arc::clone(inner); + // 节流:电平回调本身约 185 Hz(cpal 默认音频块),全部转发到前端会让 CSS + // transition 互相覆盖、视觉上"被平均"成静止。限制为 ~30 Hz(33ms 最少间隔), + // 配合 CSS 短 transition 让每次 emit 完整可见。 + let last_emit_at = Arc::new(Mutex::new(None::)); + const LEVEL_EMIT_MIN_INTERVAL_MS: u64 = 33; + let level_handler: Arc = Arc::new(move |level| { + let phase = inner_for_level.state.lock().phase; + if phase != SessionPhase::Listening && phase != SessionPhase::Starting { + return; + } + let now = Instant::now(); + { + let mut last = last_emit_at.lock(); + if let Some(prev) = *last { + if now.duration_since(prev).as_millis() < LEVEL_EMIT_MIN_INTERVAL_MS as u128 { + return; + } + } + *last = Some(now); + } + let elapsed = inner_for_level + .state + .lock() + .started_at + .elapsed() + .as_millis() as u64; + emit_capsule( + &inner_for_level, + CapsuleState::Recording, + level, + elapsed, + None, + None, + ); + }); + + let microphone_device_name = selected_microphone_device_name(inner); + stop_microphone_preview_monitor(inner, "dictation recorder"); + acquire_recording_mute(inner, "dictation"); + match Recorder::start(microphone_device_name, consumer, level_handler) { + Ok((rec, runtime_errors)) => { + store_recorder_for_session(inner, session_id, rec); + spawn_recorder_error_monitor(inner, runtime_errors); + // 不在这里 emit Recording capsule。 + // Recorder::start Ok 仅代表 cpal Stream::play 完成,不代表 audio + // 线程已经在向 consumer 推 PCM —— macOS CoreAudio AudioUnit 启动到 + // 第一帧 process_callback 中间有 50–200 ms 间隙(Windows 类似)。 + // 之前在这里立即 emit Recording 会让用户「看到录音条」就开口,但前几个 + // 字落在 cpal init 窗口里被吞,反映为短录音漏首字(用户报告)。 + // + // 现改为:level_handler 第一次被触发时才 emit Recording capsule。 + // recorder.rs::process_callback 的顺序是 consume_pcm_chunk → level_handler, + // 所以 level_handler 第一次执行 == PCM 已经真实流到 consumer。从这一刻 + // 起用户说什么都被录到。capsule 自然就晚 50–200 ms 出现,但出现 == + // mic 真的在录,匹配「麦先录、UI 再弹」的预期。 + // + // 原本的竞态保护交还给两条已有路径: + // - stop_recorder_if_pending_start_stop:短按时把 capsule 切到 + // Transcribing;recorder 已 stop,level_handler 不会再发火。 + // - level_handler 内部 phase 检查:cancel / 错误使 phase 不在 + // {Starting, Listening} 时直接 return,不会在错误状态上盖 + // Recording。 + stop_recorder_if_pending_start_stop(inner); + log::info!("[coord] recorder started (asr={active_asr}, phase=Starting)"); + } + Err(e) => { + log::error!("[coord] recorder start failed: {e}"); + cancel_asr_for_session(inner, session_id); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + 0, + Some(format!("录音启动失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, session_id); + release_recording_mute(inner, "dictation"); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + } + + Ok(()) +} + +pub(super) fn spawn_recorder_error_monitor(inner: &Arc, rx: mpsc::Receiver) { + // 捕获当前 session_id:err 来时若 id 已经不一致说明是上一 session 的迟到事件, + // 不能去 abort 当前 active 的新 session(它录得好好的)。 + let captured_session_id = inner.state.lock().session_id; + let inner = Arc::clone(inner); + std::thread::Builder::new() + .name("openless-recorder-error-monitor".into()) + .spawn(move || { + if let Ok(err) = rx.recv() { + let current_session_id = inner.state.lock().session_id; + if captured_session_id != current_session_id { + log::warn!( + "[coord] recorder error from stale session {} dropped (current={}, err={})", + captured_session_id, + current_session_id, + err + ); + return; + } + log::error!("[coord] recorder runtime error: {err}"); + abort_recording_with_error(&inner, format!("录音中断: {err}")); + } + }) + .ok(); +} + +pub(super) fn abort_recording_with_error(inner: &Arc, message: String) { + let Some(abort) = ({ + let mut state = inner.state.lock(); + begin_recording_abort_before_restore(&mut state) + }) else { + return; + }; + + discard_startup_resources_for_session(inner, abort.session_id); + restore_prepared_windows_ime_session(inner, abort.session_id); + { + let mut state = inner.state.lock(); + publish_abort_idle_after_restore(&mut state, abort.session_id); + } + + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + abort.elapsed, + Some(message), + None, + ); + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); +} + +pub(super) async fn start_recorder_and_enter_listening( + inner: &Arc, + session_id: SessionId, + active_asr: &str, + consumer: Arc, +) -> Result<(), String> { + start_recorder_for_starting(inner, session_id, active_asr, consumer)?; + finish_starting_session(inner, session_id).await; + Ok(()) +} + +pub(super) async fn finish_starting_session(inner: &Arc, session_id: SessionId) { + // audit HIGH #1:转 Listening 之前在同一 lock 内检查 cancel race。 + // 之前是无条件 phase=Listening,会把 cancel_session 在 await 期间设的 Idle + // 反向覆盖回 Listening → 用户的 cancel 边沿被吞掉。 + let outcome = { + let mut state = inner.state.lock(); + finish_starting_session_state(&mut state, session_id) + }; + match outcome { + BeginOutcome::StaleContinuation => { + log::info!( + "[coord] stale recorder/ASR startup continuation from session {session_id} — ignoring" + ); + discard_startup_resources_for_session(inner, session_id); + restore_prepared_windows_ime_session(inner, session_id); + } + BeginOutcome::CancelRaced => { + log::info!("[coord] cancel raced during recorder/ASR startup — aborting begin"); + discard_startup_resources_for_session(inner, session_id); + restore_prepared_windows_ime_session(inner, session_id); + set_phase_idle_if_session_matches(inner, session_id); + } + BeginOutcome::Started | BeginOutcome::PendingStop => { + log::info!("[coord] session started"); + if matches!(outcome, BeginOutcome::PendingStop) { + log::info!("[coord] applying pending_stop edge → end_session immediately"); + let _ = end_session(inner).await; + } + } + } +} + +pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { + let current_session_id = { + let mut state = inner.state.lock(); + let Some(session_id) = start_processing_if_listening(&mut state) else { + return Ok(()); + }; + session_id + }; + + let elapsed = inner.state.lock().started_at.elapsed().as_millis() as u64; + emit_capsule(inner, CapsuleState::Transcribing, 0.0, elapsed, None, None); + + if let Some(rec) = take_recorder_for_session(inner, current_session_id) { + rec.stop(); + release_recording_mute(inner, "dictation"); + } + + let asr_opt = take_asr_for_session(inner, current_session_id); + let asr = match asr_opt { + Some(a) => a, + None => { + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + return Ok(()); + } + }; + + let uses_global_timeout = asr_transcribe_uses_global_timeout(&asr); + let raw = match asr { + ActiveAsr::Volcengine(asr) => { + debug_assert!(uses_global_timeout); + if let Err(e) = asr.send_last_frame().await { + log::error!("[coord] send last frame failed: {e}"); + } + // 添加全局超时保护:防止 await_final_result() 永远挂起 + let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); + match tokio::time::timeout(timeout_duration, asr.await_final_result()).await { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + log::error!("[coord] await final failed: {e}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + Err(_) => { + // 全局超时:最后的防线 + log::error!( + "[coord] 全局超时 {} 秒 - 强制恢复", + COORDINATOR_GLOBAL_TIMEOUT_SECS + ); + // 清理 ASR session,避免资源泄漏 + asr.cancel(); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some("识别超时".to_string()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err("global timeout".to_string()); + } + } + } + ActiveAsr::Whisper(w) => { + debug_assert!(uses_global_timeout); + // Whisper 也添加类似的超时保护 + let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); + match tokio::time::timeout(timeout_duration, w.transcribe()).await { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + log::error!("[coord] whisper transcribe failed: {e}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + Err(_) => { + log::error!( + "[coord] whisper 全局超时 {} 秒", + COORDINATOR_GLOBAL_TIMEOUT_SECS + ); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some("识别超时".to_string()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err("whisper global timeout".to_string()); + } + } + } + #[cfg(target_os = "windows")] + ActiveAsr::FoundryLocalWhisper(local) => { + debug_assert!(!uses_global_timeout); + match local + .transcribe(foundry_audio_transcribe_timeout_duration()) + .await + { + Ok(r) => { + schedule_foundry_local_asr_release(inner, current_session_id); + r + } + Err(e) => { + if inner.state.lock().cancelled { + log::info!( + "[coord] Foundry Local Whisper transcribe cancelled — discarding transcript" + ); + schedule_foundry_local_asr_release(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + return Ok(()); + } + log::error!("[coord] Foundry Local Whisper transcribe failed: {e:#}"); + schedule_foundry_local_asr_release(inner, current_session_id); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("本地识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + } + } + #[cfg(target_os = "macos")] + ActiveAsr::Local(local) => { + debug_assert!(uses_global_timeout); + // 与 Volcengine/Whisper 一致包一层 global timeout(来自 origin/main)。 + // 注:缓存命中时 transcribe 不含 load 时间;冷启动 load 已在 build_local_qwen3 + // 提前完成,所以 15s 给 transcribe 本身足够。 + let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); + let result = tokio::time::timeout(timeout_duration, local.transcribe()).await; + inner.local_asr_cache.touch(); + schedule_local_asr_release(inner); + match result { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + log::error!("[coord] local Qwen3-ASR transcribe failed: {e:#}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("本地识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + Err(_) => { + log::error!( + "[coord] local Qwen3-ASR 全局超时 {} 秒", + COORDINATOR_GLOBAL_TIMEOUT_SECS + ); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some("识别超时".to_string()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err("local global timeout".to_string()); + } + } + } + }; + + // ASR 完成后 cancel 检查:用户在 transcribe 进行中按 Esc 时,这里就会命中。 + // 优先级高于 empty 检查 — 用户取消 → 静默丢弃,不写失败历史也不弹错误胶囊。 + if inner.state.lock().cancelled { + log::info!("[coord] cancel detected after ASR — discarding transcript"); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + return Ok(()); + } + + // ASR 返回空转写护栏(来自 PR #66):写一条 emptyTranscript 失败历史 + 错误胶囊, + // 与 main 上其它 error 路径保持一致(带 schedule_capsule_idle 让胶囊自动消失)。 + let mut raw = raw; + + #[cfg(any(debug_assertions, test))] + if raw.text.trim().is_empty() { + if let Some(debug_text) = debug_transcript_override_text() { + log::info!( + "[coord] using debug transcript override (chars={})", + debug_text.chars().count() + ); + raw.text = debug_text; + } + } + + if raw.text.trim().is_empty() { + let session = DictationSession { + id: Uuid::new_v4().to_string(), + created_at: Utc::now().to_rfc3339(), + raw_transcript: raw.text.clone(), + final_text: String::new(), + mode: inner.prefs.get().default_mode, + app_bundle_id: None, + app_name: None, + insert_status: InsertStatus::Failed, + error_code: Some("emptyTranscript".to_string()), + duration_ms: Some(raw.duration_ms), + dictionary_entry_count: Some(enabled_phrases(inner).len() as u32), + }; + if let Err(e) = inner + .history + .append_with_retention(session, inner.prefs.get().history_retention_days) + { + log::error!("[coord] history append failed: {e}"); + } + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some("ASR returned empty transcript".to_string()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err("ASR returned empty transcript".to_string()); + } + + emit_capsule(inner, CapsuleState::Polishing, 0.0, elapsed, None, None); + + let prefs = inner.prefs.get(); + let mode = prefs.default_mode; + let hotword_strs = enabled_phrases(inner); + let working_languages = prefs.working_languages.clone(); + let chinese_script_preference = prefs.chinese_script_preference; + let output_language_preference = prefs.output_language_preference; + let front_app = inner.state.lock().front_app.clone(); + let translation_target = prefs.translation_target_language.trim().to_string(); + let translation_active = + inner.translation_modifier_seen.load(Ordering::SeqCst) && !translation_target.is_empty(); + // 对话感知 polish:拉最近 N 分钟的会话作为 LLM 上下文。仅在非翻译路径且非 Raw mode + // 才有意义(Raw 不走 LLM、翻译走单轮独立 prompt)。窗口=0 时 prior_turns 是空 Vec, + // polish 路径自动退化成单轮单消息——跟历史行为一致。 + let polish_context_window_minutes = prefs.polish_context_window_minutes; + let prior_turns: Vec<(String, String)> = if !translation_active + && mode != PolishMode::Raw + && polish_context_window_minutes > 0 + { + match inner + .history + .recent_within_minutes(polish_context_window_minutes) + { + Ok(sessions) => sessions + .into_iter() + // 只取实际成功润色过的会话作为上下文:失败的会话 final_text 是 raw 兜底, + // 喂回 LLM 会让模型以为"上一轮我什么都没做"——没意义且占 token。 + .filter(|s| s.error_code.is_none() && !s.final_text.trim().is_empty()) + .map(|s| (s.raw_transcript, s.final_text)) + .collect(), + Err(e) => { + log::warn!("[coord] fetch polish context failed: {e}; fall back to single-turn"); + Vec::new() + } + } + } else { + Vec::new() + }; + let (polished, polish_error) = if translation_active { + log::info!( + "[coord] translation mode → target=\u{300C}{}\u{300D} working={:?} front_app={:?}", + translation_target, + working_languages, + front_app + ); + translate_or_passthrough( + &raw, + &translation_target, + &working_languages, + chinese_script_preference, + output_language_preference, + front_app.as_deref(), + ) + .await + } else { + polish_or_passthrough( + &raw, + mode, + &hotword_strs, + &working_languages, + chinese_script_preference, + output_language_preference, + front_app.as_deref(), + &prior_turns, + ) + .await + }; + + // 仅在“ASR 直出文本”场景做强制简繁收敛,避免误伤成功的翻译/常规 LLM 输出: + // - 非翻译模式:mode=Raw(本来就不走润色)或润色失败回退 raw + // - 翻译模式:仅翻译失败回退 raw 时才收敛 + let should_force_script = if translation_active { + polish_error.is_some() + } else { + mode == PolishMode::Raw || polish_error.is_some() + }; + let polished = if should_force_script { + apply_chinese_script_preference(&polished, chinese_script_preference) + } else { + polished + }; + + // 原子化最后一次 cancel 检查 + 转 Inserting: + // 在同一 lock 内决定「丢弃」还是「进入 Inserting」。一旦设到 Inserting, + // cancel_session 就拒绝介入(Cmd+V 已发出,撤销不掉)。这是 audit HIGH #2 的修复, + // 之前 check 与 inserter.insert 之间有窗口期。 + let proceed_to_insert = { + let mut state = inner.state.lock(); + if state.cancelled { + state.phase = SessionPhase::Idle; + false + } else { + state.phase = SessionPhase::Inserting; + true + } + }; + if !proceed_to_insert { + log::info!( + "[coord] cancel detected before insert — discarding output (chars={})", + polished.chars().count() + ); + restore_prepared_windows_ime_session(inner, current_session_id); + return Ok(()); + } + + let focus_target = inner.state.lock().focus_target; + let focus_ready_for_paste = restore_focus_target_if_possible(focus_target); + let prefs = inner.prefs.get(); + let restore_clipboard = prefs.restore_clipboard_after_paste; + let allow_non_tsf_insertion_fallback = prefs.allow_non_tsf_insertion_fallback; + let status = if focus_ready_for_paste { + #[cfg(target_os = "windows")] + { + let ime_target = capture_ime_submit_target(); + insert_with_windows_ime_first( + inner, + current_session_id, + &polished, + restore_clipboard, + allow_non_tsf_insertion_fallback, + ime_target, + ) + .await + } + #[cfg(not(target_os = "windows"))] + { + inner.inserter.insert(&polished, restore_clipboard) + } + } else { + log::warn!( + "[coord] original insertion target is not foreground; copied output without paste" + ); + if allow_non_tsf_insertion_fallback { + inner.inserter.copy_fallback(&polished) + } else { + InsertStatus::Failed + } + }; + restore_prepared_windows_ime_session(inner, current_session_id); + let inserted_chars = polished.chars().count() as u32; + + // 累计每条 enabled 词条在最终文本中的命中次数。 + // 用 polished(最终插入的文本)扫描,与用户实际看到的输出一致。 + let total_hits: u64 = match inner.vocab.record_hits(&polished) { + Ok(n) => n, + Err(e) => { + log::error!("[coord] record_hits failed: {e}"); + 0 + } + }; + // 词汇本页面在打开时通常需要立即看到 hits 增长,否则用户得手动切走再切回来才刷新。 + // 命中数 > 0 时通知前端:Vocab 页面订阅 vocab:updated 即时 listVocab() 重新加载。 + if total_hits > 0 { + if let Some(app) = inner.app.lock().clone() { + let _ = app.emit("vocab:updated", total_hits); + } + } + + // polish 失败时在 history 里标记 polishFailed,让用户能在历史详情看到为什么这次输出 + // 不是预期的 mode 风格。即使失败也不丢词 — final_text 仍是原文(保留"用户的话不丢"语义)。 + let error_code = dictation_error_code( + status, + polish_error.is_some(), + focus_ready_for_paste, + allow_non_tsf_insertion_fallback, + ) + .map(str::to_string); + let tsf_required_insert_failed = error_code.as_deref() == Some("windowsImeTsfRequired"); + + let session = DictationSession { + id: Uuid::new_v4().to_string(), + created_at: Utc::now().to_rfc3339(), + raw_transcript: raw.text.clone(), + final_text: polished.clone(), + mode, + app_bundle_id: None, + app_name: None, + insert_status: status, + error_code, + duration_ms: Some(raw.duration_ms), + // 历史详情页的"X 个热词"显示:用本次实际命中次数(每个匹配实例算一次), + // 比"启用词条总数"更能反映本段口述命中了多少。u64 → u32 截断对单段听写足够。 + dictionary_entry_count: Some(total_hits.min(u32::MAX as u64) as u32), + }; + if let Err(e) = inner + .history + .append_with_retention(session, inner.prefs.get().history_retention_days) + { + log::error!("[coord] history append failed: {e}"); + } + + let done_message = if tsf_required_insert_failed { + Some("TSF 未上屏,已禁止非 TSF 兜底".to_string()) + } else if polish_error.is_some() { + // polish 失败优先告知用户,即使 insert 成功也要让用户知道这版是原文 + Some("润色失败,已插入原文".to_string()) + } else { + match status { + InsertStatus::Inserted => None, + InsertStatus::PasteSent => Some("已尝试粘贴".to_string()), + InsertStatus::CopiedFallback => Some(if cfg!(target_os = "windows") { + "已复制,请 Ctrl+V".to_string() + } else { + "已复制,请粘贴".to_string() + }), + InsertStatus::Failed => Some("插入失败".to_string()), + } + }; + + emit_capsule( + inner, + CapsuleState::Done, + 0.0, + elapsed, + done_message, + Some(inserted_chars), + ); + + { + let mut state = inner.state.lock(); + state.phase = SessionPhase::Idle; + state.focus_target = None; + } + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + + Ok(()) +} + +pub(super) fn dictation_error_code( + status: InsertStatus, + polish_failed: bool, + focus_ready_for_paste: bool, + allow_non_tsf_insertion_fallback: bool, +) -> Option<&'static str> { + if !focus_ready_for_paste && status == InsertStatus::Failed { + Some("focusRestoreFailed") + } else if cfg!(target_os = "windows") + && focus_ready_for_paste + && !allow_non_tsf_insertion_fallback + && status == InsertStatus::Failed + { + Some("windowsImeTsfRequired") + } else if polish_failed { + Some("polishFailed") + } else { + None + } +} + +pub(super) fn cancel_session(inner: &Arc) { + let Some(decision) = ({ + let mut state = inner.state.lock(); + let phase = state.phase; + let decision = begin_cancel_session_state(&mut state); + if phase == SessionPhase::Inserting { + log::info!("[coord] cancel ignored — already in Inserting phase, can't undo paste"); + } + decision + }) else { + return; + }; + + stop_recorder_for_session(inner, decision.session_id); + cancel_asr_for_session(inner, decision.session_id); + restore_prepared_windows_ime_session(inner, decision.session_id); + // Processing 阶段保持 phase=Processing 让 end_session 自己走完检查 + 收尾; + // 其他阶段直接转 Idle。 + if decision.phase != SessionPhase::Processing { + let mut state = inner.state.lock(); + finish_cancel_session_state(&mut state, decision); + } + emit_capsule(inner, CapsuleState::Cancelled, 0.0, 0, None, None); + log::info!("[coord] session cancelled (was {:?})", decision.phase); + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); +} diff --git a/openless-all/app/src-tauri/src/coordinator/qa.rs b/openless-all/app/src-tauri/src/coordinator/qa.rs new file mode 100644 index 00000000..14ca1376 --- /dev/null +++ b/openless-all/app/src-tauri/src/coordinator/qa.rs @@ -0,0 +1,124 @@ +use std::sync::Arc; + +use tauri::Emitter; + +use crate::coordinator_state::{initial_session_id, SessionId}; +use crate::selection::SelectionContext; +use crate::types::CapsuleState; + +use super::{ + begin_qa_session, cancel_qa_session, capture_frontmost_app, emit_capsule, end_qa_session, Inner, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum QaPhase { + Idle, + Recording, + Processing, +} + +pub(super) struct QaSessionState { + pub(super) phase: QaPhase, + pub(super) cancelled: bool, + pub(super) selection: Option, + pub(super) front_app: Option, + /// 用于忽略迟到的 RMS / runtime error。 + pub(super) session_id: SessionId, + /// QA 浮窗是否被用户钉住(pinned)。pinned=true 时不自动隐藏。 + pub(super) pinned: bool, + /// 浮窗是否对用户可见。Cmd+Shift+; 边沿 toggle 此 flag; + /// 主听写 hotkey(rightOption)边沿来时,看这个 flag 决定是走 QA 还是走 dictation。 + /// 详见 issue #118 v2。 + pub(super) panel_visible: bool, + /// 多轮对话累积。每轮 user→assistant 加两条;关浮窗清空。 + pub(super) messages: Vec, +} + +impl Default for QaSessionState { + fn default() -> Self { + Self { + phase: QaPhase::Idle, + cancelled: false, + selection: None, + front_app: None, + session_id: initial_session_id(), + pinned: false, + panel_visible: false, + messages: Vec::new(), + } + } +} + +pub(super) async fn handle_qa_hotkey_pressed(inner: &Arc) { + // QA hotkey(默认 Cmd+Shift+;)现在只 toggle 浮窗可见性。 + // 浮窗内的录音 / 提问由 Option 边沿驱动(handle_pressed_edge → handle_qa_option_edge)。 + let visible = inner.qa_state.lock().panel_visible; + log::info!("[coord] QA hotkey edge (panel_visible={visible})"); + if visible { + close_qa_panel(inner); + } else { + open_qa_panel(inner); + } +} + +pub(super) async fn handle_qa_option_edge(inner: &Arc) { + let phase = inner.qa_state.lock().phase; + log::info!("[coord] QA option edge (phase={phase:?})"); + match phase { + QaPhase::Idle => { + let _ = begin_qa_session(inner).await; + } + QaPhase::Recording => { + let _ = end_qa_session(inner).await; + } + // Processing 阶段再次按键忽略(避免与正在跑的 LLM 冲突)。 + QaPhase::Processing => {} + } +} + +pub(super) fn open_qa_panel(inner: &Arc) { + { + let mut state = inner.qa_state.lock(); + state.panel_visible = true; + state.phase = QaPhase::Idle; + state.cancelled = false; + state.messages.clear(); + state.selection = None; + state.front_app = capture_frontmost_app(); + } + // 先把胶囊清干净,避免主听写上一次 Done 状态残留的 message/insertedChars + // 在 QA Done 阶段被 capsule UI 错误复用("已之一粘贴这个 0" 那种)。 + emit_capsule(inner, CapsuleState::Idle, 0.0, 0, None, None); + if let Some(app) = inner.app.lock().clone() { + crate::show_qa_window(&app, "idle"); + let _ = app.emit_to( + "qa", + "qa:state", + serde_json::json!({ + "kind": "idle", + "messages": Vec::::new(), + }), + ); + } + log::info!("[coord] QA panel opened (awaiting Option to record)"); +} + +pub(super) fn close_qa_panel(inner: &Arc) { + cancel_qa_session(inner); + { + let mut state = inner.qa_state.lock(); + state.panel_visible = false; + state.pinned = false; + state.messages.clear(); + state.selection = None; + state.front_app = None; + state.phase = QaPhase::Idle; + state.cancelled = false; + } + if let Some(app) = inner.app.lock().clone() { + crate::hide_qa_window(&app); + } + // 胶囊一同收掉,避免浮窗关了胶囊还在显示。 + emit_capsule(inner, CapsuleState::Idle, 0.0, 0, None, None); + log::info!("[coord] QA panel closed, history cleared"); +} diff --git a/openless-all/app/src-tauri/src/coordinator/resources.rs b/openless-all/app/src-tauri/src/coordinator/resources.rs new file mode 100644 index 00000000..e04bcd3d --- /dev/null +++ b/openless-all/app/src-tauri/src/coordinator/resources.rs @@ -0,0 +1,191 @@ +use std::sync::Arc; + +use crate::coordinator_state::{SessionId, SessionPhase}; +use crate::recorder::Recorder; +use crate::types::CapsuleState; +use tauri::Manager; + +use super::{emit_capsule, ActiveAsr, Inner}; + +pub(super) struct SessionResource { + pub(super) session_id: SessionId, + resource: T, +} + +impl SessionResource { + pub(super) fn new(session_id: SessionId, resource: T) -> Self { + Self { + session_id, + resource, + } + } + + fn into_inner(self) -> T { + self.resource + } +} + +pub(super) struct SharedRecordingMuteState { + guard: Option, + holders: u32, +} + +impl SharedRecordingMuteState { + pub(super) fn new() -> Self { + Self { + guard: None, + holders: 0, + } + } +} + +pub(super) fn take_session_resource( + slot: &mut Option>, + session_id: SessionId, +) -> Option { + if slot + .as_ref() + .map(|resource| resource.session_id == session_id) + .unwrap_or(false) + { + slot.take().map(SessionResource::into_inner) + } else { + None + } +} + +pub(super) fn store_asr_for_session(inner: &Arc, session_id: SessionId, asr: ActiveAsr) { + *inner.asr.lock() = Some(SessionResource::new(session_id, asr)); +} + +pub(super) fn take_asr_for_session(inner: &Arc, session_id: SessionId) -> Option { + let mut slot = inner.asr.lock(); + take_session_resource(&mut slot, session_id) +} + +pub(super) fn cancel_active_asr(asr: ActiveAsr) { + match asr { + ActiveAsr::Volcengine(v) => v.cancel(), + ActiveAsr::Whisper(w) => w.cancel(), + #[cfg(target_os = "windows")] + ActiveAsr::FoundryLocalWhisper(local) => local.cancel(), + #[cfg(target_os = "macos")] + ActiveAsr::Local(local) => local.cancel(), + } +} + +pub(super) fn cancel_asr_for_session(inner: &Arc, session_id: SessionId) { + if let Some(asr) = take_asr_for_session(inner, session_id) { + cancel_active_asr(asr); + } +} + +pub(super) fn store_recorder_for_session( + inner: &Arc, + session_id: SessionId, + recorder: Recorder, +) { + *inner.recorder.lock() = Some(SessionResource::new(session_id, recorder)); +} + +pub(super) fn selected_microphone_device_name(inner: &Arc) -> Option { + let name = inner.prefs.get().microphone_device_name.trim().to_string(); + if name.is_empty() { + None + } else { + Some(name) + } +} + +pub(super) fn stop_microphone_preview_monitor(inner: &Arc, owner: &str) { + let Some(app) = inner.app.lock().as_ref().cloned() else { + return; + }; + let state = app.state::(); + let recorder = state.lock().take(); + if let Some(recorder) = recorder { + log::info!("[recorder] stopping microphone preview monitor before {owner}"); + recorder.stop(); + } +} + +pub(super) fn acquire_recording_mute(inner: &Arc, owner: &str) { + if !inner.prefs.get().mute_during_recording { + return; + } + let mut mute = inner.recording_mute.lock(); + if mute.holders == 0 { + match crate::audio_mute::AudioMuteGuard::activate() { + Ok(guard) => { + mute.guard = Some(guard); + log::info!("[audio-mute] system output muted for recording"); + } + Err(err) => { + log::warn!("[audio-mute] failed to mute output for {owner}: {err}"); + return; + } + } + } + mute.holders = mute.holders.saturating_add(1); + log::info!("[audio-mute] acquired by {owner}; holders={}", mute.holders); +} + +pub(super) fn release_recording_mute(inner: &Arc, owner: &str) { + let mut mute = inner.recording_mute.lock(); + if mute.holders == 0 { + return; + } + mute.holders -= 1; + log::info!("[audio-mute] released by {owner}; holders={}", mute.holders); + if mute.holders == 0 { + mute.guard.take(); + log::info!("[audio-mute] system output mute restored after recording"); + } +} + +pub(super) fn stop_qa_recorder(inner: &Arc) { + if let Some(rec) = inner.qa_recorder.lock().take() { + rec.stop(); + release_recording_mute(inner, "qa"); + } +} + +pub(super) fn take_recorder_for_session( + inner: &Arc, + session_id: SessionId, +) -> Option { + let mut slot = inner.recorder.lock(); + take_session_resource(&mut slot, session_id) +} + +pub(super) fn stop_recorder_for_session(inner: &Arc, session_id: SessionId) { + if let Some(recorder) = take_recorder_for_session(inner, session_id) { + recorder.stop(); + release_recording_mute(inner, "dictation"); + } +} + +pub(super) fn discard_startup_resources_for_session(inner: &Arc, session_id: SessionId) { + stop_recorder_for_session(inner, session_id); + cancel_asr_for_session(inner, session_id); +} + +pub(super) fn stop_recorder_if_pending_start_stop(inner: &Arc) { + let (should_stop, session_id) = { + let state = inner.state.lock(); + ( + state.phase == SessionPhase::Starting && state.pending_stop, + state.session_id, + ) + }; + if !should_stop { + return; + } + if let Some(rec) = take_recorder_for_session(inner, session_id) { + rec.stop(); + release_recording_mute(inner, "dictation"); + let elapsed = inner.state.lock().started_at.elapsed().as_millis() as u64; + emit_capsule(inner, CapsuleState::Transcribing, 0.0, elapsed, None, None); + log::info!("[coord] stopped recorder while ASR is still connecting"); + } +}