From 9551786cae12223822945fb4d07cdd8bb07bb2d0 Mon Sep 17 00:00:00 2001 From: H-Chris233 Date: Sat, 9 May 2026 21:23:53 +0800 Subject: [PATCH 1/3] Enable DashScope realtime ASR without changing recorder flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 百炼传统实时识别协议可以直接消费 OpenLess 已有的 16 kHz mono PCM,所以新增独立 ASR provider 并复用现有 DeferredAsrBridge / coordinator 会话状态机,而不是引入另一条录音链路。Settings 侧增加百炼选项、默认北京 endpoint、默认 fun-asr-realtime 模型,并允许填写可选 vocabulary_id 下发已创建的百炼热词表。 Constraint: issue #384 要求接入百炼 DashScope 实时 ASR,并保持北京/新加坡 endpoint 可配置。 Constraint: 凭据仍必须走系统凭据库,新增 asr.vocabulary_id 只作为 active ASR provider 的可选 provider 设置。 Rejected: 同时实现 qwen3-asr-flash OpenAI Realtime 协议 | 协议和结果事件不同,超出最小更改范围。 Rejected: 自动从本地词汇表创建百炼热词表 | 需要远端资源生命周期和用户确认,不能静默替用户管理供应商资源。 Confidence: high Scope-risk: moderate Directive: 百炼 provider 当前走传统 /api-ws/v1/inference 协议;不要把 qwen realtime 事件格式混进同一实现。 Tested: cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml Tested: cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml --lib Tested: cargo test --manifest-path openless-all/app/src-tauri/backend-tests/Cargo.toml Tested: cd openless-all/app && npm run build Not-tested: 需要真实 DashScope API Key 和麦克风环境才能完成线上 macOS dictation 验证。 Related: #384 Co-authored-by: OmX --- openless-all/app/src-tauri/src/asr/bailian.rs | 588 ++++++++++++++++++ openless-all/app/src-tauri/src/asr/mod.rs | 2 + openless-all/app/src-tauri/src/commands.rs | 59 ++ openless-all/app/src-tauri/src/coordinator.rs | 38 +- .../src-tauri/src/coordinator/dictation.rs | 116 +++- .../src-tauri/src/coordinator/resources.rs | 1 + openless-all/app/src-tauri/src/persistence.rs | 16 +- openless-all/app/src/i18n/en.ts | 3 + openless-all/app/src/i18n/ja.ts | 3 + openless-all/app/src/i18n/ko.ts | 3 + openless-all/app/src/i18n/zh-CN.ts | 3 + openless-all/app/src/i18n/zh-TW.ts | 3 + openless-all/app/src/pages/Overview.tsx | 1 + openless-all/app/src/pages/Settings.tsx | 20 +- 14 files changed, 848 insertions(+), 8 deletions(-) create mode 100644 openless-all/app/src-tauri/src/asr/bailian.rs diff --git a/openless-all/app/src-tauri/src/asr/bailian.rs b/openless-all/app/src-tauri/src/asr/bailian.rs new file mode 100644 index 00000000..bf7ef454 --- /dev/null +++ b/openless-all/app/src-tauri/src/asr/bailian.rs @@ -0,0 +1,588 @@ +//! Alibaba Cloud Bailian / DashScope realtime ASR client. +//! +//! Uses the classic DashScope realtime recognition WebSocket protocol +//! (`/api-ws/v1/inference`) because it accepts raw 16 kHz mono PCM frames and +//! matches OpenLess' recorder output directly. The Qwen OpenAI Realtime line is +//! a different protocol and is intentionally left for a follow-up provider. + +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use futures_util::{SinkExt, StreamExt}; +use parking_lot::Mutex as ParkingMutex; +use serde_json::{json, Value}; +use tokio::net::TcpStream; +use tokio::runtime::Handle; +use tokio::sync::{mpsc, oneshot, Mutex as AsyncMutex, Notify}; +use tokio_tungstenite::tungstenite::client::IntoClientRequest; +use tokio_tungstenite::tungstenite::http::header::HeaderValue; +use tokio_tungstenite::tungstenite::Message; +use tokio_tungstenite::{connect_async, MaybeTlsStream, WebSocketStream}; +use uuid::Uuid; + +use super::{AudioConsumer, RawTranscript}; + +pub const PROVIDER_ID: &str = "bailian"; +pub const DEFAULT_ENDPOINT: &str = "wss://dashscope.aliyuncs.com/api-ws/v1/inference/"; +pub const DEFAULT_MODEL: &str = "fun-asr-realtime"; + +/// 100 ms of 16 kHz / 16-bit / mono PCM. +pub const TARGET_AUDIO_CHUNK_BYTES: usize = 3_200; +const BYTES_PER_MS: u64 = 32; +const FINAL_RESULT_TIMEOUT: Duration = Duration::from_secs(12); + +type WsStream = WebSocketStream>; +type WsSink = futures_util::stream::SplitSink; +type SharedWriter = Arc>>; + +#[derive(Clone, Debug)] +pub struct BailianCredentials { + pub api_key: String, + pub endpoint: String, + pub model: String, + pub vocabulary_id: Option, +} + +impl BailianCredentials { + pub fn normalized_endpoint(&self) -> String { + if self.endpoint.trim().is_empty() { + return DEFAULT_ENDPOINT.to_string(); + } + self.endpoint.trim().to_string() + } + + pub fn normalized_model(&self) -> String { + let model = self.model.trim(); + if model.is_empty() { + DEFAULT_MODEL.to_string() + } else { + model.to_string() + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum BailianASRError { + #[error("credentials missing")] + CredentialsMissing, + #[error("connection failed: {0}")] + ConnectionFailed(String), + #[error("send failed: {0}")] + SendFailed(String), + #[error("task failed: {0}")] + TaskFailed(String), + #[error("no final result")] + NoFinalResult, + #[error("final result timed out")] + FinalResultTimeout, +} + +enum SendItem { + Audio(Vec), + Finish(oneshot::Sender>), +} + +#[derive(Default)] +struct SyncState { + task_id: String, + pending_audio: Vec, + audio_scratch: Vec, + bytes_received: u64, + task_started: bool, + task_finished: bool, + runtime: Option, + start: Option, + final_tx: Option>>, + send_tx: Option>, + final_segments: Vec, + last_result_text: String, +} + +pub struct BailianRealtimeASR { + credentials: BailianCredentials, + state: ParkingMutex, + writer: SharedWriter, + final_rx: ParkingMutex>>>, + task_started: Arc, +} + +impl BailianRealtimeASR { + pub fn new(credentials: BailianCredentials) -> Self { + Self { + credentials, + state: ParkingMutex::new(SyncState::default()), + writer: Arc::new(AsyncMutex::new(None)), + final_rx: ParkingMutex::new(None), + task_started: Arc::new(Notify::new()), + } + } + + pub async fn open_session(self: &Arc) -> Result<(), BailianASRError> { + if self.credentials.api_key.trim().is_empty() { + return Err(BailianASRError::CredentialsMissing); + } + + let task_id = Uuid::new_v4().simple().to_string(); + let endpoint = self.credentials.normalized_endpoint(); + let mut request = endpoint + .into_client_request() + .map_err(|e| BailianASRError::ConnectionFailed(e.to_string()))?; + request.headers_mut().insert( + "Authorization", + HeaderValue::from_str(&format!("bearer {}", self.credentials.api_key.trim())) + .map_err(|e| BailianASRError::ConnectionFailed(e.to_string()))?, + ); + + let (ws, _resp) = connect_async(request) + .await + .map_err(|e| BailianASRError::ConnectionFailed(e.to_string()))?; + let (write, read) = ws.split(); + *self.writer.lock().await = Some(write); + + let (final_tx, final_rx) = oneshot::channel(); + let (send_tx, mut send_rx) = mpsc::unbounded_channel::(); + { + let mut st = self.state.lock(); + *st = SyncState::default(); + st.task_id = task_id.clone(); + st.runtime = Some(Handle::current()); + st.start = Some(Instant::now()); + st.final_tx = Some(final_tx); + st.send_tx = Some(send_tx); + } + *self.final_rx.lock() = Some(final_rx); + + let writer_for_worker = Arc::clone(&self.writer); + let task_id_for_worker = task_id.clone(); + tokio::spawn(async move { + while let Some(item) = send_rx.recv().await { + match item { + SendItem::Audio(chunk) => { + if let Err(e) = send_binary(&writer_for_worker, chunk).await { + log::error!("[bailian-asr] audio frame send failed: {e}"); + } + } + SendItem::Finish(done) => { + let result = + send_text(&writer_for_worker, finish_task_message(&task_id_for_worker)) + .await + .map_err(|e| BailianASRError::SendFailed(e.to_string())); + let _ = done.send(result); + } + } + } + }); + + send_text( + &self.writer, + run_task_message( + &task_id, + &self.credentials.normalized_model(), + self.credentials.vocabulary_id.as_deref(), + ), + ) + .await?; + + let weak_self = Arc::downgrade(self); + tokio::spawn(async move { + let mut read = read; + while let Some(msg) = read.next().await { + let Some(this) = weak_self.upgrade() else { + break; + }; + match msg { + Ok(Message::Text(text)) => { + if !this.handle_text_message(&text) { + break; + } + } + Ok(Message::Close(_)) => { + this.finish_with_partial_or_error(BailianASRError::NoFinalResult); + break; + } + Ok(_) => {} + Err(e) => { + log::error!("[bailian-asr] receive loop error: {e}"); + this.finish_with_partial_or_error(BailianASRError::ConnectionFailed( + e.to_string(), + )); + break; + } + } + } + }); + + Ok(()) + } + + pub async fn send_last_frame(&self) -> Result<(), BailianASRError> { + let started = { + let st = self.state.lock(); + st.task_started || st.task_finished + }; + if !started { + tokio::time::timeout(Duration::from_secs(5), self.task_started.notified()) + .await + .map_err(|_| BailianASRError::FinalResultTimeout)?; + } + let (send_tx, tail_chunks) = { + let mut st = self.state.lock(); + let send_tx = st.send_tx.clone(); + if !st.pending_audio.is_empty() { + let pending = std::mem::take(&mut st.pending_audio); + st.audio_scratch.extend_from_slice(&pending); + } + let tail = if st.audio_scratch.is_empty() { + Vec::new() + } else { + vec![std::mem::take(&mut st.audio_scratch)] + }; + (send_tx, tail) + }; + let Some(send_tx) = send_tx else { + return Ok(()); + }; + for chunk in tail_chunks { + let _ = send_tx.send(SendItem::Audio(chunk)); + } + let (done_tx, done_rx) = oneshot::channel(); + send_tx + .send(SendItem::Finish(done_tx)) + .map_err(|_| BailianASRError::SendFailed("send worker closed".to_string()))?; + done_rx + .await + .map_err(|_| BailianASRError::SendFailed("finish ack dropped".to_string()))? + } + + pub async fn await_final_result(&self) -> Result { + let rx = self.final_rx.lock().take(); + let Some(rx) = rx else { + return Err(BailianASRError::NoFinalResult); + }; + tokio::time::timeout(FINAL_RESULT_TIMEOUT, rx) + .await + .map_err(|_| BailianASRError::FinalResultTimeout)? + .map_err(|_| BailianASRError::NoFinalResult)? + } + + pub fn cancel(&self) { + let mut st = self.state.lock(); + st.pending_audio.clear(); + st.audio_scratch.clear(); + st.send_tx.take(); + st.final_tx.take(); + st.task_finished = true; + drop(st); + let writer = Arc::clone(&self.writer); + if let Ok(handle) = Handle::try_current() { + handle.spawn(async move { + let _ = close_writer(&writer).await; + }); + } else { + std::thread::spawn(move || { + if let Ok(rt) = tokio::runtime::Runtime::new() { + rt.block_on(async move { + let _ = close_writer(&writer).await; + }); + } + }); + } + } + + fn handle_text_message(&self, text: &str) -> bool { + let value: Value = match serde_json::from_str(text) { + Ok(v) => v, + Err(e) => { + log::warn!("[bailian-asr] invalid json event: {e}"); + return true; + } + }; + let event = value + .get("header") + .and_then(|h| h.get("event")) + .and_then(Value::as_str) + .unwrap_or_default(); + match event { + "task-started" => { + self.mark_task_started(); + true + } + "result-generated" => { + self.record_result(&value); + true + } + "task-finished" => { + self.finish_success(); + false + } + "task-failed" => { + let message = value + .get("header") + .and_then(|h| h.get("error_message")) + .and_then(Value::as_str) + .unwrap_or("task failed") + .to_string(); + self.finish_error(BailianASRError::TaskFailed(message)); + false + } + _ => true, + } + } + + fn mark_task_started(&self) { + let (send_tx, chunks) = { + let mut st = self.state.lock(); + st.task_started = true; + if !st.pending_audio.is_empty() { + let pending = std::mem::take(&mut st.pending_audio); + st.audio_scratch.extend_from_slice(&pending); + } + let send_tx = st.send_tx.clone(); + let chunks = drain_audio_chunks(&mut st.audio_scratch); + (send_tx, chunks) + }; + if let Some(tx) = send_tx { + for chunk in chunks { + let _ = tx.send(SendItem::Audio(chunk)); + } + } + self.task_started.notify_waiters(); + } + + fn record_result(&self, value: &Value) { + let sentence = value + .get("payload") + .and_then(|p| p.get("output")) + .and_then(|o| o.get("sentence")); + let Some(sentence) = sentence else { + return; + }; + let Some(text) = sentence.get("text").and_then(Value::as_str) else { + return; + }; + let trimmed = text.trim(); + if trimmed.is_empty() { + return; + } + let is_sentence_final = sentence.get("end_time").is_some(); + let mut st = self.state.lock(); + st.last_result_text = trimmed.to_string(); + if is_sentence_final && st.final_segments.last().map(|s| s.as_str()) != Some(trimmed) { + st.final_segments.push(trimmed.to_string()); + } + } + + fn finish_success(&self) { + let (tx, text, duration_ms) = { + let mut st = self.state.lock(); + if st.task_finished { + return; + } + st.task_finished = true; + st.send_tx.take(); + let text = if st.final_segments.is_empty() { + st.last_result_text.clone() + } else { + st.final_segments.join("") + }; + let duration_ms = if st.bytes_received > 0 { + st.bytes_received / BYTES_PER_MS + } else { + st.start + .map(|start| start.elapsed().as_millis() as u64) + .unwrap_or_default() + }; + (st.final_tx.take(), text, duration_ms) + }; + if let Some(tx) = tx { + let _ = tx.send(Ok(RawTranscript { text, duration_ms })); + } + self.close_on_runtime(); + } + + fn finish_with_partial_or_error(&self, error: BailianASRError) { + let has_partial = { + let st = self.state.lock(); + !st.last_result_text.trim().is_empty() || !st.final_segments.is_empty() + }; + if has_partial { + self.finish_success(); + } else { + self.finish_error(error); + } + } + + fn finish_error(&self, error: BailianASRError) { + let tx = { + let mut st = self.state.lock(); + if st.task_finished { + return; + } + st.task_finished = true; + st.send_tx.take(); + st.final_tx.take() + }; + if let Some(tx) = tx { + let _ = tx.send(Err(error)); + } + self.close_on_runtime(); + } + + fn close_on_runtime(&self) { + let writer = Arc::clone(&self.writer); + if let Some(handle) = self.state.lock().runtime.clone() { + handle.spawn(async move { + let _ = close_writer(&writer).await; + }); + } + } +} + +impl AudioConsumer for BailianRealtimeASR { + fn consume_pcm_chunk(&self, pcm: &[u8]) { + if pcm.is_empty() { + return; + } + let (send_tx, chunks) = { + let mut st = self.state.lock(); + st.bytes_received = st.bytes_received.saturating_add(pcm.len() as u64); + if !st.task_started { + st.pending_audio.extend_from_slice(pcm); + return; + } + st.audio_scratch.extend_from_slice(pcm); + let chunks = drain_audio_chunks(&mut st.audio_scratch); + (st.send_tx.clone(), chunks) + }; + if let Some(tx) = send_tx { + for chunk in chunks { + let _ = tx.send(SendItem::Audio(chunk)); + } + } + } +} + +fn drain_audio_chunks(buffer: &mut Vec) -> Vec> { + let mut chunks = Vec::new(); + while buffer.len() >= TARGET_AUDIO_CHUNK_BYTES { + chunks.push(buffer.drain(..TARGET_AUDIO_CHUNK_BYTES).collect()); + } + chunks +} + +fn run_task_message(task_id: &str, model: &str, vocabulary_id: Option<&str>) -> String { + let mut parameters = json!({ + "sample_rate": 16000, + "format": "pcm" + }); + if let Some(vocabulary_id) = vocabulary_id.map(str::trim).filter(|id| !id.is_empty()) { + parameters["vocabulary_id"] = Value::String(vocabulary_id.to_string()); + } + + json!({ + "header": { + "action": "run-task", + "task_id": task_id, + "streaming": "duplex" + }, + "payload": { + "task_group": "audio", + "task": "asr", + "function": "recognition", + "model": model, + "parameters": parameters, + "input": {} + } + }) + .to_string() +} + +fn finish_task_message(task_id: &str) -> String { + json!({ + "header": { + "action": "finish-task", + "task_id": task_id, + "streaming": "duplex" + }, + "payload": { "input": {} } + }) + .to_string() +} + +async fn send_text(writer: &SharedWriter, text: String) -> Result<(), BailianASRError> { + let mut guard = writer.lock().await; + let Some(ws) = guard.as_mut() else { + return Err(BailianASRError::ConnectionFailed( + "websocket writer not available".to_string(), + )); + }; + ws.send(Message::Text(text)) + .await + .map_err(|e| BailianASRError::SendFailed(e.to_string())) +} + +async fn send_binary(writer: &SharedWriter, data: Vec) -> Result<(), BailianASRError> { + let mut guard = writer.lock().await; + let Some(ws) = guard.as_mut() else { + return Err(BailianASRError::ConnectionFailed( + "websocket writer not available".to_string(), + )); + }; + ws.send(Message::Binary(data)) + .await + .map_err(|e| BailianASRError::SendFailed(e.to_string())) +} + +async fn close_writer(writer: &SharedWriter) -> Result<(), BailianASRError> { + let mut guard = writer.lock().await; + if let Some(mut ws) = guard.take() { + let _ = ws.close().await; + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn credentials_apply_default_endpoint_and_model() { + let creds = BailianCredentials { + api_key: "sk-test".to_string(), + endpoint: "".to_string(), + model: "".to_string(), + vocabulary_id: None, + }; + assert_eq!(creds.normalized_endpoint(), DEFAULT_ENDPOINT); + assert_eq!(creds.normalized_model(), DEFAULT_MODEL); + } + + #[test] + fn run_task_message_uses_pcm_16k() { + let value: Value = + serde_json::from_str(&run_task_message("abc", DEFAULT_MODEL, None)).unwrap(); + assert_eq!(value["header"]["action"], "run-task"); + assert_eq!(value["payload"]["model"], DEFAULT_MODEL); + assert_eq!(value["payload"]["parameters"]["sample_rate"], 16000); + assert_eq!(value["payload"]["parameters"]["format"], "pcm"); + assert!(value["payload"]["parameters"]["vocabulary_id"].is_null()); + } + + #[test] + fn run_task_message_includes_optional_vocabulary_id() { + let value: Value = + serde_json::from_str(&run_task_message("abc", DEFAULT_MODEL, Some(" vocab-123 "))) + .unwrap(); + assert_eq!(value["payload"]["parameters"]["vocabulary_id"], "vocab-123"); + } + + #[test] + fn drain_audio_chunks_keeps_tail_buffered() { + let mut buffer = vec![1u8; TARGET_AUDIO_CHUNK_BYTES * 2 + 17]; + let chunks = drain_audio_chunks(&mut buffer); + assert_eq!(chunks.len(), 2); + assert_eq!(chunks[0].len(), TARGET_AUDIO_CHUNK_BYTES); + assert_eq!(chunks[1].len(), TARGET_AUDIO_CHUNK_BYTES); + assert_eq!(buffer.len(), 17); + } +} diff --git a/openless-all/app/src-tauri/src/asr/mod.rs b/openless-all/app/src-tauri/src/asr/mod.rs index 203cdea9..4433556d 100644 --- a/openless-all/app/src-tauri/src/asr/mod.rs +++ b/openless-all/app/src-tauri/src/asr/mod.rs @@ -5,12 +5,14 @@ //! `frame.rs` (binary frame codec) and the session lifecycle in //! `volcengine.rs`. +pub mod bailian; mod frame; pub mod local; pub mod volcengine; pub mod wav; pub mod whisper; +pub use bailian::{BailianCredentials, BailianRealtimeASR}; pub use volcengine::{VolcengineCredentials, VolcengineStreamingASR}; pub use whisper::WhisperBatchASR; diff --git a/openless-all/app/src-tauri/src/commands.rs b/openless-all/app/src-tauri/src/commands.rs index 723d9ea6..9a79386f 100644 --- a/openless-all/app/src-tauri/src/commands.rs +++ b/openless-all/app/src-tauri/src/commands.rs @@ -381,6 +381,9 @@ fn asr_configured_for_provider(provider: &str, snap: &CredentialsSnapshot) -> bo // 本地 ASR 不依赖云端凭据。 return true; } + if provider == crate::asr::bailian::PROVIDER_ID { + return configured(&snap.asr_api_key); + } configured(&snap.asr_endpoint) && configured(&snap.asr_model) } @@ -539,6 +542,11 @@ pub async fn validate_provider_credentials(kind: String) -> Result Result { + if kind == "asr" && CredentialsVault::get_active_asr() == crate::asr::bailian::PROVIDER_ID { + return Ok(ProviderModelsResult { + models: vec![crate::asr::bailian::DEFAULT_MODEL.to_string()], + }); + } let config = read_openai_provider_config(&kind)?; fetch_provider_models(&config) .await @@ -619,6 +627,10 @@ async fn validate_asr_provider() -> Result<(), String> { return Ok(()); } + if active_asr == crate::asr::bailian::PROVIDER_ID { + return validate_bailian_asr_provider().await; + } + let config = read_openai_provider_config("asr")?; let model = CredentialsVault::get(CredentialAccount::AsrModel) .map_err(|e| e.to_string())? @@ -627,6 +639,44 @@ async fn validate_asr_provider() -> Result<(), String> { validate_asr_transcription(&config, model.trim()).await } +async fn validate_bailian_asr_provider() -> Result<(), String> { + let api_key = CredentialsVault::get(CredentialAccount::AsrApiKey) + .map_err(|e| e.to_string())? + .unwrap_or_default(); + if api_key.trim().is_empty() { + return Err("API Key 为空".to_string()); + } + let endpoint = CredentialsVault::get(CredentialAccount::AsrEndpoint) + .map_err(|e| e.to_string())? + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| crate::asr::bailian::DEFAULT_ENDPOINT.to_string()); + let model = CredentialsVault::get(CredentialAccount::AsrModel) + .map_err(|e| e.to_string())? + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| crate::asr::bailian::DEFAULT_MODEL.to_string()); + let vocabulary_id = CredentialsVault::get(CredentialAccount::AsrVocabularyId) + .map_err(|e| e.to_string())? + .filter(|s| !s.trim().is_empty()); + let asr = std::sync::Arc::new(crate::asr::BailianRealtimeASR::new( + crate::asr::BailianCredentials { + api_key, + endpoint, + model, + vocabulary_id, + }, + )); + asr.open_session().await.map_err(|e| e.to_string())?; + crate::asr::AudioConsumer::consume_pcm_chunk( + &*asr, + &vec![0u8; crate::asr::bailian::TARGET_AUDIO_CHUNK_BYTES], + ); + asr.send_last_frame().await.map_err(|e| e.to_string())?; + asr.await_final_result() + .await + .map(|_| ()) + .map_err(|e| e.to_string()) +} + fn active_asr_is_keyless_for_validation(provider: &str) -> bool { provider == crate::asr::local::PROVIDER_ID || active_foundry_asr_is_supported(provider) } @@ -821,6 +871,7 @@ fn parse_account(s: &str) -> Result { "asr.api_key" => Ok(CredentialAccount::AsrApiKey), "asr.endpoint" => Ok(CredentialAccount::AsrEndpoint), "asr.model" => Ok(CredentialAccount::AsrModel), + "asr.vocabulary_id" => Ok(CredentialAccount::AsrVocabularyId), _ => Err(format!("unknown account: {s}")), } } @@ -1784,6 +1835,10 @@ mod tests { ..snapshot() }; assert!(!asr_configured_for_provider("whisper", &whisper_key_only)); + assert!(asr_configured_for_provider( + crate::asr::bailian::PROVIDER_ID, + &whisper_key_only + )); let whisper_keyless_ready = CredentialsSnapshot { asr_endpoint: Some("https://api.openai.com/v1".into()), @@ -1794,6 +1849,10 @@ mod tests { "whisper", &whisper_keyless_ready )); + assert!(!asr_configured_for_provider( + crate::asr::bailian::PROVIDER_ID, + &whisper_keyless_ready + )); assert!(asr_configured_for_provider( crate::asr::local::PROVIDER_ID, diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index 5eb88e0c..801bbfae 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -19,8 +19,8 @@ use uuid::Uuid; #[cfg(target_os = "windows")] use crate::asr::local::{foundry, FoundryLocalRuntime, FoundryLocalWhisperAsr}; use crate::asr::{ - DictionaryHotword, RawTranscript, VolcengineCredentials, VolcengineStreamingASR, - WhisperBatchASR, + BailianCredentials, BailianRealtimeASR, DictionaryHotword, RawTranscript, + VolcengineCredentials, VolcengineStreamingASR, WhisperBatchASR, }; use crate::combo_hotkey::{ComboHotkeyError, ComboHotkeyEvent, ComboHotkeyMonitor}; use crate::coordinator_state::{ @@ -69,6 +69,7 @@ use resources::{ enum ActiveAsr { Volcengine(Arc), Whisper(Arc), + Bailian(Arc), #[cfg(target_os = "windows")] FoundryLocalWhisper(Arc), /// 本地 Qwen3-ASR;只在 macOS + 模型已下载时可达。 @@ -1785,7 +1786,7 @@ fn ensure_asr_credentials() -> Result<(), String> { return Ok(()); } - if is_whisper_compatible_provider(&active_asr) { + if is_whisper_compatible_provider(&active_asr) || is_bailian_provider(&active_asr) { let api_key = CredentialsVault::get(CredentialAccount::AsrApiKey) .ok() .flatten() @@ -1918,6 +1919,10 @@ fn is_whisper_compatible_provider(id: &str) -> bool { matches!(id, "whisper" | "siliconflow" | "zhipu" | "groq") } +fn is_bailian_provider(id: &str) -> bool { + id == crate::asr::bailian::PROVIDER_ID +} + fn apply_chinese_script_preference(text: &str, pref: ChineseScriptPreference) -> String { if text.is_empty() { return String::new(); @@ -2100,6 +2105,33 @@ fn read_whisper_credentials() -> (String, String, String) { (api_key, base_url, model) } +fn read_bailian_credentials() -> BailianCredentials { + let api_key = CredentialsVault::get(CredentialAccount::AsrApiKey) + .ok() + .flatten() + .unwrap_or_default(); + let endpoint = CredentialsVault::get(CredentialAccount::AsrEndpoint) + .ok() + .flatten() + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| crate::asr::bailian::DEFAULT_ENDPOINT.to_string()); + let model = CredentialsVault::get(CredentialAccount::AsrModel) + .ok() + .flatten() + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| crate::asr::bailian::DEFAULT_MODEL.to_string()); + let vocabulary_id = CredentialsVault::get(CredentialAccount::AsrVocabularyId) + .ok() + .flatten() + .filter(|s| !s.trim().is_empty()); + BailianCredentials { + api_key, + endpoint, + model, + vocabulary_id, + } +} + fn read_volc_credentials() -> VolcengineCredentials { let app_id = CredentialsVault::get(CredentialAccount::VolcengineAppKey) .ok() diff --git a/openless-all/app/src-tauri/src/coordinator/dictation.rs b/openless-all/app/src-tauri/src/coordinator/dictation.rs index a45a0c5f..5cc6105e 100644 --- a/openless-all/app/src-tauri/src/coordinator/dictation.rs +++ b/openless-all/app/src-tauri/src/coordinator/dictation.rs @@ -216,7 +216,77 @@ pub(super) async fn begin_session(inner: &Arc) -> Result<(), String> { return Ok(()); } - if is_whisper_compatible_provider(&active_asr) { + if is_bailian_provider(&active_asr) { + let asr = Arc::new(BailianRealtimeASR::new(read_bailian_credentials())); + let bridge = Arc::new(DeferredAsrBridge::new()); + let consumer: Arc = bridge.clone(); + store_asr_for_session( + inner, + current_session_id, + ActiveAsr::Bailian(Arc::clone(&asr)), + ); + start_recorder_for_starting(inner, current_session_id, &active_asr, consumer)?; + + if let Err(e) = asr.open_session().await { + log::error!("[coord] open Bailian ASR session failed: {e}"); + match startup_race_status_for_starting(inner, current_session_id) { + StartupRaceStatus::StaleContinuation => { + log::info!( + "[coord] stale Bailian ASR open_session error from session {current_session_id} — ignoring" + ); + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + return Ok(()); + } + StartupRaceStatus::CancelRaced => { + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + return Ok(()); + } + StartupRaceStatus::ActiveStarting => {} + } + discard_startup_resources_for_session(inner, current_session_id); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + 0, + Some(format!("ASR 连接失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + match startup_race_status_for_starting(inner, current_session_id) { + StartupRaceStatus::ActiveStarting => {} + StartupRaceStatus::CancelRaced => { + log::info!("[coord] cancel raced during Bailian ASR open_session — aborting begin"); + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + set_phase_idle_if_session_matches(inner, current_session_id); + return Ok(()); + } + StartupRaceStatus::StaleContinuation => { + log::info!( + "[coord] stale Bailian ASR open_session continuation from session {current_session_id} — ignoring" + ); + asr.cancel(); + discard_startup_resources_for_session(inner, current_session_id); + restore_prepared_windows_ime_session(inner, current_session_id); + return Ok(()); + } + } + let target: Arc = asr; + let flushed_bytes = bridge.attach(target); + log::info!("[coord] Bailian ASR connected; flushed {flushed_bytes} deferred audio bytes"); + finish_starting_session(inner, current_session_id).await; + } else if is_whisper_compatible_provider(&active_asr) { let (api_key, base_url, model) = read_whisper_credentials(); // 用户辞書の有効フレーズを Whisper の `prompt` に流し込む。固有名詞や // 専門用語の同音・近形誤認識を ASR 段階で抑える。Polish LLM 側には @@ -627,6 +697,50 @@ pub(super) async fn end_session(inner: &Arc) -> Result<(), String> { } } } + ActiveAsr::Bailian(asr) => { + debug_assert!(uses_global_timeout); + if let Err(e) = asr.send_last_frame().await { + log::error!("[coord] Bailian send last frame failed: {e}"); + } + let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS); + match tokio::time::timeout(timeout_duration, asr.await_final_result()).await { + Ok(Ok(r)) => r, + Ok(Err(e)) => { + log::error!("[coord] Bailian await final failed: {e}"); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some(format!("识别失败: {e}")), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err(e.to_string()); + } + Err(_) => { + log::error!( + "[coord] Bailian 全局超时 {} 秒", + COORDINATOR_GLOBAL_TIMEOUT_SECS + ); + asr.cancel(); + emit_capsule( + inner, + CapsuleState::Error, + 0.0, + elapsed, + Some("识别超时".to_string()), + None, + ); + restore_prepared_windows_ime_session(inner, current_session_id); + inner.state.lock().phase = SessionPhase::Idle; + schedule_capsule_idle(inner, CAPSULE_AUTO_HIDE_DELAY_MS); + return Err("bailian global timeout".to_string()); + } + } + } #[cfg(target_os = "windows")] ActiveAsr::FoundryLocalWhisper(local) => { debug_assert!(!uses_global_timeout); diff --git a/openless-all/app/src-tauri/src/coordinator/resources.rs b/openless-all/app/src-tauri/src/coordinator/resources.rs index e04bcd3d..14d60062 100644 --- a/openless-all/app/src-tauri/src/coordinator/resources.rs +++ b/openless-all/app/src-tauri/src/coordinator/resources.rs @@ -67,6 +67,7 @@ pub(super) fn cancel_active_asr(asr: ActiveAsr) { match asr { ActiveAsr::Volcengine(v) => v.cancel(), ActiveAsr::Whisper(w) => w.cancel(), + ActiveAsr::Bailian(b) => b.cancel(), #[cfg(target_os = "windows")] ActiveAsr::FoundryLocalWhisper(local) => local.cancel(), #[cfg(target_os = "macos")] diff --git a/openless-all/app/src-tauri/src/persistence.rs b/openless-all/app/src-tauri/src/persistence.rs index a70362e5..d2175dd4 100644 --- a/openless-all/app/src-tauri/src/persistence.rs +++ b/openless-all/app/src-tauri/src/persistence.rs @@ -166,7 +166,7 @@ fn read_or_default Deserialize<'de> + Default>(path: &Path) -> Resul // "version": 1, // "active": { "asr": "", "llm": "" }, // "providers": { -// "asr": { "": { "appKey", "accessKey", "resourceId", "apiKey", "baseURL", "model" } }, +// "asr": { "": { "appKey", "accessKey", "resourceId", "apiKey", "baseURL", "model", "vocabularyId" } }, // "llm": { "": { "displayName", "apiKey", "baseURL", "model", "temperature", "extraHeaders" } } // } // } @@ -244,6 +244,8 @@ struct CredsAsrEntry { accessKey: Option, #[serde(skip_serializing_if = "Option::is_none")] resourceId: Option, + #[serde(skip_serializing_if = "Option::is_none")] + vocabularyId: Option, } impl CredsAsrEntry { @@ -254,6 +256,7 @@ impl CredsAsrEntry { && self.appKey.as_deref().unwrap_or("").is_empty() && self.accessKey.as_deref().unwrap_or("").is_empty() && self.resourceId.as_deref().unwrap_or("").is_empty() + && self.vocabularyId.as_deref().unwrap_or("").is_empty() } } @@ -532,7 +535,7 @@ fn migrate_legacy_sources_for_update() -> Result { fn load_credentials() -> CredsRoot { match load_keyring_credentials() { Ok(Some(root)) => { - // 不在这里调 remove_legacy_keyring_credentials() —— 它内部对 9 个 + // 不在这里调 remove_legacy_keyring_credentials() —— 它内部对每个 // 旧 account 各做一次 keyring delete,每次 delete 在 macOS Keychain // 上仍要触发 ACL 检查。第一次成功 load 时 legacy entries 通常已经 // 被 migrate_legacy_sources_for_update 清理过了;这里若再无脑跑, @@ -632,6 +635,7 @@ fn lookup_account(root: &CredsRoot, account: CredentialAccount) -> Option asr.and_then(|e| pick(&e.apiKey)), CredentialAccount::AsrEndpoint => asr.and_then(|e| pick(&e.baseURL)), CredentialAccount::AsrModel => asr.and_then(|e| pick(&e.model)), + CredentialAccount::AsrVocabularyId => asr.and_then(|e| pick(&e.vocabularyId)), } } @@ -676,6 +680,10 @@ fn write_account(root: &mut CredsRoot, account: CredentialAccount, value: Option let entry = root.providers.asr.entry(asr_id).or_default(); entry.model = normalized; } + CredentialAccount::AsrVocabularyId => { + let entry = root.providers.asr.entry(asr_id).or_default(); + entry.vocabularyId = normalized; + } } } @@ -983,6 +991,8 @@ pub enum CredentialAccount { AsrEndpoint, /// Active ASR provider's model name. AsrModel, + /// Active ASR provider's optional hotword vocabulary ID. + AsrVocabularyId, } impl CredentialAccount { @@ -1000,6 +1010,7 @@ impl CredentialAccount { CredentialAccount::AsrApiKey => "asr.api_key", CredentialAccount::AsrEndpoint => "asr.endpoint", CredentialAccount::AsrModel => "asr.model", + CredentialAccount::AsrVocabularyId => "asr.vocabulary_id", } } @@ -1014,6 +1025,7 @@ impl CredentialAccount { CredentialAccount::AsrApiKey, CredentialAccount::AsrEndpoint, CredentialAccount::AsrModel, + CredentialAccount::AsrVocabularyId, ] } } diff --git a/openless-all/app/src/i18n/en.ts b/openless-all/app/src/i18n/en.ts index 17feb523..084df369 100644 --- a/openless-all/app/src/i18n/en.ts +++ b/openless-all/app/src/i18n/en.ts @@ -353,6 +353,7 @@ export const en: typeof zhCN = { codingPlanX: 'CodingPlanX', custom: 'Custom', asrVolcengine: 'Volcengine bigasr', + asrBailian: 'Alibaba Bailian realtime ASR', asrSiliconflow: 'SiliconFlow SenseVoice', asrZhipu: 'Zhipu GLM-ASR', asrGroq: 'Groq Whisper-large-v3', @@ -378,6 +379,8 @@ export const en: typeof zhCN = { apiKeyLabel: 'API Key', baseUrlLabel: 'Base URL', modelLabel: 'Model', + bailianVocabularyIdLabel: 'Hotword Vocabulary ID (optional)', + bailianVocabularyIdNote: 'If you have created a DashScope hotword vocabulary, enter its vocab-... ID. Leave blank to skip hotwords.', appIdLabel: 'App ID', accessKeyLabel: 'Access Key', resourceIdLabel: 'Resource ID', diff --git a/openless-all/app/src/i18n/ja.ts b/openless-all/app/src/i18n/ja.ts index b5b3ec28..e3e8684a 100644 --- a/openless-all/app/src/i18n/ja.ts +++ b/openless-all/app/src/i18n/ja.ts @@ -355,6 +355,7 @@ export const ja: typeof zhCN = { codingPlanX: 'CodingPlanX', custom: 'カスタム', asrVolcengine: 'Volcengine bigasr', + asrBailian: 'Alibaba Bailian リアルタイム ASR', asrSiliconflow: 'SiliconFlow SenseVoice', asrZhipu: 'Zhipu GLM-ASR', asrGroq: 'Groq Whisper-large-v3', @@ -380,6 +381,8 @@ export const ja: typeof zhCN = { apiKeyLabel: 'API キー', baseUrlLabel: 'エンドポイント', modelLabel: 'モデル', + bailianVocabularyIdLabel: 'ホットワード Vocabulary ID(任意)', + bailianVocabularyIdNote: 'DashScope でホットワード辞書を作成済みの場合は vocab-... ID を入力します。空欄なら送信しません。', appIdLabel: 'App ID(アプリケーション ID)', accessKeyLabel: 'Access Key', resourceIdLabel: 'Resource ID', diff --git a/openless-all/app/src/i18n/ko.ts b/openless-all/app/src/i18n/ko.ts index 9518e2af..086f5f1c 100644 --- a/openless-all/app/src/i18n/ko.ts +++ b/openless-all/app/src/i18n/ko.ts @@ -355,6 +355,7 @@ export const ko: typeof zhCN = { codingPlanX: 'CodingPlanX', custom: '사용자 정의', asrVolcengine: 'Volcengine bigasr', + asrBailian: 'Alibaba Bailian 실시간 ASR', asrSiliconflow: 'SiliconFlow SenseVoice', asrZhipu: 'Zhipu GLM-ASR', asrGroq: 'Groq Whisper-large-v3', @@ -380,6 +381,8 @@ export const ko: typeof zhCN = { apiKeyLabel: 'API 키', baseUrlLabel: '엔드포인트', modelLabel: '모델', + bailianVocabularyIdLabel: '핫워드 Vocabulary ID(선택)', + bailianVocabularyIdNote: 'DashScope에서 핫워드 사전을 만들었다면 vocab-... ID를 입력하세요. 비워 두면 핫워드를 전송하지 않습니다.', appIdLabel: 'App ID(애플리케이션 ID)', accessKeyLabel: 'Access Key', resourceIdLabel: 'Resource ID', diff --git a/openless-all/app/src/i18n/zh-CN.ts b/openless-all/app/src/i18n/zh-CN.ts index 265449a2..c025274d 100644 --- a/openless-all/app/src/i18n/zh-CN.ts +++ b/openless-all/app/src/i18n/zh-CN.ts @@ -351,6 +351,7 @@ export const zhCN = { codingPlanX: 'CodingPlanX', custom: '自定义', asrVolcengine: '火山引擎 bigasr', + asrBailian: '阿里云百炼实时 ASR', asrSiliconflow: '硅基流动 SenseVoice', asrZhipu: '智谱 GLM-ASR', asrGroq: 'Groq Whisper-large-v3', @@ -376,6 +377,8 @@ export const zhCN = { apiKeyLabel: 'API 密钥', baseUrlLabel: '接口地址', modelLabel: '模型', + bailianVocabularyIdLabel: '热词 Vocabulary ID(可选)', + bailianVocabularyIdNote: '如已在百炼创建热词表,可填写 vocab-...;留空则不下发热词。', appIdLabel: 'App ID(应用 ID)', accessKeyLabel: 'Access Key', resourceIdLabel: '资源 ID', diff --git a/openless-all/app/src/i18n/zh-TW.ts b/openless-all/app/src/i18n/zh-TW.ts index 21dc2d80..4ab3b5a7 100644 --- a/openless-all/app/src/i18n/zh-TW.ts +++ b/openless-all/app/src/i18n/zh-TW.ts @@ -353,6 +353,7 @@ export const zhTW: typeof zhCN = { codingPlanX: 'CodingPlanX', custom: '自定義', asrVolcengine: '火山引擎 bigasr', + asrBailian: '阿里雲百煉即時 ASR', asrSiliconflow: '硅基流動 SenseVoice', asrZhipu: '智譜 GLM-ASR', asrGroq: 'Groq Whisper-large-v3', @@ -378,6 +379,8 @@ export const zhTW: typeof zhCN = { apiKeyLabel: 'API 密鑰', baseUrlLabel: '接口地址', modelLabel: '模型', + bailianVocabularyIdLabel: '熱詞 Vocabulary ID(可選)', + bailianVocabularyIdNote: '如已在百煉建立熱詞表,可填寫 vocab-...;留空則不下發熱詞。', appIdLabel: 'App ID(應用 ID)', accessKeyLabel: 'Access Key', resourceIdLabel: '資源 ID', diff --git a/openless-all/app/src/pages/Overview.tsx b/openless-all/app/src/pages/Overview.tsx index e80427ff..8316bb68 100644 --- a/openless-all/app/src/pages/Overview.tsx +++ b/openless-all/app/src/pages/Overview.tsx @@ -25,6 +25,7 @@ interface OverviewProps { const ASR_NAME_KEY_BY_ID: Record = { volcengine: 'asrVolcengine', + bailian: 'asrBailian', siliconflow: 'asrSiliconflow', zhipu: 'asrZhipu', groq: 'asrGroq', diff --git a/openless-all/app/src/pages/Settings.tsx b/openless-all/app/src/pages/Settings.tsx index 60b9ede3..565578f8 100644 --- a/openless-all/app/src/pages/Settings.tsx +++ b/openless-all/app/src/pages/Settings.tsx @@ -1182,13 +1182,15 @@ type LlmPresetId = typeof LLM_PRESETS[number]['id']; const ASR_DEFAULT_RESOURCE_ID = 'volc.seedasr.sauc.duration'; -// `volcengine` 走自建流式客户端;其余走 OpenAI 兼容 `/audio/transcriptions` -// (`coordinator.rs::is_whisper_compatible_provider`)。新增兼容厂商: +// `volcengine` / `bailian` 走自建流式客户端;其余走 OpenAI 兼容 +// `/audio/transcriptions`(`coordinator.rs::is_whisper_compatible_provider`)。 +// 新增兼容厂商: // 1. 在这里加一项 `{ id, nameKey, baseUrl, model }`; // 2. `coordinator.rs::is_whisper_compatible_provider` 加同名 id; // 3. 在 i18n 的 `settings.providers.presets.` 加文案。 const ASR_PRESETS = [ { id: 'volcengine', nameKey: 'asrVolcengine', baseUrl: '', model: '' }, + { id: 'bailian', nameKey: 'asrBailian', baseUrl: 'wss://dashscope.aliyuncs.com/api-ws/v1/inference/', model: 'fun-asr-realtime' }, { id: 'siliconflow', nameKey: 'asrSiliconflow', baseUrl: 'https://api.siliconflow.cn/v1', model: 'FunAudioLLM/SenseVoiceSmall' }, { id: 'zhipu', nameKey: 'asrZhipu', baseUrl: 'https://open.bigmodel.cn/api/paas/v4', model: 'glm-asr-2512' }, { id: 'groq', nameKey: 'asrGroq', baseUrl: 'https://api.groq.com/openai/v1', model: 'whisper-large-v3-turbo' }, @@ -1393,6 +1395,20 @@ function ProvidersSection() { defaultValue={asrPreset?.baseUrl || undefined} /> + {committedAsrProvider === 'bailian' && ( + <> + +
+ {t('settings.providers.bailianVocabularyIdNote')} +
+ + )} setAsrModelRevision(v => v + 1)} /> )} From 00f150a0910c7b4f5edbd2253a0a61aa4b743b03 Mon Sep 17 00:00:00 2001 From: H-Chris233 Date: Sat, 9 May 2026 21:59:55 +0800 Subject: [PATCH 2/3] Prevent stale Bailian startup resources after connect failure Bailian startup stores the ASR instance before opening the remote session so recorder startup audio can be buffered. If the websocket setup then fails in the active startup path, the provider must be cancelled before the coordinator returns to idle. The partial-result fallback remains intentional and is now documented as matching the existing Volcengine no-loss behavior.\n\nConstraint: OpenLess ASR paths prefer not losing already recognized user speech.\nRejected: Treat every websocket close with partial text as failure | would diverge from Volcengine fallback semantics.\nConfidence: high\nScope-risk: narrow\nTested: cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml\nTested: cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml --lib\nTested: cargo test --manifest-path openless-all/app/src-tauri/backend-tests/Cargo.toml\nCo-authored-by: OmX --- openless-all/app/src-tauri/src/asr/bailian.rs | 1 + openless-all/app/src-tauri/src/coordinator/dictation.rs | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/openless-all/app/src-tauri/src/asr/bailian.rs b/openless-all/app/src-tauri/src/asr/bailian.rs index bf7ef454..aa2ecfab 100644 --- a/openless-all/app/src-tauri/src/asr/bailian.rs +++ b/openless-all/app/src-tauri/src/asr/bailian.rs @@ -406,6 +406,7 @@ impl BailianRealtimeASR { !st.last_result_text.trim().is_empty() || !st.final_segments.is_empty() }; if has_partial { + // 与 Volcengine 保持一致:连接异常但已有 partial 时优先兜底返回,避免丢失用户已识别出的内容。 self.finish_success(); } else { self.finish_error(error); diff --git a/openless-all/app/src-tauri/src/coordinator/dictation.rs b/openless-all/app/src-tauri/src/coordinator/dictation.rs index 5cc6105e..b250410b 100644 --- a/openless-all/app/src-tauri/src/coordinator/dictation.rs +++ b/openless-all/app/src-tauri/src/coordinator/dictation.rs @@ -246,7 +246,9 @@ pub(super) async fn begin_session(inner: &Arc) -> Result<(), String> { set_phase_idle_if_session_matches(inner, current_session_id); return Ok(()); } - StartupRaceStatus::ActiveStarting => {} + StartupRaceStatus::ActiveStarting => { + asr.cancel(); + } } discard_startup_resources_for_session(inner, current_session_id); emit_capsule( From 0a2b6bd14c207ff643ed202e7f85a777643b7f39 Mon Sep 17 00:00:00 2001 From: H-Chris233 Date: Sat, 9 May 2026 23:30:39 +0800 Subject: [PATCH 3/3] Avoid missed Bailian task-start notifications Bailian uses Notify::notify_waiters when the remote task starts. send_last_frame must register the Notified future before checking the started flag so a task-started event cannot land between the check and waiter registration.\n\nConstraint: DashScope stop should not falsely time out when task-started races with user stop.\nRejected: Replace Notify with polling | broader and less direct than the Tokio enable pattern.\nConfidence: high\nScope-risk: narrow\nTested: cargo check --manifest-path openless-all/app/src-tauri/Cargo.toml\nTested: cargo test --manifest-path openless-all/app/src-tauri/Cargo.toml --lib asr::bailian::tests\nCo-authored-by: OmX --- openless-all/app/src-tauri/src/asr/bailian.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/openless-all/app/src-tauri/src/asr/bailian.rs b/openless-all/app/src-tauri/src/asr/bailian.rs index aa2ecfab..484a9125 100644 --- a/openless-all/app/src-tauri/src/asr/bailian.rs +++ b/openless-all/app/src-tauri/src/asr/bailian.rs @@ -216,12 +216,15 @@ impl BailianRealtimeASR { } pub async fn send_last_frame(&self) -> Result<(), BailianASRError> { - let started = { + let started = self.task_started.notified(); + tokio::pin!(started); + started.as_mut().enable(); + let ready = { let st = self.state.lock(); st.task_started || st.task_finished }; - if !started { - tokio::time::timeout(Duration::from_secs(5), self.task_started.notified()) + if !ready { + tokio::time::timeout(Duration::from_secs(5), started) .await .map_err(|_| BailianASRError::FinalResultTimeout)?; }