Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion openless-all/app/src-tauri/src/asr/local/local_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ impl LocalQwenAsr {
}
}

/// Returns the duration, in milliseconds, of the audio currently held in the buffer.
///
/// The coordinator reads this right before calling `transcribe()` in order to derive the
/// dynamic timeout for the local Qwen ASR (`max(15, ceil(audio_s × 0.6) + 10)`).
/// The buffer is only inspected here; nothing is consumed.
///
/// The byte length is halved to obtain a sample count (two bytes per i16 PCM sample) and
/// then scaled by the 16 kHz sample rate, mirroring the `duration_ms` computation in
/// `transcribe()`.
pub fn buffer_duration_ms(&self) -> u64 {
    // The lock guard is a temporary of this statement, so it is released immediately.
    let byte_len = self.buffer.lock().len() as u64;
    let sample_count = byte_len / 2;
    sample_count * 1000 / 16_000
}

/// stop 时调用:把 buffer 的 i16 PCM 转 f32,跑流式转写,token 实时
/// 通过事件吐到前端胶囊;最终文本一起返回供 polish/insert。
pub async fn transcribe(self: Arc<Self>) -> Result<RawTranscript> {
Expand All @@ -52,7 +59,13 @@ impl LocalQwenAsr {
});
}
let duration_ms = (pcm_bytes.len() as u64 / 2) * 1000 / 16_000;
let samples_f32 = i16_le_bytes_to_f32(&pcm_bytes);
let mut samples_f32 = i16_le_bytes_to_f32(&pcm_bytes);
// `transcribe_stream` 内部按 2s chunk 切片;末 chunk < 2s 且缓冲没有
// 静默尾巴时,C 引擎不会把它当作"语音已结束",该 chunk 的转写结果
// 会被丢弃,导致末段内容消失。这里追加 0.5s 静默(@16kHz = 8000 个
// f32 零值)作为收尾信号。`duration_ms` 仍按原始缓冲长度计算(上面
// 一行),padding 不计入。
samples_f32.extend(std::iter::repeat(0.0f32).take(8_000));

// 注册 token 回调:每个稳定 token 抛 `local-asr-token` 事件。
// capsule 前端按 sessionId 累积显示。
Expand Down
47 changes: 47 additions & 0 deletions openless-all/app/src-tauri/src/coordinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3096,6 +3096,42 @@ mod tests {
);
}

#[test]
fn local_qwen_timeout_floors_at_global_timeout_for_short_audio() {
    // A 5 s recording: 5 × 0.6 = 3, + 10 = 13, which is below the floor, so the result is
    // clamped up to the global timeout (15 s). Short recordings keep that safety net.
    let timeout = local_qwen_transcribe_timeout(5.0);
    let floor = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS);
    assert_eq!(timeout, floor);
}

#[test]
fn local_qwen_timeout_scales_with_audio_duration() {
    // A 60 s recording: 60 × 0.6 = 36, + 10 = 46 s. This covers the RTF ≈ 0.5 boundary.
    let timeout = local_qwen_transcribe_timeout(60.0);
    let expected = std::time::Duration::from_secs(46);
    assert_eq!(timeout, expected);
}

#[test]
fn local_qwen_timeout_ceils_partial_seconds() {
    // A 10.1 s recording: 10.1 × 0.6 = 6.06, rounded up to 7, + 10 = 17, which is already
    // above the 15 s floor, so 17 s is the final value.
    let timeout = local_qwen_transcribe_timeout(10.1);
    let expected = std::time::Duration::from_secs(17);
    assert_eq!(timeout, expected);
}

#[test]
fn local_qwen_timeout_handles_zero_duration() {
    // Zero duration (the empty-buffer edge case): 0 × 0.6 = 0, + 10 = 10, which is clamped
    // up to the global timeout (15 s).
    let timeout = local_qwen_transcribe_timeout(0.0);
    let floor = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS);
    assert_eq!(timeout, floor);
}

#[cfg(target_os = "windows")]
#[test]
fn foundry_release_uses_foundry_keep_loaded_preference() {
Expand Down Expand Up @@ -3597,6 +3633,17 @@ fn foundry_audio_transcribe_timeout_duration() -> std::time::Duration {
std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS)
}

/// Dynamic transcription timeout for the local Qwen3-ASR engine.
///
/// A fixed 15 s limit inevitably expires on long recordings (≥ 30 s) on slow machines
/// (RTF ≈ 0.3–0.5), which throws the whole transcript away. The limit is therefore
/// `max(COORDINATOR_GLOBAL_TIMEOUT_SECS, ceil(audio_secs × 0.6) + 10)`: the global timeout
/// stays as the floor for short recordings, while long recordings get 0.6× the audio
/// length plus a 10 s margin, which covers machines with RTF ≤ 0.5.
///
/// `audio_secs` is the length of the buffered audio in seconds.
fn local_qwen_transcribe_timeout(audio_secs: f64) -> std::time::Duration {
    // Float-to-integer `as` casts saturate: NaN and negative values become 0, and values
    // beyond the u64 range become u64::MAX.
    let scaled_secs = (audio_secs * 0.6).ceil() as u64;
    // Saturating add keeps the margin from overflowing on an absurdly large input.
    let with_margin = scaled_secs.saturating_add(10);
    let secs = std::cmp::max(with_margin, COORDINATOR_GLOBAL_TIMEOUT_SECS);
    std::time::Duration::from_secs(secs)
}

/// 检查 begin_session 的 await 间隙是否被 cancel_session 打断。
/// 必须在持有 state lock 的瞬间读,结果一拿就过期,所以用 helper 名字提醒只在
/// 「准备做下一步副作用前」用。
Expand Down
21 changes: 15 additions & 6 deletions openless-all/app/src-tauri/src/coordinator/dictation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1120,10 +1120,18 @@ pub(super) async fn end_session(inner: &Arc<Inner>) -> Result<(), String> {
#[cfg(target_os = "macos")]
ActiveAsr::Local(local) => {
debug_assert!(uses_global_timeout);
// 与 Volcengine/Whisper 一致包一层 global timeout(来自 origin/main)。
// 注:缓存命中时 transcribe 不含 load 时间;冷启动 load 已在 build_local_qwen3
// 提前完成,所以 15s 给 transcribe 本身足够。
let timeout_duration = std::time::Duration::from_secs(COORDINATOR_GLOBAL_TIMEOUT_SECS);
// 缓存命中时 transcribe 不含 load 时间;冷启动 load 已在 build_local_qwen3
// 提前完成。但 transcribe 本身受音频长度影响:用户实测 RTF ≈ 0.3,慢机
// 可达 0.5;15s 固定超时在 ≥ 30s 录音上会把整段结果丢掉。改用动态
// 超时 max(15, ceil(audio_s × 0.6) + 10),公式与单测见
// `local_qwen_transcribe_timeout`。
let audio_secs = (local.buffer_duration_ms() as f64) / 1000.0;
let timeout_duration = local_qwen_transcribe_timeout(audio_secs);
log::info!(
"[coord] local Qwen3-ASR transcribe: audio={:.2}s timeout={}s",
audio_secs,
timeout_duration.as_secs()
);
let result = tokio::time::timeout(timeout_duration, local.transcribe()).await;
inner.local_asr_cache.touch();
schedule_local_asr_release(inner);
Expand All @@ -1146,8 +1154,9 @@ pub(super) async fn end_session(inner: &Arc<Inner>) -> Result<(), String> {
}
Err(_) => {
log::error!(
"[coord] local Qwen3-ASR 全局超时 {} 秒",
COORDINATOR_GLOBAL_TIMEOUT_SECS
"[coord] local Qwen3-ASR 动态超时 {}s(音频 {:.2}s)",
timeout_duration.as_secs(),
audio_secs
);
emit_capsule(
inner,
Expand Down
Loading