From 96be2efa6a223b7eb704e867484ed02f99027d49 Mon Sep 17 00:00:00 2001 From: Benedikt Burger Date: Fri, 8 Aug 2025 10:11:13 +0200 Subject: [PATCH 01/84] Replace unwrap with error handling --- crates/audio/src/lib.rs | 7 +- crates/audio/src/mic.rs | 8 ++- owhisper/owhisper-server/src/commands/run.rs | 14 ++-- plugins/listener/src/ext.rs | 65 ++++++++++++------- plugins/listener/src/fsm.rs | 67 ++++++++++++++++---- 5 files changed, 118 insertions(+), 43 deletions(-) diff --git a/crates/audio/src/lib.rs b/crates/audio/src/lib.rs index dadf85a5c..be988f893 100644 --- a/crates/audio/src/lib.rs +++ b/crates/audio/src/lib.rs @@ -71,8 +71,11 @@ pub struct AudioInput { impl AudioInput { pub fn get_default_mic_device_name() -> String { let host = cpal::default_host(); - let device = host.default_input_device().unwrap(); - device.name().unwrap_or("Unknown Microphone".to_string()) + if let Some(device) = host.default_input_device() { + device.name().unwrap_or("Unknown Microphone".to_string()) + } else { + "No Microphone Available".to_string() + } } pub fn list_mic_devices() -> Vec { diff --git a/crates/audio/src/mic.rs b/crates/audio/src/mic.rs index 20e4f245a..2e219684a 100644 --- a/crates/audio/src/mic.rs +++ b/crates/audio/src/mic.rs @@ -56,7 +56,13 @@ impl MicInput { .ok_or(crate::Error::NoInputDevice)?, }; - let config = device.default_input_config().unwrap(); + let config = match device.default_input_config() { + Ok(config) => config, + Err(e) => { + tracing::error!("Failed to get default input config for device {:?}: {:?}", device.name().unwrap_or_default(), e); + return Err(crate::Error::NoInputDevice); + } + }; Ok(Self { host, diff --git a/owhisper/owhisper-server/src/commands/run.rs b/owhisper/owhisper-server/src/commands/run.rs index d2bffc8cc..8e6c41bb5 100644 --- a/owhisper/owhisper-server/src/commands/run.rs +++ b/owhisper/owhisper-server/src/commands/run.rs @@ -31,11 +31,17 @@ pub async fn handle_run(args: RunArgs) -> anyhow::Result<()> { log::info!("server_handle"); let input_devices: Vec = hypr_audio::MicInput::list_devices(); - log::info!("input_devices: {:#?}", input_devices); + log::info!("input_devices: {:#?}", input_devices); - let input_device = hypr_audio::MicInput::new(args.device).unwrap(); - log::info!("input_device: {}", input_device.device_name()); - let audio_stream = input_device.stream(); + let input_device = match hypr_audio::MicInput::new(args.device) { + Ok(device) => device, + Err(e) => { + log::error!("Failed to initialize microphone: {:?}", e); + std::process::exit(1); + } + }; + log::info!("input_device: {}", input_device.device_name()); + let audio_stream = input_device.stream(); let api_base = format!("ws://127.0.0.1:{}", port); diff --git a/plugins/listener/src/ext.rs b/plugins/listener/src/ext.rs index aaf6ab688..0b28ac7d6 100644 --- a/plugins/listener/src/ext.rs +++ b/plugins/listener/src/ext.rs @@ -45,8 +45,8 @@ impl> ListenerPluginExt for T { #[tracing::instrument(skip_all)] async fn get_current_microphone_device(&self) -> Result, crate::Error> { - let state = self.state::(); - let s = state.lock().await; + let state: tauri::State<'_, crate::SharedState> = self.state::(); + let s = state.inner().lock().await; Ok(s.fsm.get_current_mic_device()) } @@ -55,10 +55,10 @@ impl> ListenerPluginExt for T { &self, device_name: impl Into, ) -> Result<(), crate::Error> { - let state = self.state::(); + let state: tauri::State<'_, crate::SharedState> = self.state::(); { - let mut guard = state.lock().await; + let mut guard = state.inner().lock().await; let event = crate::fsm::StateEvent::MicChange(Some(device_name.into())); guard.fsm.handle(&event).await; } @@ -83,11 +83,19 @@ impl> ListenerPluginExt for T { } #[cfg(not(target_os = "macos"))] - { - let mut mic_sample_stream = hypr_audio::AudioInput::from_mic(None).unwrap().stream(); - let sample = mic_sample_stream.next().await; - Ok(sample.is_some()) - } + { + match hypr_audio::AudioInput::from_mic(None) { + Ok(mut input) => { + let mut mic_sample_stream = input.stream(); + let sample = mic_sample_stream.next().await; + Ok(sample.is_some()) + } + Err(e) => { + tracing::warn!("Failed to access microphone for checking: {:?}", e); + Ok(false) + } + } + } } #[tracing::instrument(skip_all)] @@ -127,10 +135,17 @@ impl> ListenerPluginExt for T { } #[cfg(not(target_os = "macos"))] - { - let mut mic_sample_stream = hypr_audio::AudioInput::from_mic(None).unwrap().stream(); - mic_sample_stream.next().await; - } + { + match hypr_audio::AudioInput::from_mic(None) { + Ok(mut input) => { + let mut mic_sample_stream = input.stream(); + mic_sample_stream.next().await; + } + Err(e) => { + tracing::warn!("Failed to access microphone for request: {:?}", e); + } + } + } Ok(()) } @@ -162,8 +177,10 @@ impl> ListenerPluginExt for T { async fn open_microphone_access_settings(&self) -> Result<(), crate::Error> { std::process::Command::new("open") .arg("x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone") - .spawn()? - .wait()?; + .spawn() + .map_err(|e| crate::Error::IoError(e))? + .wait() + .map_err(|e| crate::Error::IoError(e))?; Ok(()) } @@ -171,34 +188,36 @@ impl> ListenerPluginExt for T { async fn open_system_audio_access_settings(&self) -> Result<(), crate::Error> { std::process::Command::new("open") .arg("x-apple.systempreferences:com.apple.preference.security?Privacy_AudioCapture") - .spawn()? - .wait()?; + .spawn() + .map_err(|e| crate::Error::IoError(e))? + .wait() + .map_err(|e| crate::Error::IoError(e))?; Ok(()) } #[tracing::instrument(skip_all)] async fn get_state(&self) -> crate::fsm::State { - let state = self.state::(); - let guard = state.lock().await; + let state: tauri::State<'_, crate::SharedState> = self.state::(); + let guard = state.inner().lock().await; guard.fsm.state().clone() } #[tracing::instrument(skip_all)] async fn get_mic_muted(&self) -> bool { - let state = self.state::(); + let state: tauri::State<'_, crate::SharedState> = self.state::(); { - let guard = state.lock().await; + let guard = state.inner().lock().await; guard.fsm.is_mic_muted() } } #[tracing::instrument(skip_all)] async fn get_speaker_muted(&self) -> bool { - let state = self.state::(); + let state: tauri::State<'_, crate::SharedState> = self.state::(); { - let guard = state.lock().await; + let guard = state.inner().lock().await; guard.fsm.is_speaker_muted() } } diff --git a/plugins/listener/src/fsm.rs b/plugins/listener/src/fsm.rs index 22fb25651..46bbe06f0 100644 --- a/plugins/listener/src/fsm.rs +++ b/plugins/listener/src/fsm.rs @@ -205,7 +205,13 @@ impl Session { let session_id = id.into(); let onboarding_session_id = self.app.db_onboarding_session_id().await?; - let user_id = self.app.db_user_id().await?.unwrap(); + let user_id = match self.app.db_user_id().await? { + Some(id) => id, + None => { + tracing::error!("No user ID found for session"); + return Err(crate::Error::MissingUserId); + } + }; self.session_id = Some(session_id.clone()); let (record, languages, jargons, redemption_time_ms) = { @@ -298,7 +304,10 @@ impl Session { channels.speaker_tx.clone(), )); - let app_dir = self.app.path().app_data_dir().unwrap(); + let app_dir = self.app.path().app_data_dir().map_err(|e| { + tracing::error!("Failed to get app data directory: {:?}", e); + crate::Error::IoError(std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e))) + })?; tasks.spawn({ let app = self.app.clone(); @@ -311,7 +320,13 @@ impl Session { let process_speaker_tx = channels.process_speaker_tx.clone(); async move { - let mut aec = hypr_aec::AEC::new().unwrap(); + let mut aec = match hypr_aec::AEC::new() { + Ok(aec) => aec, + Err(e) => { + tracing::error!("Failed to initialize AEC: {:?}", e); + return; + } + }; let mut last_broadcast = Instant::now(); // TODO: AGC might be needed. @@ -473,10 +488,17 @@ impl Session { let session = session.clone(); async move { - let listen_stream = listen_client + let listen_stream = match listen_client .from_realtime_audio(mic_audio_stream, speaker_audio_stream) .await - .unwrap(); + { + Ok(stream) => stream, + Err(e) => { + tracing::error!("Failed to create listen stream: {:?}", e); + let _ = stop_tx.send(()).await; + return; + } + }; futures_util::pin_mut!(listen_stream); @@ -486,16 +508,24 @@ impl Session { let _meta = result.meta.clone(); { - let updated_words = update_session(&app, &session.id, result.words) + let updated_words = match update_session(&app, &session.id, result.words) .await - .unwrap(); + { + Ok(words) => words, + Err(e) => { + tracing::error!("Failed to update session: {:?}", e); + continue; + } + }; SessionEvent::Words { words: updated_words, } .emit(&app) } - .unwrap(); + .unwrap_or_else(|e| { + tracing::error!("Failed to emit Words event: {:?}", e); + }); } Ok(None) => { tracing::info!("listen_stream_ended"); @@ -628,7 +658,9 @@ async fn update_session( .ok_or(crate::Error::NoneSession)?; session.words.extend(words); - app.db_upsert_session(session.clone()).await.unwrap(); + if let Err(e) = app.db_upsert_session(session.clone()).await { + tracing::error!("Failed to upsert session: {:?}", e); + } Ok(session.words) } @@ -672,7 +704,10 @@ impl Session { if self.session_id.is_some() && self.tasks.is_some() { if let Some(session_id) = self.session_id.clone() { self.teardown_resources().await; - self.setup_resources(&session_id).await.unwrap(); + if let Err(e) = self.setup_resources(&session_id).await { + tracing::error!("Failed to setup resources: {:?}", e); + // Handle the error appropriately - maybe transition to inactive state + } } } @@ -788,9 +823,15 @@ impl Session { tracing::info!("transitioned from `{:?}` to `{:?}`", source, target); match target { - State::RunningActive {} => SessionEvent::RunningActive {}.emit(&self.app).unwrap(), - State::RunningPaused {} => SessionEvent::RunningPaused {}.emit(&self.app).unwrap(), - State::Inactive {} => SessionEvent::Inactive {}.emit(&self.app).unwrap(), + State::RunningActive {} => { + let _ = SessionEvent::RunningActive {}.emit(&self.app); + } + State::RunningPaused {} => { + let _ = SessionEvent::RunningPaused {}.emit(&self.app); + } + State::Inactive {} => { + let _ = SessionEvent::Inactive {}.emit(&self.app); + } } if let Some(tx) = &self.session_state_tx { From 30e018265b0f78d41b9fee55ab3fa0180062ae5a Mon Sep 17 00:00:00 2001 From: Benedikt Burger Date: Fri, 8 Aug 2025 10:12:23 +0200 Subject: [PATCH 02/84] Add detectors for linux --- crates/audio/src/speaker/linux.rs | 68 +++++++++++++++++++++++++---- crates/audio/src/speaker/mod.rs | 22 +++++----- crates/detect/src/app/linux.rs | 64 +++++++++++++++++++++++++++ crates/detect/src/app/mod.rs | 10 +++-- crates/detect/src/browser/linux.rs | 70 ++++++++++++++++++++++++++++++ crates/detect/src/browser/mod.rs | 10 +++-- crates/detect/src/mic/linux.rs | 60 +++++++++++++++++++++++++ crates/detect/src/mic/mod.rs | 10 +++-- 8 files changed, 285 insertions(+), 29 deletions(-) create mode 100644 crates/detect/src/app/linux.rs create mode 100644 crates/detect/src/browser/linux.rs create mode 100644 crates/detect/src/mic/linux.rs diff --git a/crates/audio/src/speaker/linux.rs b/crates/audio/src/speaker/linux.rs index 213b30e72..20d17e9dc 100644 --- a/crates/audio/src/speaker/linux.rs +++ b/crates/audio/src/speaker/linux.rs @@ -1,26 +1,60 @@ use futures_util::Stream; +use std::pin::Pin; +use std::sync::mpsc; +use std::task::{Context, Poll}; +use std::thread; +use std::time::Duration; pub struct SpeakerInput {} impl SpeakerInput { - pub fn new(_sample_rate_override: Option) -> Self { - Self {} + pub fn new() -> Result { + tracing::debug!("Creating Linux SpeakerInput"); + Ok(Self {}) } pub fn stream(self) -> SpeakerStream { + tracing::debug!("Creating Linux SpeakerStream"); SpeakerStream::new() } } -pub struct SpeakerStream {} +pub struct SpeakerStream { + receiver: mpsc::Receiver, + _handle: thread::JoinHandle<()>, // Keep the thread alive +} impl SpeakerStream { pub fn new() -> Self { - Self {} + tracing::debug!("Creating Linux SpeakerStream"); + // For now, we'll create a mock implementation that generates silence + // A proper implementation would capture system audio using ALSA + let (sender, receiver) = mpsc::channel::(); + + // Spawn a thread to simulate audio capture + let handle = thread::spawn(move || { + tracing::debug!("Starting Linux SpeakerStream thread"); + loop { + // Send silence (0.0) to simulate no audio + // In a real implementation, this would capture actual system audio + if sender.send(0.0).is_err() { + tracing::debug!("SpeakerStream channel closed, exiting thread"); + break; // Channel closed + } + + // Small delay to prevent busy looping + thread::sleep(Duration::from_millis(10)); + } + }); + + Self { + receiver, + _handle: handle, + } } pub fn sample_rate(&self) -> u32 { - 16000 + 48000 // Standard sample rate } } @@ -28,9 +62,25 @@ impl Stream for SpeakerStream { type Item = f32; fn poll_next( - self: std::pin::Pin<&mut Self>, - _cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { - std::task::Poll::Ready(Some(0.0)) + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + match self.receiver.try_recv() { + Ok(sample) => Poll::Ready(Some(sample)), + Err(mpsc::TryRecvError::Empty) => { + // No data available right now, but we'll check again later + cx.waker().wake_by_ref(); + Poll::Pending + } + Err(mpsc::TryRecvError::Disconnected) => Poll::Ready(None), + } + } +} + +impl Drop for SpeakerStream { + fn drop(&mut self) { + // The thread will automatically exit when the sender is dropped + // and the receiver gets a Disconnected error + tracing::debug!("Dropping SpeakerStream"); } } diff --git a/crates/audio/src/speaker/mod.rs b/crates/audio/src/speaker/mod.rs index d29a10a60..79c47c497 100644 --- a/crates/audio/src/speaker/mod.rs +++ b/crates/audio/src/speaker/mod.rs @@ -29,26 +29,26 @@ pub struct SpeakerInput { } impl SpeakerInput { - #[cfg(any(target_os = "macos", target_os = "windows"))] + #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub fn new() -> Result { let inner = PlatformSpeakerInput::new()?; Ok(Self { inner }) } - #[cfg(not(any(target_os = "macos", target_os = "windows")))] + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] pub fn new() -> Result { Err(anyhow::anyhow!( "'SpeakerInput::new' is not supported on this platform" )) } - #[cfg(any(target_os = "macos", target_os = "windows"))] + #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub fn stream(self) -> Result { let inner = self.inner.stream(); Ok(SpeakerStream { inner }) } - #[cfg(not(any(target_os = "macos", target_os = "windows")))] + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] pub fn stream(self) -> Result { Err(anyhow::anyhow!( "'SpeakerInput::stream' is not supported on this platform" @@ -65,15 +65,15 @@ impl Stream for SpeakerStream { type Item = f32; fn poll_next( - mut self: std::pin::Pin<&mut Self>, - cx: &mut std::task::Context<'_>, + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, ) -> std::task::Poll> { - #[cfg(any(target_os = "macos", target_os = "windows"))] + #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] { - self.inner.poll_next_unpin(cx) + self.get_mut().inner.poll_next_unpin(_cx) } - #[cfg(not(any(target_os = "macos", target_os = "windows")))] + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] { std::task::Poll::Pending } @@ -85,12 +85,12 @@ impl kalosm_sound::AsyncSource for SpeakerStream { self } - #[cfg(any(target_os = "macos", target_os = "windows"))] + #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] fn sample_rate(&self) -> u32 { self.inner.sample_rate() } - #[cfg(not(any(target_os = "macos", target_os = "windows")))] + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] fn sample_rate(&self) -> u32 { 0 } diff --git a/crates/detect/src/app/linux.rs b/crates/detect/src/app/linux.rs new file mode 100644 index 000000000..5a05b34cc --- /dev/null +++ b/crates/detect/src/app/linux.rs @@ -0,0 +1,64 @@ +use crate::BackgroundTask; +use std::process::Command; +use tokio::time::{interval, Duration}; + +// Common meeting applications on Linux +const MEETING_APP_LIST: [&str; 6] = [ + "zoom", // Zoom + "teams", // Microsoft Teams + "skypeforlinux", // Skype + "discord", // Discord + "slack", // Slack + "jitsi-meet", // Jitsi Meet +]; + +pub struct Detector { + background: BackgroundTask, +} + +impl Default for Detector { + fn default() -> Self { + Self { + background: BackgroundTask::default(), + } + } +} + +impl crate::Observer for Detector { + fn start(&mut self, f: crate::DetectCallback) { + self.background.start(|running, mut rx| async move { + let mut interval_timer = interval(Duration::from_secs(5)); + + loop { + tokio::select! { + _ = &mut rx => { + break; + } + _ = interval_timer.tick() => { + if !running.load(std::sync::atomic::Ordering::SeqCst) { + break; + } + + // Check for running meeting applications + if let Ok(output) = Command::new("ps") + .args(["aux"]) + .output() + { + if let Ok(stdout) = String::from_utf8(output.stdout) { + for app in &MEETING_APP_LIST { + if stdout.contains(app) { + f(app.to_string()); + } + } + } + } + } + } + } + }); + } + + fn stop(&mut self) { + self.background.stop(); + } +} \ No newline at end of file diff --git a/crates/detect/src/app/mod.rs b/crates/detect/src/app/mod.rs index 51a921a78..69dce6a57 100644 --- a/crates/detect/src/app/mod.rs +++ b/crates/detect/src/app/mod.rs @@ -1,12 +1,16 @@ #[cfg(target_os = "macos")] mod macos; -#[cfg(target_os = "macos")] -type PlatformDetector = macos::Detector; - #[cfg(target_os = "windows")] mod windows; +#[cfg(target_os = "linux")] +mod linux; + +#[cfg(target_os = "macos")] +type PlatformDetector = macos::Detector; #[cfg(target_os = "windows")] type PlatformDetector = windows::Detector; +#[cfg(target_os = "linux")] +type PlatformDetector = linux::Detector; #[derive(Default)] pub struct AppDetector { diff --git a/crates/detect/src/browser/linux.rs b/crates/detect/src/browser/linux.rs new file mode 100644 index 000000000..f9746cbb5 --- /dev/null +++ b/crates/detect/src/browser/linux.rs @@ -0,0 +1,70 @@ +use crate::BackgroundTask; +use std::process::Command; +use tokio::time::{interval, Duration}; + +// Common browsers on Linux +const BROWSER_NAMES: [&str; 4] = [ + "firefox", + "chrome", + "chromium", + "brave", +]; + +pub struct Detector { + background: BackgroundTask, + detected_browsers: std::collections::HashSet, +} + +impl Default for Detector { + fn default() -> Self { + Self { + background: BackgroundTask::default(), + detected_browsers: std::collections::HashSet::new(), + } + } +} + +impl crate::Observer for Detector { + fn start(&mut self, f: crate::DetectCallback) { + let mut detected_browsers = self.detected_browsers.clone(); + + self.background.start(|running, mut rx| async move { + let mut interval_timer = interval(Duration::from_secs(5)); + + loop { + tokio::select! { + _ = &mut rx => { + break; + } + _ = interval_timer.tick() => { + if !running.load(std::sync::atomic::Ordering::SeqCst) { + break; + } + + // Check for running browsers + if let Ok(output) = Command::new("ps") + .args(["aux"]) + .output() + { + if let Ok(stdout) = String::from_utf8(output.stdout) { + for browser in &BROWSER_NAMES { + if stdout.contains(browser) && !detected_browsers.contains(*browser) { + detected_browsers.insert(browser.to_string()); + // For now, just report that a browser is running + // In a future implementation, we could try to extract URLs + f(format!("{} running", browser)); + } + } + } + } + } + } + } + }); + } + + fn stop(&mut self) { + self.background.stop(); + self.detected_browsers.clear(); + } +} \ No newline at end of file diff --git a/crates/detect/src/browser/mod.rs b/crates/detect/src/browser/mod.rs index d10d24609..c95f87be7 100644 --- a/crates/detect/src/browser/mod.rs +++ b/crates/detect/src/browser/mod.rs @@ -1,12 +1,16 @@ #[cfg(target_os = "macos")] mod macos; -#[cfg(target_os = "macos")] -type PlatformDetector = macos::Detector; - #[cfg(target_os = "windows")] mod windows; +#[cfg(target_os = "linux")] +mod linux; + +#[cfg(target_os = "macos")] +type PlatformDetector = macos::Detector; #[cfg(target_os = "windows")] type PlatformDetector = windows::Detector; +#[cfg(target_os = "linux")] +type PlatformDetector = linux::Detector; #[derive(Default)] pub struct BrowserDetector { diff --git a/crates/detect/src/mic/linux.rs b/crates/detect/src/mic/linux.rs new file mode 100644 index 000000000..4f2ad5045 --- /dev/null +++ b/crates/detect/src/mic/linux.rs @@ -0,0 +1,60 @@ +use crate::BackgroundTask; +use std::process::Command; +use tokio::time::{interval, Duration}; + +pub struct Detector { + background: BackgroundTask, +} + +impl Default for Detector { + fn default() -> Self { + Self { + background: BackgroundTask::default(), + } + } +} + +impl crate::Observer for Detector { + fn start(&mut self, f: crate::DetectCallback) { + self.background.start(|running, mut rx| async move { + let mut interval_timer = interval(Duration::from_secs(2)); + + loop { + tokio::select! { + _ = &mut rx => { + break; + } + _ = interval_timer.tick() => { + if !running.load(std::sync::atomic::Ordering::SeqCst) { + break; + } + + // Check for microphone usage via PulseAudio + if is_microphone_in_use() { + f("microphone_in_use".to_string()); + } + } + } + } + }); + } + + fn stop(&mut self) { + self.background.stop(); + } +} + +fn is_microphone_in_use() -> bool { + // Check if any source-outputs exist (applications using microphone) + if let Ok(output) = Command::new("pactl") + .args(["list", "source-outputs", "short"]) + .output() + { + if let Ok(stdout) = String::from_utf8(output.stdout) { + // If there's any output, it means applications are using the microphone + return !stdout.trim().is_empty(); + } + } + + false +} \ No newline at end of file diff --git a/crates/detect/src/mic/mod.rs b/crates/detect/src/mic/mod.rs index 8b99dee4b..d48fd4ab0 100644 --- a/crates/detect/src/mic/mod.rs +++ b/crates/detect/src/mic/mod.rs @@ -1,12 +1,16 @@ #[cfg(target_os = "macos")] mod macos; -#[cfg(target_os = "macos")] -type PlatformDetector = macos::Detector; - #[cfg(target_os = "windows")] mod windows; +#[cfg(target_os = "linux")] +mod linux; + +#[cfg(target_os = "macos")] +type PlatformDetector = macos::Detector; #[cfg(target_os = "windows")] type PlatformDetector = windows::Detector; +#[cfg(target_os = "linux")] +type PlatformDetector = linux::Detector; #[derive(Default)] pub struct MicDetector { From f3e1790973029c8b981341b746f33147185e95c1 Mon Sep 17 00:00:00 2001 From: Benedikt Burger Date: Fri, 8 Aug 2025 15:29:43 +0200 Subject: [PATCH 03/84] Good changes --- Cargo.lock | 1 + crates/audio/Cargo.toml | 1 + crates/audio/src/lib.rs | 66 ++++++++++++++++++++++++++++++++++++---- crates/detect/Cargo.toml | 2 +- 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 228438346..c2e435f50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -742,6 +742,7 @@ dependencies = [ "thiserror 2.0.12", "tokio", "tracing", + "tracing-subscriber", "wasapi", ] diff --git a/crates/audio/Cargo.toml b/crates/audio/Cargo.toml index abde9554e..ba563635e 100644 --- a/crates/audio/Cargo.toml +++ b/crates/audio/Cargo.toml @@ -23,6 +23,7 @@ rodio = { workspace = true } ebur128 = "0.1.10" kalosm-sound = { workspace = true, default-features = false } ringbuf = { workspace = true } +tracing-subscriber = { workspace = true } [target.'cfg(target_os = "macos")'.dependencies] cidre = { workspace = true, features = ["av"] } diff --git a/crates/audio/src/lib.rs b/crates/audio/src/lib.rs index be988f893..c70221cde 100644 --- a/crates/audio/src/lib.rs +++ b/crates/audio/src/lib.rs @@ -83,18 +83,60 @@ impl AudioInput { let devices: Vec = host .input_devices() - .map(|devices| devices.collect()) + .map(|devices| { + let device_vec: Vec = devices.collect(); + tracing::debug!("Found {} input devices in list_mic_devices", device_vec.len()); + device_vec + }) + .map_err(|e| { + tracing::error!("Failed to enumerate input devices in list_mic_devices: {:?}", e); + e + }) .unwrap_or_else(|_| Vec::new()); - devices + let mut result: Vec = devices .into_iter() - .filter_map(|d| d.name().ok()) - .filter(|d| d != "hypr-audio-tap") - .collect() + .filter_map(|d| { + let name = d.name(); + match &name { + Ok(n) => tracing::debug!("Processing device: {}", n), + Err(e) => tracing::debug!("Processing device with error: {:?}", e), + } + name.ok() + }) + .filter(|d| { + let filtered = d != "hypr-audio-tap"; + if !filtered { + tracing::debug!("Filtering out device: {}", d); + } + filtered + }) + .collect(); + + // Add virtual echo-cancel device if it exists + if std::process::Command::new("pactl") + .args(["list", "sources", "short"]) + .output() + .map(|output| { + String::from_utf8_lossy(&output.stdout) + .contains("echo-cancel-source") + }) + .unwrap_or(false) + { + if !result.contains(&"echo-cancel-source".to_string()) { + tracing::debug!("Adding virtual echo-cancel-source device"); + result.push("echo-cancel-source".to_string()); + } + } + + tracing::debug!("Returning {} devices from list_mic_devices", result.len()); + result } pub fn from_mic(device_name: Option) -> Result { + tracing::info!("Creating AudioInput from microphone with device name: {:?}", device_name); let mic = MicInput::new(device_name)?; + tracing::debug!("Successfully created MicInput"); Ok(Self { source: AudioSource::RealtimeMic, @@ -105,10 +147,22 @@ impl AudioInput { } pub fn from_speaker() -> Self { + tracing::debug!("Creating AudioInput from speaker"); + let speaker = match SpeakerInput::new() { + Ok(speaker) => { + tracing::debug!("Successfully created SpeakerInput"); + Some(speaker) + } + Err(e) => { + tracing::error!("Failed to create SpeakerInput: {}", e); + None + } + }; + Self { source: AudioSource::RealtimeSpeaker, mic: None, - speaker: Some(SpeakerInput::new().unwrap()), + speaker, data: None, } } diff --git a/crates/detect/Cargo.toml b/crates/detect/Cargo.toml index 488e959a1..6301bfc05 100644 --- a/crates/detect/Cargo.toml +++ b/crates/detect/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" lazy_static = { workspace = true } regex = { workspace = true } thiserror = { workspace = true } -tokio = { workspace = true, features = ["rt", "sync", "time"] } +tokio = { workspace = true, features = ["rt", "sync", "time", "macros"] } url = { workspace = true } [target."cfg(target_os = \"macos\")".dependencies] From 8846dbafd1b736e9a98792ee9b190337c7043b9e Mon Sep 17 00:00:00 2001 From: Benedikt Burger Date: Fri, 8 Aug 2025 15:30:43 +0200 Subject: [PATCH 04/84] Changes which might not be necessary --- crates/audio/src/bin/simple_test.rs | 43 ++ crates/audio/src/bin/test_mic.rs | 72 ++++ crates/audio/src/bin/test_speaker.rs | 25 ++ crates/audio/src/mic.rs | 401 ++++++++++++++++-- crates/file/examples/checksum.rs | 18 + .../examples/show_data_dir.rs | 17 + crates/whisper-local/examples/direct_test.rs | 50 +++ .../whisper-local/examples/list_backends.rs | 11 + crates/whisper-local/examples/test_model.rs | 21 + crates/whisper-local/src/model.rs | 50 ++- plugins/listener/src/error.rs | 22 +- 11 files changed, 676 insertions(+), 54 deletions(-) create mode 100644 crates/audio/src/bin/simple_test.rs create mode 100644 crates/audio/src/bin/test_mic.rs create mode 100644 crates/audio/src/bin/test_speaker.rs create mode 100644 crates/file/examples/checksum.rs create mode 100644 crates/transcribe-whisper-local/examples/show_data_dir.rs create mode 100644 crates/whisper-local/examples/direct_test.rs create mode 100644 crates/whisper-local/examples/list_backends.rs create mode 100644 crates/whisper-local/examples/test_model.rs diff --git a/crates/audio/src/bin/simple_test.rs b/crates/audio/src/bin/simple_test.rs new file mode 100644 index 000000000..7f22173e5 --- /dev/null +++ b/crates/audio/src/bin/simple_test.rs @@ -0,0 +1,43 @@ +use cpal::traits::{DeviceTrait, HostTrait}; + +fn main() { + println!("Simple microphone test..."); + + let host = cpal::default_host(); + + // Try to get the default input device + let default_device = host.default_input_device(); + println!("Default input device: {:?}", default_device.as_ref().and_then(|d| d.name().ok())); + + if let Some(device) = default_device { + println!("Trying to get default input config..."); + match device.default_input_config() { + Ok(config) => { + println!("Success! Config: {:?}", config); + }, + Err(e) => { + println!("Failed to get default input config: {:?}", e); + } + } + } else { + println!("No default input device available"); + } + + // Try to enumerate devices + println!("\nEnumerating input devices:"); + match host.input_devices() { + Ok(devices) => { + let device_list: Vec<_> = devices.collect(); + println!("Found {} devices", device_list.len()); + for (i, device) in device_list.iter().enumerate() { + match device.name() { + Ok(name) => println!(" {}: {}", i, name), + Err(e) => println!(" {}: Error getting name: {:?}", i, e), + } + } + }, + Err(e) => { + println!("Failed to enumerate devices: {:?}", e); + } + } +} \ No newline at end of file diff --git a/crates/audio/src/bin/test_mic.rs b/crates/audio/src/bin/test_mic.rs new file mode 100644 index 000000000..b2a7f8cc3 --- /dev/null +++ b/crates/audio/src/bin/test_mic.rs @@ -0,0 +1,72 @@ +use audio::AudioInput; + +fn main() { + println!("Testing microphone access..."); + + // Enable logging + tracing_subscriber::fmt() + .with_max_level(tracing::Level::DEBUG) + .init(); + + println!("Listing available microphone devices:"); + let devices = audio::AudioInput::list_mic_devices(); + for (i, device) in devices.iter().enumerate() { + println!(" {}. {}", i, device); + } + + println!("\nGetting default microphone device name:"); + let default_device = audio::AudioInput::get_default_mic_device_name(); + println!(" Default device: {}", default_device); + + println!("\nTrying to create microphone input with default device:"); + match AudioInput::from_mic(None) { + Ok(_) => println!(" Success! Microphone input created."), + Err(e) => println!(" Failed: {:?}", e), + } + + println!("\nTrying to create microphone input with specific device name:"); + if !devices.is_empty() { + let first_device = &devices[0]; + println!(" Trying device: {}", first_device); + match AudioInput::from_mic(Some(first_device.clone())) { + Ok(_) => println!(" Success! Microphone input created with specific device."), + Err(e) => println!(" Failed: {:?}", e), + } + + // Specifically test the echo-cancel-source device if it's available + if devices.contains(&"echo-cancel-source".to_string()) { + println!("\nSpecifically testing echo-cancel-source device:"); + match AudioInput::from_mic(Some("echo-cancel-source".to_string())) { + Ok(_) => println!(" Success! echo-cancel-source device is working"), + Err(e) => println!(" Failed: {:?}", e), + } + } + } else { + // Try some known working device names + println!(" No devices available from enumeration, trying known device names:"); + let test_devices = vec![ + "echo-cancel-source".to_string(), + "HD-Audio Generic".to_string(), + "default".to_string(), + "default:CARD=Generic_1".to_string(), + ]; + + for device_name in &test_devices { + println!(" Trying device: {}", device_name); + match AudioInput::from_mic(Some(device_name.clone())) { + Ok(_) => { + println!(" Success! Microphone input created with device: {}", device_name); + break; + }, + Err(e) => println!(" Failed: {:?}", e), + } + } + + // Specifically test the echo-cancel-source device + println!("\nSpecifically testing echo-cancel-source device:"); + match AudioInput::from_mic(Some("echo-cancel-source".to_string())) { + Ok(_) => println!(" Success! echo-cancel-source device is working"), + Err(e) => println!(" Failed: {:?}", e), + } + } +} \ No newline at end of file diff --git a/crates/audio/src/bin/test_speaker.rs b/crates/audio/src/bin/test_speaker.rs new file mode 100644 index 000000000..1d2f14da1 --- /dev/null +++ b/crates/audio/src/bin/test_speaker.rs @@ -0,0 +1,25 @@ +use audio::AudioInput; + +fn main() { + println!("Testing SpeakerInput creation..."); + + let mut audio_input = AudioInput::from_speaker(); + println!("SpeakerInput created successfully!"); + + // Try to create a stream + let stream = audio_input.stream(); + println!("Speaker stream created successfully!"); + + // Try to get a few samples + match stream { + audio::AudioStream::RealtimeSpeaker { speaker: _ } => { + println!("Got speaker stream"); + // We won't actually poll the stream in this simple test + } + _ => { + println!("Unexpected stream type"); + } + } + + println!("Test completed successfully!"); +} \ No newline at end of file diff --git a/crates/audio/src/mic.rs b/crates/audio/src/mic.rs index 2e219684a..f1f7b6228 100644 --- a/crates/audio/src/mic.rs +++ b/crates/audio/src/mic.rs @@ -32,44 +32,359 @@ impl MicInput { } pub fn new(device_name: Option) -> Result { - let host = cpal::default_host(); + let host = cpal::default_host(); - let default_input_device = host.default_input_device(); - let input_devices: Vec = host - .input_devices() - .map(|devices| devices.collect()) - .unwrap_or_else(|_| Vec::new()); - - let device = match device_name { - None => default_input_device - .or_else(|| input_devices.into_iter().next()) - .ok_or(crate::Error::NoInputDevice)?, - Some(name) => input_devices - .into_iter() - .find(|d| d.name().unwrap_or_default() == name) - .or(default_input_device) - .or_else(|| { - host.input_devices() - .ok() - .and_then(|mut devices| devices.next()) + tracing::info!("Initializing microphone input..."); + + let default_input_device = host.default_input_device(); + tracing::debug!("Default input device: {:?}", default_input_device.as_ref().and_then(|d| d.name().ok())); + + // Log host information + tracing::debug!("Available hosts: {:?}", cpal::available_hosts()); + tracing::debug!("Default host: {:?}", host.id()); + + let input_devices: Vec = host + .input_devices() + .map(|devices| { + let devices: Vec = devices.collect(); + tracing::debug!("Found {} input devices", devices.len()); + devices }) - .ok_or(crate::Error::NoInputDevice)?, - }; + .unwrap_or_else(|e| { + tracing::error!("Failed to enumerate input devices: {:?}", e); + Vec::new() + }); + + for (i, device) in input_devices.iter().enumerate() { + match device.name() { + Ok(name) => tracing::debug!("Input device {}: {}", i, name), + Err(e) => tracing::debug!("Input device {}: Failed to get name: {:?}", i, e), + } + } + + // Special handling for echo-cancel-source + if device_name.as_ref().map(|n| n.as_str()) == Some("echo-cancel-source") || + (device_name.is_none() && input_devices.is_empty()) { + + // Check if echo-cancel-source is available + let echo_cancel_available = std::process::Command::new("pactl") + .args(["list", "sources", "short"]) + .output() + .map(|output| { + String::from_utf8_lossy(&output.stdout) + .contains("echo-cancel-source") + }) + .unwrap_or(false); + + if echo_cancel_available { + tracing::debug!("Echo cancel source available in pactl: {}", echo_cancel_available); + + if let Some(ref default_device) = default_input_device { + if let Ok(name) = default_device.name() { + tracing::debug!("Trying default host device with manual config: {}", name); + + // Try common configurations that should work with PipeWire + let configs_to_try = vec![ + // 48kHz stereo float32 - common PipeWire config + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ), + // 44.1kHz stereo float32 - common audio config + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(44100), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ), + // 48kHz stereo int16 - alternative format + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::I16, + ), + ]; + + for config in configs_to_try { + tracing::debug!("Trying manual config: {:?}", config); + + // Try to build a test stream to validate the config + let test_result = match config.sample_format() { + cpal::SampleFormat::F32 => { + default_device.build_input_stream::( + &config.config(), + |_data: &[f32], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing + |err| tracing::debug!("Test stream error: {}", err), + None, + ) + }, + cpal::SampleFormat::I16 => { + default_device.build_input_stream::( + &config.config(), + |_data: &[i16], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing + |err| tracing::debug!("Test stream error: {}", err), + None, + ) + }, + _ => { + tracing::debug!("Unsupported sample format for testing"); + continue; + } + }; + + if let Ok(test_stream) = test_result { + // If we can build a stream, the config is good + drop(test_stream); // Clean up the test stream + tracing::debug!("Successfully validated config for device: {}", name); + return Ok(Self { + host, + device: default_device.clone(), + config, + }); + } else { + tracing::debug!("Failed to build test stream with config: {:?}", config); + tracing::debug!("Test result error: {:?}", test_result.err()); + } + } + } + } + + // If all manual configurations failed but we know echo-cancel-source exists, + // return a standard configuration that should work + tracing::debug!("All manual configurations failed, but echo-cancel-source is available. Using standard config."); + if let Some(ref default_device) = default_input_device { + let standard_config = cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ); + return Ok(Self { + host, + device: default_device.clone(), + config: standard_config, + }); + } + + // If the default device didn't work, try ALSA host + if let Ok(alsa_host) = cpal::host_from_id(cpal::HostId::Alsa) { + tracing::debug!("Created ALSA host successfully"); + + // Try the same approach with ALSA host + if let Ok(devices) = alsa_host.input_devices() { + for device in devices { + if let Ok(name) = device.name() { + tracing::debug!("ALSADevice: {}", name); + + // Try the same configurations + let configs_to_try = vec![ + // 48kHz stereo float32 - common PipeWire config + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ), + // 44.1kHz stereo float32 - common audio config + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(44100), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ), + // 48kHz stereo int16 - alternative format + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::I16, + ), + ]; + + for config in configs_to_try { + tracing::debug!("Trying ALSA manual config: {:?}", config); + + // Try to build a test stream to validate the config + let test_result = match config.sample_format() { + cpal::SampleFormat::F32 => { + device.build_input_stream::( + &config.config(), + |_data: &[f32], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing + |err| tracing::debug!("Test stream error: {}", err), + None, + ) + }, + cpal::SampleFormat::I16 => { + device.build_input_stream::( + &config.config(), + |_data: &[i16], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing + |err| tracing::debug!("Test stream error: {}", err), + None, + ) + }, + _ => { + tracing::debug!("Unsupported sample format for testing"); + continue; + } + }; + + if let Ok(test_stream) = test_result { + // If we can build a stream, the config is good + drop(test_stream); // Clean up the test stream + tracing::debug!("Successfully validated ALSA config for device: {}", name); + return Ok(Self { + host: alsa_host, + device, + config, + }); + } else { + tracing::debug!("Failed to build ALSA test stream with config: {:?}", config); + } + } + } + } + } else { + tracing::debug!("Failed to enumerate ALSA input devices"); + } + } else { + tracing::debug!("Failed to create ALSA host"); + } + + // If ALSA approaches also failed but we know echo-cancel-source exists, + // return a standard configuration that should work + tracing::debug!("All ALSA configurations failed, but echo-cancel-source is available. Using standard config."); + if let Some(ref default_device) = default_input_device { + let standard_config = cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ); + return Ok(Self { + host, + device: default_device.clone(), + config: standard_config, + }); + } + } + } + + // If we have no input devices, try to use the default device directly + if input_devices.is_empty() { + tracing::warn!("No input devices found through enumeration"); + + // Try to use the default device directly + if let Some(default_device) = default_input_device { + tracing::debug!("Trying default device directly"); + match default_device.default_input_config() { + Ok(config) => { + tracing::debug!("Default device works directly"); + return Ok(Self { + host, + device: default_device, + config, + }); + }, + Err(e) => { + tracing::error!("Default device failed even when accessed directly: {:?}", e); + } + } + } + + // If that fails, try some known working ALSA device names + tracing::debug!("Trying known ALSA device names"); + let known_devices = vec![ + "default:CARD=Generic_1", + "plughw:CARD=Generic_1,DEV=0", + "hw:CARD=Generic_1,DEV=0", + ]; + + // Note: CPAL doesn't provide a way to create devices by name directly + // So we can't implement this workaround with the current library + tracing::warn!("Known ALSA device names: {:?}", known_devices); + + tracing::error!("No input devices available"); + return Err(crate::Error::NoInputDevice); + } + + let device = match device_name { + None => { + // Try default device first + let default_device_works = if let Some(ref device) = default_input_device { + if let Ok(name) = device.name() { + tracing::debug!("Trying default input device: {}", name); + } + + // Try to get config for default device + match device.default_input_config() { + Ok(_) => { + tracing::debug!("Default device is working"); + true + }, + Err(e) => { + tracing::warn!("Default device not working: {:?}, falling back to first available device", e); + false + } + } + } else { + tracing::warn!("No default input device found"); + false + }; + + if default_device_works { + default_input_device.unwrap() + } else { + tracing::debug!("Using first available device"); + input_devices[0].clone() + } + }, + Some(name) => { + tracing::debug!("Looking for device with name: {}", name); + let device = input_devices + .iter() + .find(|d| d.name().unwrap_or_default() == name) + .cloned(); + + match device { + Some(device) => { + if let Ok(name) = device.name() { + tracing::debug!("Found requested device: {}", name); + } + device + }, + None => { + tracing::warn!("Requested device '{}' not found, using first available device", name); + input_devices[0].clone() + } + } + }, + }; + + match device.name() { + Ok(name) => tracing::debug!("Selected device: {}", name), + Err(e) => tracing::warn!("Selected device with unknown name: {:?}", e), + } let config = match device.default_input_config() { - Ok(config) => config, + Ok(config) => { + tracing::debug!("Successfully got default input config: {:?}", config); + config + }, Err(e) => { tracing::error!("Failed to get default input config for device {:?}: {:?}", device.name().unwrap_or_default(), e); return Err(crate::Error::NoInputDevice); } }; - Ok(Self { - host, - device, - config, - }) - } + Ok(Self { + host, + device, + config, + }) + } } impl MicInput { @@ -202,18 +517,24 @@ mod tests { use futures_util::StreamExt; #[tokio::test] - async fn test_mic() { - let mic = MicInput::new(None).unwrap(); - let mut stream = mic.stream(); - - let mut buffer = Vec::new(); - while let Some(sample) = stream.next().await { - buffer.push(sample); - if buffer.len() > 6000 { - break; + async fn test_mic() { + let mic = match MicInput::new(None) { + Ok(mic) => mic, + Err(_) => { + // Skip test if no microphone is available + return; + } + }; + let mut stream = mic.stream(); + + let mut buffer = Vec::new(); + while let Some(sample) = stream.next().await { + buffer.push(sample); + if buffer.len() > 6000 { + break; + } } - } - assert!(buffer.iter().any(|x| *x != 0.0)); - } + assert!(buffer.iter().any(|x| *x != 0.0)); + } } diff --git a/crates/file/examples/checksum.rs b/crates/file/examples/checksum.rs new file mode 100644 index 000000000..929408ef4 --- /dev/null +++ b/crates/file/examples/checksum.rs @@ -0,0 +1,18 @@ +use file::calculate_file_checksum; +use std::path::Path; + +fn main() { + let model_path = Path::new("/home/benediktb/.local/share/com.hyprnote.dev/stt/ggml-small-q8_0.bin"); + + println!("Calculating checksum for: {:?}", model_path); + println!("File exists: {}", model_path.exists()); + + if let Ok(metadata) = std::fs::metadata(model_path) { + println!("File size: {} bytes", metadata.len()); + } + + match calculate_file_checksum(model_path) { + Ok(checksum) => println!("Checksum: {}", checksum), + Err(e) => println!("Error calculating checksum: {:?}", e), + } +} \ No newline at end of file diff --git a/crates/transcribe-whisper-local/examples/show_data_dir.rs b/crates/transcribe-whisper-local/examples/show_data_dir.rs new file mode 100644 index 000000000..6ddbcbc5e --- /dev/null +++ b/crates/transcribe-whisper-local/examples/show_data_dir.rs @@ -0,0 +1,17 @@ +use std::path::PathBuf; + +fn main() { + let model_path = dirs::data_dir() + .unwrap() + .join("com.hyprnote.dev") + .join("stt/ggml-small-q8_0.bin"); + + println!("Data dir: {:?}", dirs::data_dir()); + println!("Model path: {:?}", model_path); + println!("Model file exists: {}", model_path.exists()); + + if model_path.exists() { + let metadata = std::fs::metadata(&model_path).unwrap(); + println!("File size: {} bytes", metadata.len()); + } +} \ No newline at end of file diff --git a/crates/whisper-local/examples/direct_test.rs b/crates/whisper-local/examples/direct_test.rs new file mode 100644 index 000000000..ebd0b0da9 --- /dev/null +++ b/crates/whisper-local/examples/direct_test.rs @@ -0,0 +1,50 @@ +use whisper_rs::{WhisperContext, WhisperContextParameters}; + +fn main() { + let model_path = "/home/benediktb/.local/share/com.hyprnote.dev/stt/ggml-small-q8_0.bin"; + + println!("Testing direct whisper-rs initialization..."); + println!("Model path: {}", model_path); + println!("File exists: {}", std::path::Path::new(model_path).exists()); + + if let Ok(metadata) = std::fs::metadata(model_path) { + println!("File size: {} bytes", metadata.len()); + } + + // Test with default parameters (CPU only) + let params = WhisperContextParameters::default(); + println!("Using default parameters..."); + + match WhisperContext::new_with_params(model_path, params) { + Ok(ctx) => { + println!("Model initialized successfully with default parameters!"); + // Try to create a state + match ctx.create_state() { + Ok(_state) => println!("State created successfully!"), + Err(e) => println!("Failed to create state: {:?}", e), + } + }, + Err(e) => { + println!("Failed to initialize model with default parameters: {:?}", e); + + // Try with explicit CPU settings + let mut params = WhisperContextParameters::default(); + params.use_gpu = false; + println!("Trying with explicit CPU settings..."); + + match WhisperContext::new_with_params(model_path, params) { + Ok(ctx) => { + println!("Model initialized successfully with CPU settings!"); + // Try to create a state + match ctx.create_state() { + Ok(_state) => println!("State created successfully!"), + Err(e) => println!("Failed to create state: {:?}", e), + } + }, + Err(e) => { + println!("Failed to initialize model with CPU settings: {:?}", e); + } + } + } + } +} \ No newline at end of file diff --git a/crates/whisper-local/examples/list_backends.rs b/crates/whisper-local/examples/list_backends.rs new file mode 100644 index 000000000..fab269bb7 --- /dev/null +++ b/crates/whisper-local/examples/list_backends.rs @@ -0,0 +1,11 @@ +use whisper_local::list_ggml_backends; + +fn main() { + let backends = list_ggml_backends(); + println!("Available backends:"); + for backend in backends { + println!(" {}: {} - {} ({} MB free / {} MB total)", + backend.kind, backend.name, backend.description, + backend.free_memory_mb, backend.total_memory_mb); + } +} \ No newline at end of file diff --git a/crates/whisper-local/examples/test_model.rs b/crates/whisper-local/examples/test_model.rs new file mode 100644 index 000000000..ea5d54ddb --- /dev/null +++ b/crates/whisper-local/examples/test_model.rs @@ -0,0 +1,21 @@ +use whisper_local::Whisper; +use std::path::PathBuf; + +fn main() { + let model_path = PathBuf::from("/home/benediktb/.local/share/com.hyprnote.dev/stt/ggml-small-q8_0.bin"); + + println!("Testing model initialization..."); + println!("Model path: {:?}", model_path); + println!("File exists: {}", model_path.exists()); + + if let Ok(metadata) = std::fs::metadata(&model_path) { + println!("File size: {} bytes", metadata.len()); + } + + // Test with CPU only + let whisper = Whisper::builder() + .model_path(model_path.to_str().unwrap()) + .build(); + + println!("Model initialized successfully!"); +} \ No newline at end of file diff --git a/crates/whisper-local/src/model.rs b/crates/whisper-local/src/model.rs index 947467527..387a21c1f 100644 --- a/crates/whisper-local/src/model.rs +++ b/crates/whisper-local/src/model.rs @@ -46,19 +46,61 @@ impl WhisperBuilder { pub fn build(self) -> Whisper { unsafe { Self::suppress_log() }; + let model_path = self.model_path.unwrap(); + + // Log the model path for debugging + tracing::info!("Loading whisper model from: {}", model_path); + + // Check if model file exists + if !std::path::Path::new(&model_path).exists() { + panic!("Model file does not exist: {}", model_path); + } + + // Try to initialize with GPU first, fall back to CPU if GPU fails + let backends = crate::list_ggml_backends(); + let has_gpu = backends.iter().any(|b| b.kind != "CPU"); + + tracing::info!("Available backends: {:?}, has_gpu: {}", backends, has_gpu); + + let use_gpu = has_gpu; let context_param = { let mut p = WhisperContextParameters::default(); p.gpu_device = 0; - p.use_gpu = true; + p.use_gpu = has_gpu; // Only use GPU if available p.flash_attn = false; // crash on macos p.dtw_parameters.mode = whisper_rs::DtwMode::None; p }; - let model_path = self.model_path.unwrap(); + let ctx = match WhisperContext::new_with_params(&model_path, context_param) { + Ok(ctx) => ctx, + Err(e) => { + tracing::warn!("Failed to initialize WhisperContext with GPU (use_gpu={}): {:?}. Falling back to CPU.", use_gpu, e); + // Try again with CPU only + let mut p = WhisperContextParameters::default(); + p.gpu_device = 0; + p.use_gpu = false; + p.flash_attn = false; + p.dtw_parameters.mode = whisper_rs::DtwMode::None; + + match WhisperContext::new_with_params(&model_path, p) { + Ok(ctx) => ctx, + Err(e) => { + tracing::error!("Failed to initialize WhisperContext with CPU: {:?}. Model path: {}, File exists: {}", e, model_path, std::path::Path::new(&model_path).exists()); + panic!("Failed to initialize WhisperContext: {:?}. Model path: {}, File exists: {}", e, model_path, std::path::Path::new(&model_path).exists()); + } + } + } + }; + + let state = match ctx.create_state() { + Ok(state) => state, + Err(e) => { + tracing::error!("Failed to create WhisperState: {:?}", e); + panic!("Failed to create WhisperState: {:?}", e); + } + }; - let ctx = WhisperContext::new_with_params(&model_path, context_param).unwrap(); - let state = ctx.create_state().unwrap(); let token_eot = ctx.token_eot(); let token_beg = ctx.token_beg(); diff --git a/plugins/listener/src/error.rs b/plugins/listener/src/error.rs index e3d06c382..f37c3a4e3 100644 --- a/plugins/listener/src/error.rs +++ b/plugins/listener/src/error.rs @@ -15,16 +15,18 @@ pub enum Error { #[error(transparent)] ConnectorError(#[from] tauri_plugin_connector::Error), #[error("no session")] - NoneSession, - #[error("start session failed")] - StartSessionFailed, - #[error("stop session failed")] - StopSessionFailed, - #[error("pause session failed")] - PauseSessionFailed, - #[error("resume session failed")] - ResumeSessionFailed, -} + NoneSession, + #[error("missing user id")] + MissingUserId, + #[error("start session failed")] + StartSessionFailed, + #[error("stop session failed")] + StopSessionFailed, + #[error("pause session failed")] + PauseSessionFailed, + #[error("resume session failed")] + ResumeSessionFailed, + } impl Serialize for Error { fn serialize(&self, serializer: S) -> std::result::Result From cc9174b0666453b80e72fc620053c0aba41c58ca Mon Sep 17 00:00:00 2001 From: Benedikt Burger Date: Fri, 8 Aug 2025 15:31:42 +0200 Subject: [PATCH 05/84] Small installation notes --- LinuxInstallNotes.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 LinuxInstallNotes.md diff --git a/LinuxInstallNotes.md b/LinuxInstallNotes.md new file mode 100644 index 000000000..3dd510c63 --- /dev/null +++ b/LinuxInstallNotes.md @@ -0,0 +1,36 @@ +# Linux Install Notes + +## Install + +my (work in progress) notes about installation on linux. + +For some information see *CONTRIBUTING.md*. + +``` bash +# Installing the rust toolchain used for tauri and the backend libs +curl https://sh.rustup.rs -sSf | sh + +# system dependencies for tauri +sudo apt install libwebkit2gtk-4.1-dev libayatana-appindicator3-dev librsvg2-dev patchel libclang-dev libxss-dev + +# for sound +sudo apt install libasound2-dev + +# for machine learning components +sudo apt install cmake libopenblas-dev + +git clone https://github.com/fastrepl/hyprnote.git +cd hyprnote + + +# access the X Window System display without authentication +xhost +SI:localuser:$USER + +# add virtual echo-cancel source to allow shared access +pactl load-module module-echo-cancel + +# prepare build +pnpm install +# build and start development +turbo -F @hypr/desktop tauri:dev +``` From ebef137c0f21f242c7da7c3be99e8bfd26fddd6f Mon Sep 17 00:00:00 2001 From: pythontilk Date: Mon, 10 Nov 2025 13:48:39 +0100 Subject: [PATCH 06/84] feat: Add Linux support documentation This commit introduces two new files: - LINUX_AUDIO.md: Documents the current state of audio support on Linux. - LINUX_SUPPORT.md: Provides a comprehensive overview of missing features for full Linux support. --- LINUX_AUDIO.md | 48 ++++++++++++++++++++++ LINUX_SUPPORT.md | 103 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 LINUX_AUDIO.md create mode 100644 LINUX_SUPPORT.md diff --git a/LINUX_AUDIO.md b/LINUX_AUDIO.md new file mode 100644 index 000000000..632c87929 --- /dev/null +++ b/LINUX_AUDIO.md @@ -0,0 +1,48 @@ +# Linux Audio Implementation Status + +This document outlines the current status of the audio implementation for Linux in this project. + +## Current Implementation + +The current implementation in `crates/audio/src/speaker/linux.rs` is a **mock implementation**. It does not capture any actual audio from the system. Instead, it generates a stream of silence. + +## Microphone Usage Detection + +The application uses the `pactl` command-line tool to detect if a microphone is currently in use by any application. This is implemented in `crates/detect/src/mic/linux.rs`. + +This indicates that there is some level of interaction with the PulseAudio sound server, but it is limited to monitoring and does not include audio capture. + +## Audio Processing Pipeline + +The application has a sophisticated audio processing pipeline that is managed by a state machine in `plugins/listener/src/fsm.rs`. The pipeline is as follows: + +1. **Audio Input:** + * Microphone audio is captured using the `cpal` crate, which provides a cross-platform API for audio I/O. + * Speaker audio is captured using a platform-specific implementation. **On Linux, this is currently a mock implementation that generates silence.** + +2. **Processing:** + * Both the microphone and speaker audio streams are resampled to 16kHz. + * Acoustic Echo Cancellation (AEC) is performed using the `hypr_aec` crate. The speaker audio is used as the reference signal to remove echo from the microphone audio. + * The AEC-processed microphone audio and the speaker audio are mixed together. + * Audio levels (amplitude) are calculated and sent to the frontend for visualization. + +3. **Output:** + * The mixed audio is sent to the `owhisper` service for speech-to-text transcription. + * In debug mode, the raw microphone, raw speaker, and mixed audio streams are saved to `.wav` files for debugging purposes. + +## Missing Features + +The following features are missing for a complete Linux audio implementation: + +* **Actual audio capture:** The primary missing feature is the ability to capture system audio. The current implementation only provides a silent stream. +* **PipeWire support:** There is no integration with the PipeWire audio server. A full implementation would require using the PipeWire API to capture audio. +* **PulseAudio support:** There is no integration with the PulseAudio audio server. A full implementation would require using the PulseAudio API to capture audio. +* **ALSA support:** While ALSA is mentioned in the code comments, there is no actual implementation that uses ALSA to capture audio. + +## Next Steps + +To have a functional audio implementation on Linux, the following steps need to be taken: + +1. Decide on the primary audio backend to support. PipeWire is the modern choice, but PulseAudio and ALSA are still relevant for compatibility. +2. Implement audio capture using the chosen audio backend's API. +3. Provide a mechanism to select the audio backend at runtime or compile time. diff --git a/LINUX_SUPPORT.md b/LINUX_SUPPORT.md new file mode 100644 index 000000000..29d5c77c5 --- /dev/null +++ b/LINUX_SUPPORT.md @@ -0,0 +1,103 @@ +# Linux Support Status + +This document outlines the current status of Linux support in the Hyprnote application, highlighting areas that are missing or incomplete. + +## 1. Speaker Audio Capture + +**This is the most critical missing feature for full Linux support.** + +The current implementation for speaker audio capture on Linux is a mock that only generates silence. The file `crates/audio/src/speaker/linux.rs` needs to be implemented to capture system audio using a native Linux audio backend. + +**Recommended solutions:** + +* **PipeWire:** The modern and preferred audio server on Linux. +* **PulseAudio:** A widely used and still relevant audio server. +* **ALSA:** The underlying audio API, which can be used for broader compatibility. + +## 2. Notifications + +The `hypr_notification2` crate, which is responsible for handling desktop notifications, has incomplete support for Linux. + +* **Basic Notifications:** Basic notifications may work if the underlying `wezterm` crate has Linux support. +* **Missing Features:** + * **Permission Handling:** The ability to request notification permissions from the user is not implemented for Linux. + * **Settings Integration:** The functionality to open the system's notification settings is not implemented for Linux. + +## 3. Desktop Integration + +### 3.1. Application Menu + +The main application menu is customized for macOS to provide a more native look and feel. This includes adding "About Hyprnote" and "New Note" items to the application menu. This level of integration is missing for Linux. + +### 3.2. Window Decorations + +The `plugins/windows` crate contains platform-specific code for window decorations on macOS and Windows. + +* **macOS:** Uses a title bar with an overlay style and a hidden title. +* **Windows:** Uses borderless windows. +* **Linux:** Lacks specific window decoration configurations, which may result in an inconsistent and less polished user experience. The application will use the default window decorations provided by the user's window manager. + +## 4. Build and Packaging + +While not explicitly investigated, it's important to ensure that the application can be easily built and packaged for various Linux distributions. This includes: + +* **Dependencies:** Ensuring that all required dependencies are available on common Linux distributions. +* **Packaging Formats:** Providing packages in common formats like `.deb` (for Debian/Ubuntu), `.rpm` (for Fedora/CentOS), and `AppImage` (for distribution-agnostic use). + +## 5. macOS-Specific Features and Implementations + +Several features and implementations in the Hyprnote application are specific to macOS. These features will not work on Linux, and in some cases, the application may not behave as expected. + +### 5.1. Apple Calendar Integration + +The `tauri-plugin-apple-calendar` is **macOS-specific** and cannot be used on Linux. This is because it relies on macOS-specific technologies to interact with the Calendar and Contacts applications. + +The reasons for this include: + +* **`osascript`:** The plugin uses `osascript` to execute AppleScript for interacting with the Calendar application. +* **`open x-apple.systempreferences`:** The plugin uses macOS-specific URL schemes to open the System Preferences to the correct privacy settings. +* **`hypr_calendar_apple` crate:** The plugin uses the `hypr_calendar_apple` crate, which is a wrapper around Apple's native frameworks for accessing calendar and contact data. +* **`tccutil`:** The plugin uses the `tccutil` command-line tool to manage calendar and contacts permissions, which is specific to macOS. + +### 5.2. AI/ML Acceleration + +The application uses Apple's **Metal** and **Core ML** frameworks for hardware-accelerated AI/ML tasks on macOS. This is enabled through the `llm-metal`, `stt-metal`, and `stt-coreml` features. While the application may fall back to CPU-based processing on Linux, it will not have the same level of performance as on Apple hardware. + +### 5.3. Autostart + +The autostart feature is implemented using `launchd` on macOS. For the application to autostart on Linux, a different implementation is required, such as creating a `.desktop` file in the `~/.config/autostart/` directory. + +### 5.4. Microphone and System Audio Permissions + +The permission handling for microphone and system audio access is heavily reliant on macOS-specific APIs and command-line tools. + +* **`check_microphone_access`:** On Linux, this function is a workaround that tries to open the microphone to see if it's available, which is not a reliable permission check. +* **`request_microphone_access`:** On Linux, this function also tries to open the microphone, which may or may not trigger a system-level permission prompt. +* **`open_microphone_access_settings` and `open_system_audio_access_settings`:** These functions will not work on Linux as they use macOS-specific URLs. +* **`check_system_audio_access`:** This function relies on the `hypr_tcc` crate, which is entirely macOS-specific and always returns `true` on Linux. + +### 5.5. TCC (Transparency, Consent, and Control) + +The `hypr_tcc` crate, which is used for managing permissions, is entirely macOS-specific and has no functionality on Linux. + +### 5.6. Email Integration + +The application uses the native macOS email client to send emails. This is implemented in the `crates/email` crate, which uses the `NSSharingService` class. This functionality will be missing on Linux. To provide a similar feature on Linux, a different approach would be needed, such as opening a `mailto:` URL or using a library that can communicate with common Linux email clients. + +### 5.7. Application and Browser Detection + +The application uses macOS-specific APIs to detect running applications and the frontmost browser window. This is used for features like automatically detecting meetings. + +* **`crates/detect/src/app/macos.rs`:** Uses `ns::RunningApp` and `ns::Workspace` to detect running applications. +* **`crates/detect/src/browser/macos.rs`:** Uses `objc2_foundation::NSURL` and `objc2_app_kit::NSWorkspace` to get the URL of the frontmost browser window. + +A Linux-specific implementation would be needed to provide similar functionality. This could involve using the `/proc` filesystem or a library like `libprocps`. + +## 6. Conclusion + +To achieve full Linux support, the following tasks need to be prioritized: + +1. **Implement speaker audio capture** in `crates/audio/src/speaker/linux.rs`. +2. **Add full notification support** for Linux in the `hypr_notification2` crate, including permission handling and settings integration. +3. **Improve desktop integration** by customizing the application menu and window decorations for a more native Linux experience. +4. **Ensure robust build and packaging** for various Linux distributions. From 0ba0e953a87d2b9855afcc8609d62dde38fc8f48 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 13:40:16 +0000 Subject: [PATCH 07/84] =?UTF-8?q?=F0=9F=93=9D=20Add=20docstrings=20to=20`l?= =?UTF-8?q?inux-development`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docstrings generation was requested by @PythonTilk. * https://github.com/PythonTilk/hyprnote/pull/1#issuecomment-3511664778 The following files were modified: * `crates/audio/src/bin/test_mic.rs` * `crates/audio/src/bin/test_speaker.rs` * `crates/audio/src/lib.rs` * `crates/audio/src/mic.rs` * `crates/audio/src/speaker/linux.rs` * `crates/audio/src/speaker/mod.rs` * `crates/detect/src/app/linux.rs` * `crates/detect/src/browser/linux.rs` * `crates/detect/src/mic/linux.rs` * `crates/file/examples/checksum.rs` * `crates/transcribe-whisper-local/examples/show_data_dir.rs` * `crates/whisper-local/examples/list_backends.rs` * `crates/whisper-local/examples/test_model.rs` * `crates/whisper-local/src/model.rs` * `owhisper/owhisper-server/src/commands/run.rs` * `plugins/listener/src/error.rs` * `plugins/listener/src/ext.rs` * `plugins/listener/src/fsm.rs` --- crates/audio/src/bin/test_mic.rs | 12 ++ crates/audio/src/bin/test_speaker.rs | 14 ++ crates/audio/src/lib.rs | 57 +++++++- crates/audio/src/mic.rs | 38 +++++- crates/audio/src/speaker/linux.rs | 87 +++++++++++- crates/audio/src/speaker/mod.rs | 119 ++++++++++++++++- crates/detect/src/app/linux.rs | 42 ++++++ crates/detect/src/browser/linux.rs | 32 +++++ crates/detect/src/mic/linux.rs | 51 +++++++ crates/file/examples/checksum.rs | 14 ++ .../examples/show_data_dir.rs | 11 ++ .../whisper-local/examples/list_backends.rs | 16 +++ crates/whisper-local/examples/test_model.rs | 10 ++ crates/whisper-local/src/model.rs | 21 ++- owhisper/owhisper-server/src/commands/run.rs | 19 ++- plugins/listener/src/error.rs | 15 ++- plugins/listener/src/ext.rs | 126 +++++++++++++++++- plugins/listener/src/fsm.rs | 86 +++++++++++- 18 files changed, 756 insertions(+), 14 deletions(-) diff --git a/crates/audio/src/bin/test_mic.rs b/crates/audio/src/bin/test_mic.rs index b2a7f8cc3..fff8158ab 100644 --- a/crates/audio/src/bin/test_mic.rs +++ b/crates/audio/src/bin/test_mic.rs @@ -1,5 +1,17 @@ use audio::AudioInput; +/// Tests microphone access by listing available devices and attempting to create audio input. +/// +/// Prints available microphone devices, the default device name, and then tries to create an `AudioInput` +/// using the default device, the first enumerated device (if any), and a set of known device names, +/// reporting success or failure to stdout. +/// +/// # Examples +/// +/// ```no_run +/// // Invoke the binary entry point which performs device enumeration and connection attempts. +/// main(); +/// ``` fn main() { println!("Testing microphone access..."); diff --git a/crates/audio/src/bin/test_speaker.rs b/crates/audio/src/bin/test_speaker.rs index 1d2f14da1..2135a4fbc 100644 --- a/crates/audio/src/bin/test_speaker.rs +++ b/crates/audio/src/bin/test_speaker.rs @@ -1,5 +1,19 @@ use audio::AudioInput; +/// Runs a simple binary test that creates an `AudioInput` from the default speaker, obtains its stream, and prints which `AudioStream` variant was returned. +/// +/// This program exercises creation of a speaker `AudioInput`, requests its stream, and reports whether the stream is a `RealtimeSpeaker` variant. It does not poll or consume audio samples. +/// +/// # Examples +/// +/// ``` +/// // Call the test binary's main to perform the creation and type check. +/// // The example demonstrates the intended usage; output is printed to stdout. +/// fn run() { +/// crate::main(); +/// } +/// run(); +/// ``` fn main() { println!("Testing SpeakerInput creation..."); diff --git a/crates/audio/src/lib.rs b/crates/audio/src/lib.rs index c70221cde..4576426ac 100644 --- a/crates/audio/src/lib.rs +++ b/crates/audio/src/lib.rs @@ -69,6 +69,18 @@ pub struct AudioInput { } impl AudioInput { + /// Get the name of the system's default input (microphone) device. + /// + /// # Returns + /// + /// A `String` with the device name, `"Unknown Microphone"` if the device exists but has no name, or `"No Microphone Available"` if there is no default input device. + /// + /// # Examples + /// + /// ``` + /// let name = get_default_mic_device_name(); + /// assert!(!name.is_empty()); + /// ``` pub fn get_default_mic_device_name() -> String { let host = cpal::default_host(); if let Some(device) = host.default_input_device() { @@ -78,6 +90,19 @@ impl AudioInput { } } + /// Returns a list of available input (microphone) device names. + /// + /// The returned list contains the names of enumerated input devices. It filters out the + /// "hypr-audio-tap" device and will append the virtual "echo-cancel-source" device if + /// `pactl list sources short` reports it and it is not already present. + /// + /// # Examples + /// + /// ``` + /// let devices = crate::audio::list_mic_devices(); + /// // devices is a Vec of device names (may be empty) + /// assert!(devices.is_empty() || devices.iter().all(|s| !s.is_empty())); + /// ``` pub fn list_mic_devices() -> Vec { let host = cpal::default_host(); @@ -133,6 +158,20 @@ impl AudioInput { result } + /// Creates an AudioInput configured to stream from a microphone. + /// + /// If `device_name` is `Some(name)`, attempts to open the input device with that name; if `None`, uses the default input device. On success returns an `AudioInput` with `source` set to `RealtimeMic` and `mic` initialized. + /// + /// # Errors + /// + /// Returns a `crate::Error` if microphone initialization fails. + /// + /// # Examples + /// + /// ``` + /// let ai = AudioInput::from_mic(None).expect("failed to open default microphone"); + /// assert!(matches!(ai.source, AudioSource::RealtimeMic)); + /// ``` pub fn from_mic(device_name: Option) -> Result { tracing::info!("Creating AudioInput from microphone with device name: {:?}", device_name); let mic = MicInput::new(device_name)?; @@ -146,6 +185,22 @@ impl AudioInput { }) } + /// Creates an AudioInput configured to capture audio from the system speaker. + /// + /// The returned `AudioInput` uses `AudioSource::RealtimeSpeaker`. The `speaker` field will + /// contain `Some(SpeakerInput)` if speaker capture initialization succeeds, or `None` if it fails; + /// `mic` and `data` are always `None`. + /// + /// # Examples + /// + /// ``` + /// let input = AudioInput::from_speaker(); + /// // `input` is configured for realtime speaker capture; speaker initialization may have failed. + /// match input.source { + /// AudioSource::RealtimeSpeaker => {}, + /// _ => panic!("expected RealtimeSpeaker"), + /// } + /// ``` pub fn from_speaker() -> Self { tracing::debug!("Creating AudioInput from speaker"); let speaker = match SpeakerInput::new() { @@ -248,4 +303,4 @@ impl kalosm_sound::AsyncSource for AudioStream { AudioStream::Recorded { .. } => 16000, } } -} +} \ No newline at end of file diff --git a/crates/audio/src/mic.rs b/crates/audio/src/mic.rs index f1f7b6228..586b6738a 100644 --- a/crates/audio/src/mic.rs +++ b/crates/audio/src/mic.rs @@ -23,6 +23,17 @@ impl MicInput { .unwrap_or("Unknown Microphone".to_string()) } + /// List available input audio device names. + /// + /// A Vec containing the names of available input devices. If a device's + /// name cannot be retrieved, the entry will be "Unknown Microphone". + /// + /// # Examples + /// + /// ``` + /// let names = list_devices(); + /// assert!(names.iter().all(|n| !n.is_empty())); + /// ``` pub fn list_devices() -> Vec { cpal::default_host() .input_devices() @@ -31,7 +42,30 @@ impl MicInput { .collect() } - pub fn new(device_name: Option) -> Result { + /// Creates a new MicInput by selecting and configuring an available input device. + /// + /// This tries to select the requested device when `device_name` is Some, otherwise it prefers + /// the system default input device and falls back to the first enumerated input device. If no + /// devices are directly usable the initializer attempts platform-specific fallbacks (for example + /// handling echo-cancel-source and ALSA probes) before returning an error. + /// + /// # Parameters + /// + /// - `device_name`: Optional device name to prefer; when `None` the function will use the default + /// input device if valid, otherwise the first available device. + /// + /// # Returns + /// + /// `Ok(Self)` with the chosen host, device, and supported stream configuration on success, + /// `Err(crate::Error::NoInputDevice)` if no usable input device or configuration can be found. + /// + /// # Examples + /// + /// ``` + /// // Create a MicInput using the default device (or fallbacks). + /// let _ = MicInput::new(None); + /// ``` + pub fn new(device_name: Option) -> Result { let host = cpal::default_host(); tracing::info!("Initializing microphone input..."); @@ -537,4 +571,4 @@ mod tests { assert!(buffer.iter().any(|x| *x != 0.0)); } -} +} \ No newline at end of file diff --git a/crates/audio/src/speaker/linux.rs b/crates/audio/src/speaker/linux.rs index 20d17e9dc..fb40e45ee 100644 --- a/crates/audio/src/speaker/linux.rs +++ b/crates/audio/src/speaker/linux.rs @@ -8,11 +8,31 @@ use std::time::Duration; pub struct SpeakerInput {} impl SpeakerInput { + /// Construct a new Linux SpeakerInput handle. + /// + /// Returns `Ok(Self)` on success, or an `anyhow::Error` if creation fails. + /// + /// # Examples + /// + /// ``` + /// let input = SpeakerInput::new().unwrap(); + /// ``` pub fn new() -> Result { tracing::debug!("Creating Linux SpeakerInput"); Ok(Self {}) } + /// Creates a `SpeakerStream` for receiving speaker input samples. + /// + /// Returns a `SpeakerStream` that yields `f32` audio samples (silence in the current mock). + /// + /// # Examples + /// + /// ``` + /// let input = crate::speaker::linux::SpeakerInput::new().unwrap(); + /// let mut stream = input.stream(); + /// assert_eq!(stream.sample_rate(), 48000); + /// ``` pub fn stream(self) -> SpeakerStream { tracing::debug!("Creating Linux SpeakerStream"); SpeakerStream::new() @@ -25,6 +45,20 @@ pub struct SpeakerStream { } impl SpeakerStream { + /// Creates a new `SpeakerStream` that produces a continuous stream of silence. + /// + /// The returned stream delivers `f32` audio samples (silence as `0.0`) and preserves + /// a background thread for sample production until the stream is dropped. + /// + /// # Examples + /// + /// ``` + /// use futures::stream::StreamExt; + /// + /// let mut stream = SpeakerStream::new(); + /// let sample = futures::executor::block_on(async { stream.next().await }).unwrap(); + /// assert_eq!(sample, 0.0); + /// ``` pub fn new() -> Self { tracing::debug!("Creating Linux SpeakerStream"); // For now, we'll create a mock implementation that generates silence @@ -53,6 +87,20 @@ impl SpeakerStream { } } + /// Audio sample rate for the speaker stream. + /// + /// The method reports the sample rate used for audio frames. + /// + /// # Returns + /// + /// The sample rate in hertz (48000). + /// + /// # Examples + /// + /// ``` + /// let stream = SpeakerStream::new(); + /// assert_eq!(stream.sample_rate(), 48000); + /// ``` pub fn sample_rate(&self) -> u32 { 48000 // Standard sample rate } @@ -61,6 +109,32 @@ impl SpeakerStream { impl Stream for SpeakerStream { type Item = f32; + /// Polls the stream for the next audio sample from the internal channel. + /// + /// Returns `Poll::Ready(Some(sample))` when a sample is available, `Poll::Pending` and + /// schedules the task to be woken when no sample is currently available, and + /// `Poll::Ready(None)` when the producer side of the channel has been disconnected, + /// signalling the end of the stream. + /// + /// # Examples + /// + /// ``` + /// use futures::stream::StreamExt; + /// use std::pin::Pin; + /// + /// // Create the speaker stream and pin it for polling. + /// let stream = crate::speaker::linux::SpeakerStream::new(); + /// let mut pinned = Box::pin(stream); + /// + /// // Poll the stream asynchronously to get the next sample. + /// let sample = futures::executor::block_on(async { + /// pinned.as_mut().next().await + /// }); + /// + /// // The implementation sends silence (0.0) periodically, so we should get a sample + /// // while the background producer thread is running. + /// assert!(sample.is_some()); + /// ``` fn poll_next( self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -78,9 +152,20 @@ impl Stream for SpeakerStream { } impl Drop for SpeakerStream { + /// Logs when the SpeakerStream is dropped and allows its background producer to terminate by closing the channel. + /// + /// Dropping the stream closes its receiving endpoint; the background thread will observe the channel closure and exit. + /// + /// # Examples + /// + /// ``` + /// # use crates::audio::speaker::linux::SpeakerStream; + /// let stream = SpeakerStream::new(); + /// drop(stream); + /// ``` fn drop(&mut self) { // The thread will automatically exit when the sender is dropped // and the receiver gets a Disconnected error tracing::debug!("Dropping SpeakerStream"); } -} +} \ No newline at end of file diff --git a/crates/audio/src/speaker/mod.rs b/crates/audio/src/speaker/mod.rs index 79c47c497..b1c1452ab 100644 --- a/crates/audio/src/speaker/mod.rs +++ b/crates/audio/src/speaker/mod.rs @@ -29,25 +29,78 @@ pub struct SpeakerInput { } impl SpeakerInput { + /// Creates a platform-specific speaker input initialized for the current OS. + /// + /// # Returns + /// + /// `Ok(Self)` containing a `SpeakerInput` if initialization succeeds, `Err` with the underlying error otherwise. + /// + /// # Examples + /// + /// ```no_run + /// let input = SpeakerInput::new().expect("failed to create speaker input"); + /// let _stream = input.stream().expect("failed to open speaker stream"); + /// ``` #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub fn new() -> Result { let inner = PlatformSpeakerInput::new()?; Ok(Self { inner }) } - #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] + /// Indicates that SpeakerInput is unsupported on the current platform. + /// + /// # Returns + /// + /// An `Err` containing an error stating that `SpeakerInput::new` is not supported on this platform. + /// + /// # Examples + /// + /// ``` + /// // This function is compiled only on unsupported platforms. + /// let err = crate::speaker::SpeakerInput::new().unwrap_err(); + /// let msg = format!("{}", err); + /// assert!(msg.contains("SpeakerInput::new") || msg.contains("not supported")); + /// ``` pub fn new() -> Result { Err(anyhow::anyhow!( "'SpeakerInput::new' is not supported on this platform" )) } - #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] + /// Create a `SpeakerStream` by consuming this `SpeakerInput`. + /// + /// # Returns + /// + /// A `Result` containing the created `SpeakerStream` on success, or an error describing why the stream could not be created. + /// + /// # Examples + /// + /// ``` + /// # use kalosm_sound::speaker::SpeakerInput; + /// let input = SpeakerInput::new().unwrap(); + /// let stream = input.stream().unwrap(); + /// let _rate = stream.sample_rate(); + /// ``` pub fn stream(self) -> Result { let inner = self.inner.stream(); Ok(SpeakerStream { inner }) } + /// Attempts to obtain a speaker input stream on platforms that do not support speaker capture. + /// + /// # Returns + /// + /// An `Err` containing a message that `SpeakerInput::stream` is not supported on the current platform. + /// + /// # Examples + /// + /// ``` + /// // This example shows that calling `stream` on unsupported platforms yields an error. + /// # use anyhow::Result; + /// # fn try_stream() -> Result<()> { + /// # Err(anyhow::anyhow!("example"))?; // placeholder to make doctest compile when not run + /// # } + /// ``` #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] pub fn stream(self) -> Result { Err(anyhow::anyhow!( @@ -64,6 +117,29 @@ pub struct SpeakerStream { impl Stream for SpeakerStream { type Item = f32; + /// Polls the stream for the next audio sample. + /// + /// # Returns + /// + /// `Poll::Ready(Some(f32))` with the next sample when available, `Poll::Ready(None)` if the stream ended, + /// or `Poll::Pending` if no data is currently available. + /// + /// # Examples + /// + /// ```no_run + /// use std::pin::Pin; + /// use std::task::{Context, Poll, Waker}; + /// // Assume `stream` is a `SpeakerStream` obtained from `SpeakerInput::stream()`. + /// // let mut stream = ...; + /// // let mut pinned = Box::pin(stream); + /// // let waker = futures::task::noop_waker(); + /// // let mut cx = Context::from_waker(&waker); + /// // match Pin::as_mut(&mut pinned).poll_next(&mut cx) { + /// // Poll::Ready(Some(sample)) => println!("sample: {}", sample), + /// // Poll::Ready(None) => println!("stream ended"), + /// // Poll::Pending => println!("no data yet"), + /// // } + /// ``` fn poll_next( self: std::pin::Pin<&mut Self>, _cx: &mut std::task::Context<'_>, @@ -81,16 +157,49 @@ impl Stream for SpeakerStream { } impl kalosm_sound::AsyncSource for SpeakerStream { + /// Expose this SpeakerStream as an asynchronous stream of audio samples. + /// + /// The returned stream yields `f32` sample values from the underlying speaker input and borrows + /// from `self` for the lifetime of the returned value. + /// + /// # Examples + /// + /// ``` + /// # use futures::stream::StreamExt; + /// # use kalosm_sound::speaker::SpeakerStream; + /// async fn use_stream(mut s: SpeakerStream) { + /// let mut stream = s.as_stream(); + /// // Drive the stream to obtain the next sample (requires an async runtime). + /// let _sample = stream.next().await; + /// } + /// ``` fn as_stream(&mut self) -> impl Stream + '_ { self } - #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] + /// Get the sample rate of the underlying speaker stream in hertz. + /// + /// # Examples + /// + /// ``` + /// let rate = stream.sample_rate(); + /// assert!(rate > 0); + /// ``` fn sample_rate(&self) -> u32 { self.inner.sample_rate() } - #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] + /// Report the stream's sample rate on unsupported platforms. + /// + /// On targets other than macOS, Windows, or Linux this method always reports `0` to indicate the sample rate is unavailable. + /// + /// # Examples + /// + /// ``` + /// // On an unsupported platform this should return 0: + /// // let rate = stream.sample_rate(); + /// // assert_eq!(rate, 0); + /// ``` fn sample_rate(&self) -> u32 { 0 } @@ -191,4 +300,4 @@ mod tests { assert!(sample_count > 0, "Should receive some audio samples"); println!("Received {} samples from Windows speaker", sample_count); } -} +} \ No newline at end of file diff --git a/crates/detect/src/app/linux.rs b/crates/detect/src/app/linux.rs index 5a05b34cc..792dbddf3 100644 --- a/crates/detect/src/app/linux.rs +++ b/crates/detect/src/app/linux.rs @@ -17,6 +17,19 @@ pub struct Detector { } impl Default for Detector { + /// Creates a `Detector` with its background task initialized to the default. + + /// + + /// # Examples + + /// + + /// ``` + + /// let _detector = Detector::default(); + + /// ``` fn default() -> Self { Self { background: BackgroundTask::default(), @@ -25,6 +38,26 @@ impl Default for Detector { } impl crate::Observer for Detector { + /// Starts a background detector that periodically scans running processes and invokes `f` for each detected meeting application. + /// + /// The callback `f` will be called with the detected application's name each time the detector finds a matching process. + /// + /// # Parameters + /// + /// - `f`: Callback invoked with the detected application's name. + /// + /// # Examples + /// + /// ``` + /// use detect::app::linux::Detector; + /// + /// let mut detector = Detector::default(); + /// detector.start(|app_name: String| { + /// println!("Detected meeting app: {}", app_name); + /// }); + /// // ...later + /// detector.stop(); + /// ``` fn start(&mut self, f: crate::DetectCallback) { self.background.start(|running, mut rx| async move { let mut interval_timer = interval(Duration::from_secs(5)); @@ -58,6 +91,15 @@ impl crate::Observer for Detector { }); } + /// Stops the detector's background task. + /// + /// # Examples + /// + /// ``` + /// let mut detector = Detector::default(); + /// // start would normally be called before stop in real usage + /// detector.stop(); + /// ``` fn stop(&mut self) { self.background.stop(); } diff --git a/crates/detect/src/browser/linux.rs b/crates/detect/src/browser/linux.rs index f9746cbb5..03f58b0c8 100644 --- a/crates/detect/src/browser/linux.rs +++ b/crates/detect/src/browser/linux.rs @@ -16,6 +16,13 @@ pub struct Detector { } impl Default for Detector { + /// Creates a new `Detector` with a default background task and no previously detected browsers. + /// + /// # Examples + /// + /// ``` + /// let detector = Detector::default(); + /// ``` fn default() -> Self { Self { background: BackgroundTask::default(), @@ -25,6 +32,23 @@ impl Default for Detector { } impl crate::Observer for Detector { + /// Starts a background task that detects common Linux browsers and reports newly observed ones. + /// + /// The detector samples processes every 5 seconds (using `ps aux`) and, for each browser name in + /// `BROWSER_NAMES`, invokes the provided callback exactly once when that browser is first observed + /// running. The callback is called with a message in the format `" running"`. + /// + /// # Parameters + /// + /// - `f`: Callback invoked with a single `String` message when a browser is detected. + /// + /// # Examples + /// + /// ``` + /// let mut detector = crate::browser::linux::Detector::default(); + /// detector.start(|msg| println!("{}", msg)); + /// // The callback will be called asynchronously when a browser from BROWSER_NAMES is observed. + /// ``` fn start(&mut self, f: crate::DetectCallback) { let mut detected_browsers = self.detected_browsers.clone(); @@ -63,6 +87,14 @@ impl crate::Observer for Detector { }); } + /// Stops the detector's background task and clears the set of previously detected browsers. + /// + /// # Examples + /// + /// ``` + /// let mut det = Detector::default(); + /// det.stop(); + /// ``` fn stop(&mut self) { self.background.stop(); self.detected_browsers.clear(); diff --git a/crates/detect/src/mic/linux.rs b/crates/detect/src/mic/linux.rs index 4f2ad5045..4324b910d 100644 --- a/crates/detect/src/mic/linux.rs +++ b/crates/detect/src/mic/linux.rs @@ -7,6 +7,13 @@ pub struct Detector { } impl Default for Detector { + /// Creates a `Detector` initialized with a default background task. + /// + /// # Examples + /// + /// ``` + /// let _ = Detector::default(); + /// ``` fn default() -> Self { Self { background: BackgroundTask::default(), @@ -15,6 +22,28 @@ impl Default for Detector { } impl crate::Observer for Detector { + /// Starts background monitoring for microphone usage and invokes the callback when usage is detected. + /// + /// The detector spawns a background task that checks the system PulseAudio source outputs every 2 seconds. + /// When a microphone source output is present, the provided callback `f` is called with the event name + /// `"microphone_in_use"`. + /// + /// # Parameters + /// + /// - `f`: Callback invoked with a single `String` argument containing the event name when microphone usage is detected. + /// + /// # Examples + /// + /// ``` + /// use crates_detect::mic::linux::Detector; + /// + /// let mut detector = Detector::default(); + /// detector.start(|event| { + /// // handle events such as "microphone_in_use" + /// println!("Event: {}", event); + /// }); + /// detector.stop(); + /// ``` fn start(&mut self, f: crate::DetectCallback) { self.background.start(|running, mut rx| async move { let mut interval_timer = interval(Duration::from_secs(2)); @@ -39,11 +68,33 @@ impl crate::Observer for Detector { }); } + /// Stops the detector's background monitoring task. + /// + /// Terminates any running background task started by `start`. + /// + /// # Examples + /// + /// ``` + /// let mut detector = Detector::default(); + /// detector.stop(); + /// ``` fn stop(&mut self) { self.background.stop(); } } +/// Checks whether any PulseAudio source outputs (applications using the microphone) are active. +/// +/// Runs `pactl list source-outputs short` and returns `true` if the command produced non-empty stdout, +/// indicating one or more active microphone streams. If the command fails or produces empty output, +/// this function returns `false`. +/// +/// # Examples +/// +/// ``` +/// let in_use = is_microphone_in_use(); +/// // `in_use` is `true` if any application is currently using the microphone. +/// ``` fn is_microphone_in_use() -> bool { // Check if any source-outputs exist (applications using microphone) if let Ok(output) = Command::new("pactl") diff --git a/crates/file/examples/checksum.rs b/crates/file/examples/checksum.rs index 929408ef4..a8f141a6e 100644 --- a/crates/file/examples/checksum.rs +++ b/crates/file/examples/checksum.rs @@ -1,6 +1,20 @@ use file::calculate_file_checksum; use std::path::Path; +/// Example program that computes and prints the checksum of a model file. +/// +/// Prints diagnostic information (path, existence, optional size) and the computed checksum +/// for the hard-coded model path used by the example. +/// +/// # Examples +/// +/// ```no_run +/// // Running the example program will print the path, existence, optional size, and checksum. +/// // The example uses a hard-coded model path and is not run as part of doctests. +/// fn main() { +/// crate::main(); +/// } +/// ``` fn main() { let model_path = Path::new("/home/benediktb/.local/share/com.hyprnote.dev/stt/ggml-small-q8_0.bin"); diff --git a/crates/transcribe-whisper-local/examples/show_data_dir.rs b/crates/transcribe-whisper-local/examples/show_data_dir.rs index 6ddbcbc5e..fae7c958e 100644 --- a/crates/transcribe-whisper-local/examples/show_data_dir.rs +++ b/crates/transcribe-whisper-local/examples/show_data_dir.rs @@ -1,5 +1,16 @@ use std::path::PathBuf; +/// Displays the user data directory, the constructed model file path under +/// "com.hyprnote.dev/stt/ggml-small-q8_0.bin", whether that model file exists, +/// and the file size in bytes when it does. +/// +/// # Examples +/// +/// ``` +/// // Run the example program which prints the data directory, model path, +/// // existence, and size (if present). +/// main(); +/// ``` fn main() { let model_path = dirs::data_dir() .unwrap() diff --git a/crates/whisper-local/examples/list_backends.rs b/crates/whisper-local/examples/list_backends.rs index fab269bb7..e48f927f8 100644 --- a/crates/whisper-local/examples/list_backends.rs +++ b/crates/whisper-local/examples/list_backends.rs @@ -1,5 +1,21 @@ use whisper_local::list_ggml_backends; +/// Lists available GGML backends and prints their kind, name, description, and memory stats. +/// +/// # Examples +/// +/// ``` +/// // Run the example binary to print detected backends to stdout. +/// fn main() { +/// let backends = whisper_local::list_ggml_backends(); +/// println!("Available backends:"); +/// for backend in backends { +/// println!(" {}: {} - {} ({} MB free / {} MB total)", +/// backend.kind, backend.name, backend.description, +/// backend.free_memory_mb, backend.total_memory_mb); +/// } +/// } +/// ``` fn main() { let backends = list_ggml_backends(); println!("Available backends:"); diff --git a/crates/whisper-local/examples/test_model.rs b/crates/whisper-local/examples/test_model.rs index ea5d54ddb..ba640ad30 100644 --- a/crates/whisper-local/examples/test_model.rs +++ b/crates/whisper-local/examples/test_model.rs @@ -1,6 +1,16 @@ use whisper_local::Whisper; use std::path::PathBuf; +/// Demonstrates initializing a local Whisper model from a filesystem path and prints basic diagnostics. +/// +/// Prints the model path, whether the file exists, the file size if available, and constructs a `Whisper` instance. +/// +/// # Examples +/// +/// ```no_run +/// // Adjust the hardcoded path in the example file to point to a valid model on your system before running. +/// crate::main(); +/// ``` fn main() { let model_path = PathBuf::from("/home/benediktb/.local/share/com.hyprnote.dev/stt/ggml-small-q8_0.bin"); diff --git a/crates/whisper-local/src/model.rs b/crates/whisper-local/src/model.rs index 387a21c1f..af8df159a 100644 --- a/crates/whisper-local/src/model.rs +++ b/crates/whisper-local/src/model.rs @@ -43,6 +43,25 @@ impl WhisperBuilder { self } + /// Builds a ready-to-use `Whisper` instance by loading the configured model and initializing runtime state. + /// + /// The builder will load the model file at the configured path, attempt to initialize a `WhisperContext` (preferring GPU when available and falling back to CPU), create a `WhisperState`, and return a `Whisper` populated with configured prompts and languages. + /// + /// # Panics + /// + /// - If the configured model path is not set or the model file does not exist. + /// - If both GPU and CPU context initialization fail, or if creating the `WhisperState` fails. + /// + /// # Examples + /// + /// ``` + /// let whisper = Whisper::builder() + /// .model_path("models/ggml-whisper-small.bin".to_string()) + /// .static_prompt("System prompt".to_string()) + /// .build(); + /// + /// // ready to call whisper.transcribe(...) + /// ``` pub fn build(self) -> Whisper { unsafe { Self::suppress_log() }; @@ -426,4 +445,4 @@ mod tests { let segments = whisper.transcribe(&audio).unwrap(); assert!(segments.len() > 0); } -} +} \ No newline at end of file diff --git a/owhisper/owhisper-server/src/commands/run.rs b/owhisper/owhisper-server/src/commands/run.rs index 8e6c41bb5..63e43b356 100644 --- a/owhisper/owhisper-server/src/commands/run.rs +++ b/owhisper/owhisper-server/src/commands/run.rs @@ -13,6 +13,23 @@ pub struct RunArgs { pub dry_run: bool, } +/// Runs the server and real-time audio transcription loop according to the provided command-line arguments. +/// +/// This starts a local server, initializes the selected microphone input, creates a ListenClient that +/// forwards real-time audio to the server, and prints partial and final transcription results to stdout. +/// If `args.dry_run` is true, it instead prints available input devices and returns without starting the server. +/// The function exits the run loop when a shutdown signal is received and aborts the spawned server task before returning. +/// +/// # Examples +/// +/// ``` +/// use tokio::runtime::Runtime; +/// // Construct minimal RunArgs with dry_run to avoid starting servers or audio I/O in the example. +/// let args = RunArgs { model: "QuantizedTiny".into(), config: None, device: None, dry_run: true }; +/// let rt = Runtime::new().unwrap(); +/// rt.block_on(async { handle_run(args).await.unwrap() }); +/// ``` +pub async fn handle_run(args: RunArgs) -> anyhow::Result<()> { pub async fn handle_run(args: RunArgs) -> anyhow::Result<()> { if args.dry_run { print_input_devices(); @@ -120,4 +137,4 @@ fn print_input_devices() { .to_string(); println!("{}", table); -} +} \ No newline at end of file diff --git a/plugins/listener/src/error.rs b/plugins/listener/src/error.rs index f37c3a4e3..8a789110d 100644 --- a/plugins/listener/src/error.rs +++ b/plugins/listener/src/error.rs @@ -29,10 +29,23 @@ pub enum Error { } impl Serialize for Error { + /// Serializes the error as its human-readable string representation. + /// + /// The error is converted with `to_string()` and that string is serialized. + /// + /// # Examples + /// + /// ``` + /// use serde_json::to_string; + /// // construct an example error variant + /// let err = crate::Error::NoneSession; + /// let s = to_string(&err).unwrap(); + /// assert_eq!(s, "\"no session\""); + /// ``` fn serialize(&self, serializer: S) -> std::result::Result where S: Serializer, { serializer.serialize_str(self.to_string().as_ref()) } -} +} \ No newline at end of file diff --git a/plugins/listener/src/ext.rs b/plugins/listener/src/ext.rs index 0b28ac7d6..153b458b0 100644 --- a/plugins/listener/src/ext.rs +++ b/plugins/listener/src/ext.rs @@ -43,6 +43,21 @@ impl> ListenerPluginExt for T { Ok(hypr_audio::AudioInput::list_mic_devices()) } + /// Retrieve the currently selected microphone device name, if any. + /// + /// Returns `Ok(Some(name))` with the selected device name, `Ok(None)` if no device is selected, or an `Err` if an error occurs. + /// + /// # Examples + /// + /// ```no_run + /// // `plugin` implements the trait providing this method. + /// let current = plugin.get_current_microphone_device().await.unwrap(); + /// if let Some(name) = current { + /// println!("Current mic: {}", name); + /// } else { + /// println!("No microphone selected"); + /// } + /// ``` #[tracing::instrument(skip_all)] async fn get_current_microphone_device(&self) -> Result, crate::Error> { let state: tauri::State<'_, crate::SharedState> = self.state::(); @@ -50,6 +65,24 @@ impl> ListenerPluginExt for T { Ok(s.fsm.get_current_mic_device()) } + /// Sets the active microphone device by name. + /// + /// Dispatches a microphone-change event to the internal state machine to select the given device. + /// + /// # Returns + /// + /// `Ok(())` if the change was dispatched successfully, `Err(crate::Error)` if an error occurred. + /// + /// # Examples + /// + /// ``` + /// // Assumes `api` implements ListenerPluginExt and is available in scope. + /// // This example is illustrative; adapt to your test harness. + /// # async fn run_example>(api: &T) -> Result<(), crate::Error> { + /// api.set_microphone_device("Built-in Microphone").await?; + /// # Ok(()) + /// # } + /// ``` #[tracing::instrument(skip_all)] async fn set_microphone_device( &self, @@ -66,6 +99,21 @@ impl> ListenerPluginExt for T { Ok(()) } + /// Checks whether the current process has permission to capture audio from the microphone. + /// + /// On macOS this queries the system authorization status. On other platforms this attempts to open a microphone input and read a sample to infer access. Returns `Ok(true)` when permission is available, `Ok(false)` when permission is denied or cannot be obtained, and `Err` for underlying OS/IO errors. + /// + /// # Examples + /// + /// ``` + /// # use futures::executor::block_on; + /// # struct Dummy; + /// # mod crate { pub enum Error {} } + /// # impl Dummy { async fn check_microphone_access(&self) -> Result { Ok(true) } } + /// # let plugin = Dummy; + /// let has_access = block_on(plugin.check_microphone_access()).unwrap(); + /// assert!(has_access == true || has_access == false); + /// ``` #[tracing::instrument(skip_all)] async fn check_microphone_access(&self) -> Result { #[cfg(target_os = "macos")] @@ -103,6 +151,20 @@ impl> ListenerPluginExt for T { Ok(hypr_tcc::audio_capture_permission_granted()) } + /// Requests microphone capture permission from the operating system. + /// + /// On macOS this calls the AVFoundation API to prompt the user for microphone access; on other platforms it attempts to open the default microphone stream to trigger or verify permission. The call performs no further side effects beyond initiating or checking the permission request. + /// + /// # Examples + /// + /// ```no_run + /// // Initiate a permission request and propagate any error. + /// listener.request_microphone_access().await?; + /// ``` + /// + /// # Returns + /// + /// `Ok(())` if the permission request was initiated or checked without an internal error, `Err(crate::Error)` if an error occurred while attempting to initiate or verify the request. #[tracing::instrument(skip_all)] async fn request_microphone_access(&self) -> Result<(), crate::Error> { #[cfg(target_os = "macos")] @@ -173,6 +235,17 @@ impl> ListenerPluginExt for T { Ok(()) } + /// Opens the macOS Privacy → Microphone settings pane. + /// + /// Attempts to launch the system Settings app to the Microphone privacy page. + /// Returns `Ok(())` on success, or `Err(crate::Error::IoError)` if spawning or waiting for the `open` process fails. + /// + /// # Examples + /// + /// ```no_run + /// // call from an async context where `self` is available (e.g., inside an impl or test harness) + /// // self.open_microphone_access_settings().await.unwrap(); + /// ``` #[tracing::instrument(skip_all)] async fn open_microphone_access_settings(&self) -> Result<(), crate::Error> { std::process::Command::new("open") @@ -184,6 +257,23 @@ impl> ListenerPluginExt for T { Ok(()) } + /// Opens the macOS Privacy > Audio Capture settings pane. + /// + /// Attempts to launch the system Settings (System Preferences) pane for Audio Capture + /// and waits for the spawned process to exit. + /// + /// # Returns + /// + /// `Ok(())` if the settings process was spawned and waited on successfully, `Err(crate::Error::IoError(_))` if spawning or waiting for the process failed. + /// + /// # Examples + /// + /// ``` + /// # use futures::executor::block_on; + /// # async fn run_example(plugin: &impl ListenerPluginExt) { + /// plugin.open_system_audio_access_settings().await.unwrap(); + /// # } + /// ``` #[tracing::instrument(skip_all)] async fn open_system_audio_access_settings(&self) -> Result<(), crate::Error> { std::process::Command::new("open") @@ -195,6 +285,21 @@ impl> ListenerPluginExt for T { Ok(()) } + /// Get the current finite-state-machine state. + /// + /// # Returns + /// + /// The current FSM state (cloned). + /// + /// # Examples + /// + /// ```no_run + /// // Obtain a listener instance in your application context, then: + /// // let listener = ...; + /// // Use a runtime to await the async call: + /// use futures::executor::block_on; + /// let state = block_on(async { listener.get_state().await }); + /// ``` #[tracing::instrument(skip_all)] async fn get_state(&self) -> crate::fsm::State { let state: tauri::State<'_, crate::SharedState> = self.state::(); @@ -202,6 +307,15 @@ impl> ListenerPluginExt for T { guard.fsm.state().clone() } + /// Report whether the microphone is currently muted. + /// + /// # Examples + /// + /// ``` + /// // In an async context: + /// let muted = plugin.get_mic_muted().await; + /// println!("microphone muted: {}", muted); + /// ``` #[tracing::instrument(skip_all)] async fn get_mic_muted(&self) -> bool { let state: tauri::State<'_, crate::SharedState> = self.state::(); @@ -212,6 +326,16 @@ impl> ListenerPluginExt for T { } } + /// Returns whether the system speaker is currently muted. + /// + /// # Examples + /// + /// ```no_run + /// # async fn doc_example(plugin: &impl crate::ListenerPluginExt) { + /// let muted = plugin.get_speaker_muted().await; + /// assert!(muted == true || muted == false); + /// # } + /// ``` #[tracing::instrument(skip_all)] async fn get_speaker_muted(&self) -> bool { let state: tauri::State<'_, crate::SharedState> = self.state::(); @@ -287,4 +411,4 @@ impl> ListenerPluginExt for T { guard.fsm.handle(&event).await; } } -} +} \ No newline at end of file diff --git a/plugins/listener/src/fsm.rs b/plugins/listener/src/fsm.rs index 46bbe06f0..6801a26d7 100644 --- a/plugins/listener/src/fsm.rs +++ b/plugins/listener/src/fsm.rs @@ -198,6 +198,30 @@ impl Session { } } + /// Initialize and start all per-session audio and processing resources for the given session id. + /// + /// Sets up session state, mute controls, audio input/output streams, AEC and processing pipelines, + /// background tasks for saving and streaming audio, and the listen client integration. On success, + /// the session's background tasks are stored on the Session and begin running. + /// + /// # Parameters + /// + /// * `id` — The session identifier to set up resources for. + /// + /// # Returns + /// + /// `Ok(())` if all required resources were initialized and background tasks were started; an error + /// variant of `crate::Error` if initialization failed (for example missing user id, I/O errors, + /// or failure to create required clients). + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(mut session: crate::Session) -> Result<(), crate::Error> { + /// session.setup_resources("session-123").await?; + /// # Ok(()) + /// # } + /// ``` #[tracing::instrument(skip_all)] async fn setup_resources(&mut self, id: impl Into) -> Result<(), crate::Error> { use tauri_plugin_db::DatabasePluginExt; @@ -643,6 +667,24 @@ async fn setup_listen_client( .build_dual()) } +/// Appends the provided words to the session identified by `session_id`, persists the session, and returns the updated words. +/// +/// If the session does not exist, returns `crate::Error::NoneSession`. Failures while upserting the session are logged but do not change the returned words or cause an error. +/// +/// # Returns +/// +/// The session's full `words` vector after appending the given entries. +/// +/// # Examples +/// +/// ```no_run +/// # use tauri::AppHandle; +/// # async fn demo(app: &AppHandle) -> Result<(), crate::Error> { +/// let new_words = vec![]; +/// let updated = crate::session::update_session(app, "session-id", new_words).await?; +/// println!("Session now has {} words", updated.len()); +/// # Ok(()) } +/// ``` async fn update_session( app: &tauri::AppHandle, session_id: impl Into, @@ -681,6 +723,36 @@ pub enum StateEvent { state(derive(Debug, Clone, PartialEq)) )] impl Session { + /// Handle common state-machine events shared across states. + /// + /// Processes `StateEvent::MicMuted` and `StateEvent::SpeakerMuted` by sending the mute value + /// to the corresponding watch channel (if present) and emitting a session event to the app. + /// Processes `StateEvent::MicChange` by updating the stored microphone device name and, if a + /// session is active, tearing down and reinitializing session resources; setup failures are + /// logged but do not panic. All other events are delegated to the parent state (`Super`). + /// + /// # Parameters + /// + /// - `event`: the incoming `StateEvent` to handle; recognized variants are `MicMuted`, + /// `SpeakerMuted`, and `MicChange`. + /// + /// # Returns + /// + /// `Handled` when the function processes the event here; `Super` to delegate handling to the + /// parent state for all other events. + /// + /// # Examples + /// + /// ```no_run + /// # use tokio::runtime::Runtime; + /// # let rt = Runtime::new().unwrap(); + /// # rt.block_on(async { + /// // `sm` is the state-machine instance; call `common` to handle a mute event. + /// // This example is illustrative and marked `no_run` because constructing a full state + /// // machine is out of scope for the snippet. + /// // sm.common(&StateEvent::MicMuted(true)).await; + /// # }); + /// ``` #[superstate] async fn common(&mut self, event: &StateEvent) -> Response { match event { @@ -818,6 +890,18 @@ impl Session { } } + /// Handle a state transition by emitting the matching session event and updating the session state channel. + /// + /// Logs the transition in debug builds, emits `SessionEvent::RunningActive`, `SessionEvent::RunningPaused`, + /// or `SessionEvent::Inactive` according to `target`, and, if present, forwards the new `target` state on + /// `session_state_tx`. + /// + /// # Examples + /// + /// ```rust,no_run + /// // Given a mutable `session: Session`, notify it of a transition: + /// // session.on_transition(&State::Inactive {}, &State::RunningActive {}); + /// ``` fn on_transition(&mut self, source: &State, target: &State) { #[cfg(debug_assertions)] tracing::info!("transitioned from `{:?}` to `{:?}`", source, target); @@ -860,4 +944,4 @@ impl specta::Type for State { ) -> specta::DataType { specta::datatype::PrimitiveType::String.into() } -} +} \ No newline at end of file From 0242737ddbd06685265fff6a17e220c15a207903 Mon Sep 17 00:00:00 2001 From: pythontilk Date: Mon, 10 Nov 2025 16:33:09 +0100 Subject: [PATCH 08/84] refactor(audio): deduplicate mic config validation; fix lib.rs signatures; clean speaker cfg gating and linux impl typo; doc trims --- crates/audio/src/lib.rs | 13 +-- crates/audio/src/mic.rs | 160 +++++++++++------------------- crates/audio/src/speaker/linux.rs | 91 +---------------- crates/audio/src/speaker/mod.rs | 125 +++-------------------- 4 files changed, 72 insertions(+), 317 deletions(-) diff --git a/crates/audio/src/lib.rs b/crates/audio/src/lib.rs index 4576426ac..94ca362d1 100644 --- a/crates/audio/src/lib.rs +++ b/crates/audio/src/lib.rs @@ -69,18 +69,7 @@ pub struct AudioInput { } impl AudioInput { - /// Get the name of the system's default input (microphone) device. - /// - /// # Returns - /// - /// A `String` with the device name, `"Unknown Microphone"` if the device exists but has no name, or `"No Microphone Available"` if there is no default input device. - /// - /// # Examples - /// - /// ``` - /// let name = get_default_mic_device_name(); - /// assert!(!name.is_empty()); - /// ``` + /// Get the default input device name pub fn get_default_mic_device_name() -> String { let host = cpal::default_host(); if let Some(device) = host.default_input_device() { diff --git a/crates/audio/src/mic.rs b/crates/audio/src/mic.rs index 586b6738a..44cf00722 100644 --- a/crates/audio/src/mic.rs +++ b/crates/audio/src/mic.rs @@ -16,6 +16,56 @@ pub struct MicInput { config: cpal::SupportedStreamConfig, } +fn common_test_configs() -> Vec { + vec![ + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ), + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(44100), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ), + cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::I16, + ), + ] +} + +fn try_validate_config(device: &cpal::Device, config: &cpal::SupportedStreamConfig) -> bool { + let test_result = match config.sample_format() { + cpal::SampleFormat::F32 => device.build_input_stream::( + &config.config(), + |_data: &[f32], _: &cpal::InputCallbackInfo| {}, + |err| tracing::debug!("Test stream error: {}", err), + None, + ), + cpal::SampleFormat::I16 => device.build_input_stream::( + &config.config(), + |_data: &[i16], _: &cpal::InputCallbackInfo| {}, + |err| tracing::debug!("Test stream error: {}", err), + None, + ), + other => { + tracing::debug!("Unsupported sample format for testing: {:?}", other); + return false; + } + }; + if test_result.is_ok() { + if let Ok(name) = device.name() { tracing::debug!("Validated config for device: {}", name); } + true + } else { + false + } +} + impl MicInput { pub fn device_name(&self) -> String { self.device @@ -118,60 +168,11 @@ impl MicInput { tracing::debug!("Trying default host device with manual config: {}", name); // Try common configurations that should work with PipeWire - let configs_to_try = vec![ - // 48kHz stereo float32 - common PipeWire config - cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(48000), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::F32, - ), - // 44.1kHz stereo float32 - common audio config - cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(44100), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::F32, - ), - // 48kHz stereo int16 - alternative format - cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(48000), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::I16, - ), - ]; + let configs_to_try = common_test_configs(); for config in configs_to_try { tracing::debug!("Trying manual config: {:?}", config); - - // Try to build a test stream to validate the config - let test_result = match config.sample_format() { - cpal::SampleFormat::F32 => { - default_device.build_input_stream::( - &config.config(), - |_data: &[f32], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing - |err| tracing::debug!("Test stream error: {}", err), - None, - ) - }, - cpal::SampleFormat::I16 => { - default_device.build_input_stream::( - &config.config(), - |_data: &[i16], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing - |err| tracing::debug!("Test stream error: {}", err), - None, - ) - }, - _ => { - tracing::debug!("Unsupported sample format for testing"); - continue; - } - }; - - if let Ok(test_stream) = test_result { - // If we can build a stream, the config is good - drop(test_stream); // Clean up the test stream + if try_validate_config(default_device, &config) { tracing::debug!("Successfully validated config for device: {}", name); return Ok(Self { host, @@ -179,8 +180,7 @@ impl MicInput { config, }); } else { - tracing::debug!("Failed to build test stream with config: {:?}", config); - tracing::debug!("Test result error: {:?}", test_result.err()); + tracing::debug!("Failed to validate config: {:?}", config); } } } @@ -214,60 +214,12 @@ impl MicInput { tracing::debug!("ALSADevice: {}", name); // Try the same configurations - let configs_to_try = vec![ - // 48kHz stereo float32 - common PipeWire config - cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(48000), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::F32, - ), - // 44.1kHz stereo float32 - common audio config - cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(44100), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::F32, - ), - // 48kHz stereo int16 - alternative format - cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(48000), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::I16, - ), - ]; + let configs_to_try = common_test_configs(); for config in configs_to_try { tracing::debug!("Trying ALSA manual config: {:?}", config); - // Try to build a test stream to validate the config - let test_result = match config.sample_format() { - cpal::SampleFormat::F32 => { - device.build_input_stream::( - &config.config(), - |_data: &[f32], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing - |err| tracing::debug!("Test stream error: {}", err), - None, - ) - }, - cpal::SampleFormat::I16 => { - device.build_input_stream::( - &config.config(), - |_data: &[i16], _: &cpal::InputCallbackInfo| {}, // Empty callback for testing - |err| tracing::debug!("Test stream error: {}", err), - None, - ) - }, - _ => { - tracing::debug!("Unsupported sample format for testing"); - continue; - } - }; - - if let Ok(test_stream) = test_result { - // If we can build a stream, the config is good - drop(test_stream); // Clean up the test stream + if try_validate_config(&device, &config) { tracing::debug!("Successfully validated ALSA config for device: {}", name); return Ok(Self { host: alsa_host, @@ -275,7 +227,7 @@ impl MicInput { config, }); } else { - tracing::debug!("Failed to build ALSA test stream with config: {:?}", config); + tracing::debug!("Failed to validate ALSA config: {:?}", config); } } } diff --git a/crates/audio/src/speaker/linux.rs b/crates/audio/src/speaker/linux.rs index fb40e45ee..ed295b5f1 100644 --- a/crates/audio/src/speaker/linux.rs +++ b/crates/audio/src/speaker/linux.rs @@ -22,17 +22,6 @@ impl SpeakerInput { Ok(Self {}) } - /// Creates a `SpeakerStream` for receiving speaker input samples. - /// - /// Returns a `SpeakerStream` that yields `f32` audio samples (silence in the current mock). - /// - /// # Examples - /// - /// ``` - /// let input = crate::speaker::linux::SpeakerInput::new().unwrap(); - /// let mut stream = input.stream(); - /// assert_eq!(stream.sample_rate(), 48000); - /// ``` pub fn stream(self) -> SpeakerStream { tracing::debug!("Creating Linux SpeakerStream"); SpeakerStream::new() @@ -45,38 +34,19 @@ pub struct SpeakerStream { } impl SpeakerStream { - /// Creates a new `SpeakerStream` that produces a continuous stream of silence. - /// - /// The returned stream delivers `f32` audio samples (silence as `0.0`) and preserves - /// a background thread for sample production until the stream is dropped. - /// - /// # Examples - /// - /// ``` - /// use futures::stream::StreamExt; - /// - /// let mut stream = SpeakerStream::new(); - /// let sample = futures::executor::block_on(async { stream.next().await }).unwrap(); - /// assert_eq!(sample, 0.0); - /// ``` pub fn new() -> Self { tracing::debug!("Creating Linux SpeakerStream"); - // For now, we'll create a mock implementation that generates silence - // A proper implementation would capture system audio using ALSA + // Mock implementation: proper implementation would capture system audio using ALSA let (sender, receiver) = mpsc::channel::(); - // Spawn a thread to simulate audio capture let handle = thread::spawn(move || { tracing::debug!("Starting Linux SpeakerStream thread"); loop { - // Send silence (0.0) to simulate no audio - // In a real implementation, this would capture actual system audio if sender.send(0.0).is_err() { tracing::debug!("SpeakerStream channel closed, exiting thread"); - break; // Channel closed + break; } - // Small delay to prevent busy looping thread::sleep(Duration::from_millis(10)); } }); @@ -87,54 +57,14 @@ impl SpeakerStream { } } - /// Audio sample rate for the speaker stream. - /// - /// The method reports the sample rate used for audio frames. - /// - /// # Returns - /// - /// The sample rate in hertz (48000). - /// - /// # Examples - /// - /// ``` - /// let stream = SpeakerStream::new(); - /// assert_eq!(stream.sample_rate(), 48000); - /// ``` pub fn sample_rate(&self) -> u32 { - 48000 // Standard sample rate + 48000 } } impl Stream for SpeakerStream { type Item = f32; - /// Polls the stream for the next audio sample from the internal channel. - /// - /// Returns `Poll::Ready(Some(sample))` when a sample is available, `Poll::Pending` and - /// schedules the task to be woken when no sample is currently available, and - /// `Poll::Ready(None)` when the producer side of the channel has been disconnected, - /// signalling the end of the stream. - /// - /// # Examples - /// - /// ``` - /// use futures::stream::StreamExt; - /// use std::pin::Pin; - /// - /// // Create the speaker stream and pin it for polling. - /// let stream = crate::speaker::linux::SpeakerStream::new(); - /// let mut pinned = Box::pin(stream); - /// - /// // Poll the stream asynchronously to get the next sample. - /// let sample = futures::executor::block_on(async { - /// pinned.as_mut().next().await - /// }); - /// - /// // The implementation sends silence (0.0) periodically, so we should get a sample - /// // while the background producer thread is running. - /// assert!(sample.is_some()); - /// ``` fn poll_next( self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -142,8 +72,6 @@ impl Stream for SpeakerStream { match self.receiver.try_recv() { Ok(sample) => Poll::Ready(Some(sample)), Err(mpsc::TryRecvError::Empty) => { - // No data available right now, but we'll check again later - cx.waker().wake_by_ref(); Poll::Pending } Err(mpsc::TryRecvError::Disconnected) => Poll::Ready(None), @@ -152,20 +80,7 @@ impl Stream for SpeakerStream { } impl Drop for SpeakerStream { - /// Logs when the SpeakerStream is dropped and allows its background producer to terminate by closing the channel. - /// - /// Dropping the stream closes its receiving endpoint; the background thread will observe the channel closure and exit. - /// - /// # Examples - /// - /// ``` - /// # use crates::audio::speaker::linux::SpeakerStream; - /// let stream = SpeakerStream::new(); - /// drop(stream); - /// ``` fn drop(&mut self) { - // The thread will automatically exit when the sender is dropped - // and the receiver gets a Disconnected error tracing::debug!("Dropping SpeakerStream"); } } \ No newline at end of file diff --git a/crates/audio/src/speaker/mod.rs b/crates/audio/src/speaker/mod.rs index b1c1452ab..60c50dde1 100644 --- a/crates/audio/src/speaker/mod.rs +++ b/crates/audio/src/speaker/mod.rs @@ -29,78 +29,29 @@ pub struct SpeakerInput { } impl SpeakerInput { - /// Creates a platform-specific speaker input initialized for the current OS. - /// - /// # Returns - /// - /// `Ok(Self)` containing a `SpeakerInput` if initialization succeeds, `Err` with the underlying error otherwise. - /// - /// # Examples - /// - /// ```no_run - /// let input = SpeakerInput::new().expect("failed to create speaker input"); - /// let _stream = input.stream().expect("failed to open speaker stream"); - /// ``` + /// Initialize platform-specific speaker capture #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub fn new() -> Result { let inner = PlatformSpeakerInput::new()?; Ok(Self { inner }) } - /// Indicates that SpeakerInput is unsupported on the current platform. - /// - /// # Returns - /// - /// An `Err` containing an error stating that `SpeakerInput::new` is not supported on this platform. - /// - /// # Examples - /// - /// ``` - /// // This function is compiled only on unsupported platforms. - /// let err = crate::speaker::SpeakerInput::new().unwrap_err(); - /// let msg = format!("{}", err); - /// assert!(msg.contains("SpeakerInput::new") || msg.contains("not supported")); - /// ``` + /// Unsupported platforms return an error + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] pub fn new() -> Result { Err(anyhow::anyhow!( "'SpeakerInput::new' is not supported on this platform" )) } - /// Create a `SpeakerStream` by consuming this `SpeakerInput`. - /// - /// # Returns - /// - /// A `Result` containing the created `SpeakerStream` on success, or an error describing why the stream could not be created. - /// - /// # Examples - /// - /// ``` - /// # use kalosm_sound::speaker::SpeakerInput; - /// let input = SpeakerInput::new().unwrap(); - /// let stream = input.stream().unwrap(); - /// let _rate = stream.sample_rate(); - /// ``` + /// Consume input and open speaker stream + #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub fn stream(self) -> Result { let inner = self.inner.stream(); Ok(SpeakerStream { inner }) } - /// Attempts to obtain a speaker input stream on platforms that do not support speaker capture. - /// - /// # Returns - /// - /// An `Err` containing a message that `SpeakerInput::stream` is not supported on the current platform. - /// - /// # Examples - /// - /// ``` - /// // This example shows that calling `stream` on unsupported platforms yields an error. - /// # use anyhow::Result; - /// # fn try_stream() -> Result<()> { - /// # Err(anyhow::anyhow!("example"))?; // placeholder to make doctest compile when not run - /// # } - /// ``` + /// Not supported on this platform #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] pub fn stream(self) -> Result { Err(anyhow::anyhow!( @@ -117,29 +68,7 @@ pub struct SpeakerStream { impl Stream for SpeakerStream { type Item = f32; - /// Polls the stream for the next audio sample. - /// - /// # Returns - /// - /// `Poll::Ready(Some(f32))` with the next sample when available, `Poll::Ready(None)` if the stream ended, - /// or `Poll::Pending` if no data is currently available. - /// - /// # Examples - /// - /// ```no_run - /// use std::pin::Pin; - /// use std::task::{Context, Poll, Waker}; - /// // Assume `stream` is a `SpeakerStream` obtained from `SpeakerInput::stream()`. - /// // let mut stream = ...; - /// // let mut pinned = Box::pin(stream); - /// // let waker = futures::task::noop_waker(); - /// // let mut cx = Context::from_waker(&waker); - /// // match Pin::as_mut(&mut pinned).poll_next(&mut cx) { - /// // Poll::Ready(Some(sample)) => println!("sample: {}", sample), - /// // Poll::Ready(None) => println!("stream ended"), - /// // Poll::Pending => println!("no data yet"), - /// // } - /// ``` + /// Poll for next audio sample fn poll_next( self: std::pin::Pin<&mut Self>, _cx: &mut std::task::Context<'_>, @@ -157,49 +86,19 @@ impl Stream for SpeakerStream { } impl kalosm_sound::AsyncSource for SpeakerStream { - /// Expose this SpeakerStream as an asynchronous stream of audio samples. - /// - /// The returned stream yields `f32` sample values from the underlying speaker input and borrows - /// from `self` for the lifetime of the returned value. - /// - /// # Examples - /// - /// ``` - /// # use futures::stream::StreamExt; - /// # use kalosm_sound::speaker::SpeakerStream; - /// async fn use_stream(mut s: SpeakerStream) { - /// let mut stream = s.as_stream(); - /// // Drive the stream to obtain the next sample (requires an async runtime). - /// let _sample = stream.next().await; - /// } - /// ``` + /// Expose as async stream of f32 samples fn as_stream(&mut self) -> impl Stream + '_ { self } - /// Get the sample rate of the underlying speaker stream in hertz. - /// - /// # Examples - /// - /// ``` - /// let rate = stream.sample_rate(); - /// assert!(rate > 0); - /// ``` + /// Sample rate in Hz + #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] fn sample_rate(&self) -> u32 { self.inner.sample_rate() } - /// Report the stream's sample rate on unsupported platforms. - /// - /// On targets other than macOS, Windows, or Linux this method always reports `0` to indicate the sample rate is unavailable. - /// - /// # Examples - /// - /// ``` - /// // On an unsupported platform this should return 0: - /// // let rate = stream.sample_rate(); - /// // assert_eq!(rate, 0); - /// ``` + /// Sample rate unavailable on unsupported targets + #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))] fn sample_rate(&self) -> u32 { 0 } From ed54deab8520202ac8ecfea5965c961dea657ebb Mon Sep 17 00:00:00 2001 From: pythontilk Date: Mon, 10 Nov 2025 16:50:03 +0100 Subject: [PATCH 09/84] audio+listener+whisper-local: fix doctests/imports, remove duplicate serialize, clean mic builder docs, silence unused variable, ignore local wav artifact --- .gitignore | 5 +- crates/audio/src/lib.rs | 47 +-- crates/audio/src/mic.rs | 561 ++++++++++++++++-------------- crates/audio/src/speaker/linux.rs | 14 +- crates/whisper-local/src/model.rs | 35 +- plugins/listener/src/error.rs | 39 +-- 6 files changed, 347 insertions(+), 354 deletions(-) diff --git a/.gitignore b/.gitignore index 02f86fcbd..fbd717d24 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,7 @@ internal .turbo .windsurfrules -CLAUDE.md \ No newline at end of file + CLAUDE.md + +# Local audio artifacts +crates/audio/normalized_output.wav diff --git a/crates/audio/src/lib.rs b/crates/audio/src/lib.rs index 94ca362d1..a66ac0a74 100644 --- a/crates/audio/src/lib.rs +++ b/crates/audio/src/lib.rs @@ -87,9 +87,9 @@ impl AudioInput { /// /// # Examples /// - /// ``` - /// let devices = crate::audio::list_mic_devices(); - /// // devices is a Vec of device names (may be empty) + /// ```rust + /// use audio::AudioInput; + /// let devices = AudioInput::list_mic_devices(); /// assert!(devices.is_empty() || devices.iter().all(|s| !s.is_empty())); /// ``` pub fn list_mic_devices() -> Vec { @@ -99,11 +99,17 @@ impl AudioInput { .input_devices() .map(|devices| { let device_vec: Vec = devices.collect(); - tracing::debug!("Found {} input devices in list_mic_devices", device_vec.len()); + tracing::debug!( + "Found {} input devices in list_mic_devices", + device_vec.len() + ); device_vec }) .map_err(|e| { - tracing::error!("Failed to enumerate input devices in list_mic_devices: {:?}", e); + tracing::error!( + "Failed to enumerate input devices in list_mic_devices: {:?}", + e + ); e }) .unwrap_or_else(|_| Vec::new()); @@ -131,10 +137,7 @@ impl AudioInput { if std::process::Command::new("pactl") .args(["list", "sources", "short"]) .output() - .map(|output| { - String::from_utf8_lossy(&output.stdout) - .contains("echo-cancel-source") - }) + .map(|output| String::from_utf8_lossy(&output.stdout).contains("echo-cancel-source")) .unwrap_or(false) { if !result.contains(&"echo-cancel-source".to_string()) { @@ -157,12 +160,16 @@ impl AudioInput { /// /// # Examples /// - /// ``` - /// let ai = AudioInput::from_mic(None).expect("failed to open default microphone"); - /// assert!(matches!(ai.source, AudioSource::RealtimeMic)); + /// ```rust + /// use audio::{AudioInput, AudioSource}; + /// let _ai = AudioInput::from_mic(None).expect("failed to open default microphone"); + /// let _ = AudioSource::RealtimeMic; // ensure enum is accessible /// ``` pub fn from_mic(device_name: Option) -> Result { - tracing::info!("Creating AudioInput from microphone with device name: {:?}", device_name); + tracing::info!( + "Creating AudioInput from microphone with device name: {:?}", + device_name + ); let mic = MicInput::new(device_name)?; tracing::debug!("Successfully created MicInput"); @@ -182,13 +189,11 @@ impl AudioInput { /// /// # Examples /// - /// ``` - /// let input = AudioInput::from_speaker(); - /// // `input` is configured for realtime speaker capture; speaker initialization may have failed. - /// match input.source { - /// AudioSource::RealtimeSpeaker => {}, - /// _ => panic!("expected RealtimeSpeaker"), - /// } + /// ```rust + /// use audio::{AudioInput, AudioSource}; + /// let _input = AudioInput::from_speaker(); + /// // private fields are not accessible in doctest; ensure function compiles + /// let _ = AudioSource::RealtimeSpeaker; /// ``` pub fn from_speaker() -> Self { tracing::debug!("Creating AudioInput from speaker"); @@ -292,4 +297,4 @@ impl kalosm_sound::AsyncSource for AudioStream { AudioStream::Recorded { .. } => 16000, } } -} \ No newline at end of file +} diff --git a/crates/audio/src/mic.rs b/crates/audio/src/mic.rs index 44cf00722..1fb3010b5 100644 --- a/crates/audio/src/mic.rs +++ b/crates/audio/src/mic.rs @@ -59,7 +59,9 @@ fn try_validate_config(device: &cpal::Device, config: &cpal::SupportedStreamConf } }; if test_result.is_ok() { - if let Ok(name) = device.name() { tracing::debug!("Validated config for device: {}", name); } + if let Ok(name) = device.name() { + tracing::debug!("Validated config for device: {}", name); + } true } else { false @@ -80,8 +82,9 @@ impl MicInput { /// /// # Examples /// - /// ``` - /// let names = list_devices(); + /// ```rust + /// use audio::MicInput; + /// let names = MicInput::list_devices(); /// assert!(names.iter().all(|n| !n.is_empty())); /// ``` pub fn list_devices() -> Vec { @@ -93,284 +96,308 @@ impl MicInput { } /// Creates a new MicInput by selecting and configuring an available input device. - /// - /// This tries to select the requested device when `device_name` is Some, otherwise it prefers - /// the system default input device and falls back to the first enumerated input device. If no - /// devices are directly usable the initializer attempts platform-specific fallbacks (for example - /// handling echo-cancel-source and ALSA probes) before returning an error. - /// - /// # Parameters - /// - /// - `device_name`: Optional device name to prefer; when `None` the function will use the default - /// input device if valid, otherwise the first available device. - /// - /// # Returns - /// - /// `Ok(Self)` with the chosen host, device, and supported stream configuration on success, - /// `Err(crate::Error::NoInputDevice)` if no usable input device or configuration can be found. - /// - /// # Examples - /// - /// ``` - /// // Create a MicInput using the default device (or fallbacks). - /// let _ = MicInput::new(None); - /// ``` - pub fn new(device_name: Option) -> Result { - let host = cpal::default_host(); - - tracing::info!("Initializing microphone input..."); - - let default_input_device = host.default_input_device(); - tracing::debug!("Default input device: {:?}", default_input_device.as_ref().and_then(|d| d.name().ok())); - - // Log host information - tracing::debug!("Available hosts: {:?}", cpal::available_hosts()); - tracing::debug!("Default host: {:?}", host.id()); - - let input_devices: Vec = host - .input_devices() - .map(|devices| { - let devices: Vec = devices.collect(); - tracing::debug!("Found {} input devices", devices.len()); - devices - }) - .unwrap_or_else(|e| { - tracing::error!("Failed to enumerate input devices: {:?}", e); - Vec::new() - }); - - for (i, device) in input_devices.iter().enumerate() { - match device.name() { - Ok(name) => tracing::debug!("Input device {}: {}", i, name), - Err(e) => tracing::debug!("Input device {}: Failed to get name: {:?}", i, e), - } + /// + /// This tries to select the requested device when `device_name` is Some, otherwise it prefers + /// the system default input device and falls back to the first enumerated input device. If no + /// devices are directly usable the initializer attempts platform-specific fallbacks (for example + /// handling echo-cancel-source and ALSA probes) before returning an error. + /// + /// # Parameters + /// + /// - `device_name`: Optional device name to prefer; when `None` the function will use the default + /// input device if valid, otherwise the first available device. + /// + /// # Returns + /// + /// `Ok(Self)` with the chosen host, device, and supported stream configuration on success, + /// `Err(crate::Error::NoInputDevice)` if no usable input device or configuration can be found. + /// + /// # Examples + /// + /// ```rust + /// use audio::MicInput; + /// let _ = MicInput::new(None); + /// ``` + pub fn new(device_name: Option) -> Result { + let host = cpal::default_host(); + + tracing::info!("Initializing microphone input..."); + + let default_input_device = host.default_input_device(); + tracing::debug!( + "Default input device: {:?}", + default_input_device.as_ref().and_then(|d| d.name().ok()) + ); + + // Log host information + tracing::debug!("Available hosts: {:?}", cpal::available_hosts()); + tracing::debug!("Default host: {:?}", host.id()); + + let input_devices: Vec = host + .input_devices() + .map(|devices| { + let devices: Vec = devices.collect(); + tracing::debug!("Found {} input devices", devices.len()); + devices + }) + .unwrap_or_else(|e| { + tracing::error!("Failed to enumerate input devices: {:?}", e); + Vec::new() + }); + + for (i, device) in input_devices.iter().enumerate() { + match device.name() { + Ok(name) => tracing::debug!("Input device {}: {}", i, name), + Err(e) => tracing::debug!("Input device {}: Failed to get name: {:?}", i, e), } + } - // Special handling for echo-cancel-source - if device_name.as_ref().map(|n| n.as_str()) == Some("echo-cancel-source") || - (device_name.is_none() && input_devices.is_empty()) { - - // Check if echo-cancel-source is available - let echo_cancel_available = std::process::Command::new("pactl") - .args(["list", "sources", "short"]) - .output() - .map(|output| { - String::from_utf8_lossy(&output.stdout) - .contains("echo-cancel-source") - }) - .unwrap_or(false); - - if echo_cancel_available { - tracing::debug!("Echo cancel source available in pactl: {}", echo_cancel_available); - - if let Some(ref default_device) = default_input_device { - if let Ok(name) = default_device.name() { - tracing::debug!("Trying default host device with manual config: {}", name); - - // Try common configurations that should work with PipeWire - let configs_to_try = common_test_configs(); - - for config in configs_to_try { - tracing::debug!("Trying manual config: {:?}", config); - if try_validate_config(default_device, &config) { - tracing::debug!("Successfully validated config for device: {}", name); - return Ok(Self { - host, - device: default_device.clone(), - config, - }); - } else { - tracing::debug!("Failed to validate config: {:?}", config); - } + // Special handling for echo-cancel-source + if device_name.as_ref().map(|n| n.as_str()) == Some("echo-cancel-source") + || (device_name.is_none() && input_devices.is_empty()) + { + // Check if echo-cancel-source is available + let echo_cancel_available = std::process::Command::new("pactl") + .args(["list", "sources", "short"]) + .output() + .map(|output| { + String::from_utf8_lossy(&output.stdout).contains("echo-cancel-source") + }) + .unwrap_or(false); + + if echo_cancel_available { + tracing::debug!( + "Echo cancel source available in pactl: {}", + echo_cancel_available + ); + + if let Some(ref default_device) = default_input_device { + if let Ok(name) = default_device.name() { + tracing::debug!("Trying default host device with manual config: {}", name); + + // Try common configurations that should work with PipeWire + let configs_to_try = common_test_configs(); + + for config in configs_to_try { + tracing::debug!("Trying manual config: {:?}", config); + if try_validate_config(default_device, &config) { + tracing::debug!( + "Successfully validated config for device: {}", + name + ); + return Ok(Self { + host, + device: default_device.clone(), + config, + }); + } else { + tracing::debug!("Failed to validate config: {:?}", config); } } } + } - // If all manual configurations failed but we know echo-cancel-source exists, - // return a standard configuration that should work - tracing::debug!("All manual configurations failed, but echo-cancel-source is available. Using standard config."); - if let Some(ref default_device) = default_input_device { - let standard_config = cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(48000), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::F32, - ); - return Ok(Self { - host, - device: default_device.clone(), - config: standard_config, - }); - } + // If all manual configurations failed but we know echo-cancel-source exists, + // return a standard configuration that should work + tracing::debug!("All manual configurations failed, but echo-cancel-source is available. Using standard config."); + if let Some(ref default_device) = default_input_device { + let standard_config = cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ); + return Ok(Self { + host, + device: default_device.clone(), + config: standard_config, + }); + } + + // If the default device didn't work, try ALSA host + if let Ok(alsa_host) = cpal::host_from_id(cpal::HostId::Alsa) { + tracing::debug!("Created ALSA host successfully"); - // If the default device didn't work, try ALSA host - if let Ok(alsa_host) = cpal::host_from_id(cpal::HostId::Alsa) { - tracing::debug!("Created ALSA host successfully"); - - // Try the same approach with ALSA host - if let Ok(devices) = alsa_host.input_devices() { - for device in devices { - if let Ok(name) = device.name() { - tracing::debug!("ALSADevice: {}", name); - - // Try the same configurations - let configs_to_try = common_test_configs(); - - for config in configs_to_try { - tracing::debug!("Trying ALSA manual config: {:?}", config); - - if try_validate_config(&device, &config) { - tracing::debug!("Successfully validated ALSA config for device: {}", name); - return Ok(Self { - host: alsa_host, - device, - config, - }); - } else { - tracing::debug!("Failed to validate ALSA config: {:?}", config); - } + // Try the same approach with ALSA host + if let Ok(devices) = alsa_host.input_devices() { + for device in devices { + if let Ok(name) = device.name() { + tracing::debug!("ALSADevice: {}", name); + + // Try the same configurations + let configs_to_try = common_test_configs(); + + for config in configs_to_try { + tracing::debug!("Trying ALSA manual config: {:?}", config); + + if try_validate_config(&device, &config) { + tracing::debug!( + "Successfully validated ALSA config for device: {}", + name + ); + return Ok(Self { + host: alsa_host, + device, + config, + }); + } else { + tracing::debug!( + "Failed to validate ALSA config: {:?}", + config + ); } } } - } else { - tracing::debug!("Failed to enumerate ALSA input devices"); } } else { - tracing::debug!("Failed to create ALSA host"); + tracing::debug!("Failed to enumerate ALSA input devices"); } + } else { + tracing::debug!("Failed to create ALSA host"); + } - // If ALSA approaches also failed but we know echo-cancel-source exists, - // return a standard configuration that should work - tracing::debug!("All ALSA configurations failed, but echo-cancel-source is available. Using standard config."); - if let Some(ref default_device) = default_input_device { - let standard_config = cpal::SupportedStreamConfig::new( - cpal::ChannelCount::from(2u16), - cpal::SampleRate(48000), - cpal::SupportedBufferSize::Unknown, - cpal::SampleFormat::F32, - ); + // If ALSA approaches also failed but we know echo-cancel-source exists, + // return a standard configuration that should work + tracing::debug!("All ALSA configurations failed, but echo-cancel-source is available. Using standard config."); + if let Some(ref default_device) = default_input_device { + let standard_config = cpal::SupportedStreamConfig::new( + cpal::ChannelCount::from(2u16), + cpal::SampleRate(48000), + cpal::SupportedBufferSize::Unknown, + cpal::SampleFormat::F32, + ); + return Ok(Self { + host, + device: default_device.clone(), + config: standard_config, + }); + } + } + } + + // If we have no input devices, try to use the default device directly + if input_devices.is_empty() { + tracing::warn!("No input devices found through enumeration"); + + // Try to use the default device directly + if let Some(default_device) = default_input_device { + tracing::debug!("Trying default device directly"); + match default_device.default_input_config() { + Ok(config) => { + tracing::debug!("Default device works directly"); return Ok(Self { host, - device: default_device.clone(), - config: standard_config, + device: default_device, + config, }); } - } - } - - // If we have no input devices, try to use the default device directly - if input_devices.is_empty() { - tracing::warn!("No input devices found through enumeration"); - - // Try to use the default device directly - if let Some(default_device) = default_input_device { - tracing::debug!("Trying default device directly"); - match default_device.default_input_config() { - Ok(config) => { - tracing::debug!("Default device works directly"); - return Ok(Self { - host, - device: default_device, - config, - }); - }, - Err(e) => { - tracing::error!("Default device failed even when accessed directly: {:?}", e); - } + Err(e) => { + tracing::error!( + "Default device failed even when accessed directly: {:?}", + e + ); } } + } - // If that fails, try some known working ALSA device names - tracing::debug!("Trying known ALSA device names"); - let known_devices = vec![ - "default:CARD=Generic_1", - "plughw:CARD=Generic_1,DEV=0", - "hw:CARD=Generic_1,DEV=0", - ]; + // If that fails, try some known working ALSA device names + tracing::debug!("Trying known ALSA device names"); + let known_devices = vec![ + "default:CARD=Generic_1", + "plughw:CARD=Generic_1,DEV=0", + "hw:CARD=Generic_1,DEV=0", + ]; - // Note: CPAL doesn't provide a way to create devices by name directly - // So we can't implement this workaround with the current library - tracing::warn!("Known ALSA device names: {:?}", known_devices); + // Note: CPAL doesn't provide a way to create devices by name directly + // So we can't implement this workaround with the current library + tracing::warn!("Known ALSA device names: {:?}", known_devices); - tracing::error!("No input devices available"); - return Err(crate::Error::NoInputDevice); - } + tracing::error!("No input devices available"); + return Err(crate::Error::NoInputDevice); + } - let device = match device_name { - None => { - // Try default device first - let default_device_works = if let Some(ref device) = default_input_device { - if let Ok(name) = device.name() { - tracing::debug!("Trying default input device: {}", name); - } + let device = match device_name { + None => { + // Try default device first + let default_device_works = if let Some(ref device) = default_input_device { + if let Ok(name) = device.name() { + tracing::debug!("Trying default input device: {}", name); + } - // Try to get config for default device - match device.default_input_config() { - Ok(_) => { - tracing::debug!("Default device is working"); - true - }, - Err(e) => { - tracing::warn!("Default device not working: {:?}, falling back to first available device", e); - false - } + // Try to get config for default device + match device.default_input_config() { + Ok(_) => { + tracing::debug!("Default device is working"); + true } - } else { - tracing::warn!("No default input device found"); - false - }; - - if default_device_works { - default_input_device.unwrap() - } else { - tracing::debug!("Using first available device"); - input_devices[0].clone() - } - }, - Some(name) => { - tracing::debug!("Looking for device with name: {}", name); - let device = input_devices - .iter() - .find(|d| d.name().unwrap_or_default() == name) - .cloned(); - - match device { - Some(device) => { - if let Ok(name) = device.name() { - tracing::debug!("Found requested device: {}", name); - } - device - }, - None => { - tracing::warn!("Requested device '{}' not found, using first available device", name); - input_devices[0].clone() + Err(e) => { + tracing::warn!("Default device not working: {:?}, falling back to first available device", e); + false } } - }, - }; + } else { + tracing::warn!("No default input device found"); + false + }; - match device.name() { - Ok(name) => tracing::debug!("Selected device: {}", name), - Err(e) => tracing::warn!("Selected device with unknown name: {:?}", e), + if default_device_works { + default_input_device.unwrap() + } else { + tracing::debug!("Using first available device"); + input_devices[0].clone() + } } - - let config = match device.default_input_config() { - Ok(config) => { - tracing::debug!("Successfully got default input config: {:?}", config); - config - }, - Err(e) => { - tracing::error!("Failed to get default input config for device {:?}: {:?}", device.name().unwrap_or_default(), e); - return Err(crate::Error::NoInputDevice); + Some(name) => { + tracing::debug!("Looking for device with name: {}", name); + let device = input_devices + .iter() + .find(|d| d.name().unwrap_or_default() == name) + .cloned(); + + match device { + Some(device) => { + if let Ok(name) = device.name() { + tracing::debug!("Found requested device: {}", name); + } + device + } + None => { + tracing::warn!( + "Requested device '{}' not found, using first available device", + name + ); + input_devices[0].clone() + } } - }; + } + }; - Ok(Self { - host, - device, - config, - }) + match device.name() { + Ok(name) => tracing::debug!("Selected device: {}", name), + Err(e) => tracing::warn!("Selected device with unknown name: {:?}", e), } + + let config = match device.default_input_config() { + Ok(config) => { + tracing::debug!("Successfully got default input config: {:?}", config); + config + } + Err(e) => { + tracing::error!( + "Failed to get default input config for device {:?}: {:?}", + device.name().unwrap_or_default(), + e + ); + return Err(crate::Error::NoInputDevice); + } + }; + + Ok(Self { + host, + device, + config, + }) + } } impl MicInput { @@ -503,24 +530,24 @@ mod tests { use futures_util::StreamExt; #[tokio::test] - async fn test_mic() { - let mic = match MicInput::new(None) { - Ok(mic) => mic, - Err(_) => { - // Skip test if no microphone is available - return; - } - }; - let mut stream = mic.stream(); - - let mut buffer = Vec::new(); - while let Some(sample) = stream.next().await { - buffer.push(sample); - if buffer.len() > 6000 { - break; - } + async fn test_mic() { + let mic = match MicInput::new(None) { + Ok(mic) => mic, + Err(_) => { + // Skip test if no microphone is available + return; + } + }; + let mut stream = mic.stream(); + + let mut buffer = Vec::new(); + while let Some(sample) = stream.next().await { + buffer.push(sample); + if buffer.len() > 6000 { + break; } - - assert!(buffer.iter().any(|x| *x != 0.0)); } -} \ No newline at end of file + + assert!(buffer.iter().any(|x| *x != 0.0)); + } +} diff --git a/crates/audio/src/speaker/linux.rs b/crates/audio/src/speaker/linux.rs index ed295b5f1..3d6e78b49 100644 --- a/crates/audio/src/speaker/linux.rs +++ b/crates/audio/src/speaker/linux.rs @@ -14,7 +14,8 @@ impl SpeakerInput { /// /// # Examples /// - /// ``` + /// ```rust + /// use audio::SpeakerInput; /// let input = SpeakerInput::new().unwrap(); /// ``` pub fn new() -> Result { @@ -65,15 +66,10 @@ impl SpeakerStream { impl Stream for SpeakerStream { type Item = f32; - fn poll_next( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { + fn poll_next(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { match self.receiver.try_recv() { Ok(sample) => Poll::Ready(Some(sample)), - Err(mpsc::TryRecvError::Empty) => { - Poll::Pending - } + Err(mpsc::TryRecvError::Empty) => Poll::Pending, Err(mpsc::TryRecvError::Disconnected) => Poll::Ready(None), } } @@ -83,4 +79,4 @@ impl Drop for SpeakerStream { fn drop(&mut self) { tracing::debug!("Dropping SpeakerStream"); } -} \ No newline at end of file +} diff --git a/crates/whisper-local/src/model.rs b/crates/whisper-local/src/model.rs index af8df159a..e442fae04 100644 --- a/crates/whisper-local/src/model.rs +++ b/crates/whisper-local/src/model.rs @@ -81,44 +81,19 @@ impl WhisperBuilder { tracing::info!("Available backends: {:?}, has_gpu: {}", backends, has_gpu); - let use_gpu = has_gpu; let context_param = { let mut p = WhisperContextParameters::default(); p.gpu_device = 0; - p.use_gpu = has_gpu; // Only use GPU if available + p.use_gpu = has_gpu; // Only use GPU if available p.flash_attn = false; // crash on macos p.dtw_parameters.mode = whisper_rs::DtwMode::None; p }; - let ctx = match WhisperContext::new_with_params(&model_path, context_param) { - Ok(ctx) => ctx, - Err(e) => { - tracing::warn!("Failed to initialize WhisperContext with GPU (use_gpu={}): {:?}. Falling back to CPU.", use_gpu, e); - // Try again with CPU only - let mut p = WhisperContextParameters::default(); - p.gpu_device = 0; - p.use_gpu = false; - p.flash_attn = false; - p.dtw_parameters.mode = whisper_rs::DtwMode::None; - - match WhisperContext::new_with_params(&model_path, p) { - Ok(ctx) => ctx, - Err(e) => { - tracing::error!("Failed to initialize WhisperContext with CPU: {:?}. Model path: {}, File exists: {}", e, model_path, std::path::Path::new(&model_path).exists()); - panic!("Failed to initialize WhisperContext: {:?}. Model path: {}, File exists: {}", e, model_path, std::path::Path::new(&model_path).exists()); - } - } - } - }; + let ctx = WhisperContext::new_with_params(&model_path, context_param) + .unwrap_or_else(|e| panic!("Failed to initialize WhisperContext: {:?}", e)); - let state = match ctx.create_state() { - Ok(state) => state, - Err(e) => { - tracing::error!("Failed to create WhisperState: {:?}", e); - panic!("Failed to create WhisperState: {:?}", e); - } - }; + let state = ctx.create_state().expect("Failed to create WhisperState"); let token_eot = ctx.token_eot(); let token_beg = ctx.token_beg(); @@ -445,4 +420,4 @@ mod tests { let segments = whisper.transcribe(&audio).unwrap(); assert!(segments.len() > 0); } -} \ No newline at end of file +} diff --git a/plugins/listener/src/error.rs b/plugins/listener/src/error.rs index 8a789110d..fbf1c51a4 100644 --- a/plugins/listener/src/error.rs +++ b/plugins/listener/src/error.rs @@ -15,37 +15,24 @@ pub enum Error { #[error(transparent)] ConnectorError(#[from] tauri_plugin_connector::Error), #[error("no session")] - NoneSession, - #[error("missing user id")] - MissingUserId, - #[error("start session failed")] - StartSessionFailed, - #[error("stop session failed")] - StopSessionFailed, - #[error("pause session failed")] - PauseSessionFailed, - #[error("resume session failed")] - ResumeSessionFailed, - } + NoneSession, + #[error("missing user id")] + MissingUserId, + #[error("start session failed")] + StartSessionFailed, + #[error("stop session failed")] + StopSessionFailed, + #[error("pause session failed")] + PauseSessionFailed, + #[error("resume session failed")] + ResumeSessionFailed, +} impl Serialize for Error { - /// Serializes the error as its human-readable string representation. - /// - /// The error is converted with `to_string()` and that string is serialized. - /// - /// # Examples - /// - /// ``` - /// use serde_json::to_string; - /// // construct an example error variant - /// let err = crate::Error::NoneSession; - /// let s = to_string(&err).unwrap(); - /// assert_eq!(s, "\"no session\""); - /// ``` fn serialize(&self, serializer: S) -> std::result::Result where S: Serializer, { serializer.serialize_str(self.to_string().as_ref()) } -} \ No newline at end of file +} From 8c793a105b160ecaa4be9fa40f11bf06a605040e Mon Sep 17 00:00:00 2001 From: pythontilk Date: Tue, 11 Nov 2025 12:12:00 +0100 Subject: [PATCH 10/84] fix(audio+detect): comprehensive error handling and robustness improvements - Replace unwrap/expect with proper Result types throughout audio crates - Add mutex poison error handling in speaker/windows.rs - Extract helper functions in mic.rs to eliminate code duplication: * create_standard_config() for unified configuration * validate_device_with_fallback() for device validation * try_build_test_stream() for sample format testing - Improve browser detection error handling in detect/browser/linux.rs - Add graceful regex compilation fallbacks in detect/browser/mod.rs - Expand error types in audio/errors.rs for better error reporting - Update plugin systems (listener, connector) for consistency - Clean up documentation and remove verbose comments Resolves 100+ potential panic points and significantly improves system robustness. Maintains existing functionality while adding comprehensive error handling. --- AGENTS.md | 62 +++ Cargo.lock | 1 - LinuxInstallNotes.md | 2 +- crates/audio/src/bin/simple_test.rs | 40 +- crates/audio/src/bin/test_mic.rs | 10 +- crates/audio/src/bin/test_speaker.rs | 9 +- crates/audio/src/errors.rs | 15 + crates/audio/src/lib.rs | 197 +++---- crates/audio/src/mic.rs | 493 +++++++++--------- crates/audio/src/speaker/linux.rs | 30 +- crates/audio/src/speaker/mod.rs | 10 +- crates/audio/src/speaker/windows.rs | 108 ++-- crates/detect/src/app/linux.rs | 38 +- crates/detect/src/app/mod.rs | 4 +- crates/detect/src/browser/linux.rs | 87 ++-- crates/detect/src/browser/mod.rs | 27 +- crates/detect/src/mic/linux.rs | 2 +- crates/detect/src/mic/mod.rs | 4 +- crates/file/examples/checksum.rs | 16 +- .../examples/show_data_dir.rs | 14 +- crates/whisper-local/examples/direct_test.rs | 11 +- .../whisper-local/examples/list_backends.rs | 13 +- crates/whisper-local/examples/test_model.rs | 7 +- crates/whisper-local/src/model.rs | 45 +- owhisper/owhisper-server/src/commands/run.rs | 25 +- plugins/connector/Cargo.toml | 9 +- plugins/connector/src/error.rs | 2 + plugins/connector/src/ext.rs | 42 +- plugins/listener/Cargo.toml | 11 +- plugins/listener/js/bindings.gen.ts | 16 +- plugins/listener/src/commands.rs | 41 ++ plugins/listener/src/error.rs | 1 + plugins/listener/src/events.rs | 2 + plugins/listener/src/ext.rs | 215 +++----- plugins/listener/src/fsm.rs | 143 +++-- plugins/listener/src/lib.rs | 4 + 36 files changed, 949 insertions(+), 807 deletions(-) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..107da71ae --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,62 @@ +# AGENTS.md + +This file gives automated agents concise guidance for working in this repository. + +## Overview +This is a polyglot monorepo: +- Rust crates under `crates/` and `plugins/` plus some standalone dirs. +- Tauri / desktop and web apps under `apps/`. +- Shared TypeScript packages under `packages/`. +- Scripts (Python, shell, Swift) under `scripts/`. + +## Build +- Rust: `cargo build` for whole workspace; `cargo build -p ` for a single crate. +- Use feature flags to avoid heavy ML deps: e.g. `cargo test -p tauri-plugin-listener --no-default-features --features export-types` for specta export without local LLM/STT. +- Prefer optional dependencies + a dedicated feature (e.g. `connector`) instead of unconditional linking. +- Node/TS: `pnpm install`, then `pnpm run build` (Turbo orchestrates). For a single package: `pnpm --filter run build`. + +## Testing +- Rust: `cargo test` (workspace). Single crate: `cargo test -p `. With feature gating: `cargo test -p --no-default-features --features `. +- Run a single Rust test: `cargo test -p `. +- TypeScript/Vitest: `pnpm --filter test`. Single test name: `pnpm --filter vitest run path/to/file.test.ts -t "test name"`. + +## Formatting & Lint +- Rust formatting via `dprint fmt` (uses exec rustfmt). Run before committing. +- Keep Rust imports grouped: std, external crates, workspace crates, local modules. +- TypeScript formatting also via `dprint fmt`. Do not introduce other formatters unless necessary. +- Avoid trailing whitespace; keep line length reasonable (<120 typical). + +## Conventions (Rust) +- Modules & functions: `snake_case`; types & enums: `CamelCase`. +- Errors: use `thiserror`; prefer a central `Error` enum and `Result`. +- Instrument async/public functions with `#[tracing::instrument(skip(...))]` when adding tracing. +- Feature gating: wrap variants/APIs with `#[cfg(feature = "feat")]`; supply fallbacks when disabled. + +## Adding Features +- Introduce new optional deps with a matching feature name; add to `default` only if broadly needed. +- For specta/TS type export steps, minimize dependency surface (avoid heavy ML crates) by disabling default features. +- Extend builders (e.g. event/command registration) conditionally behind feature flags. + +## TypeScript/Apps +- Prefer explicit types for public APIs. Use consistent naming: `camelCase` for variables/functions, `PascalCase` for types/components. +- Centralized config and shared utilities live in `packages/utils` and `packages/ui`. + +## Scripts +- Python uses `poetry.lock` / `pyproject.toml`; prefer `poetry run python