diff --git a/crates/recording/src/sources/audio_mixer.rs b/crates/recording/src/sources/audio_mixer.rs index 976fffbda7..48474d9699 100644 --- a/crates/recording/src/sources/audio_mixer.rs +++ b/crates/recording/src/sources/audio_mixer.rs @@ -114,10 +114,7 @@ impl AudioMixerBuilder { let mut amix = filter_graph.add( &ffmpeg::filter::find("amix").expect("Failed to find amix filter"), "amix", - &format!( - "inputs={}:duration=first:dropout_transition=0", - abuffers.len() - ), + &format!("inputs={}:duration=longest", abuffers.len()), )?; let aformat_args = format!( @@ -388,16 +385,21 @@ impl AudioMixer { let elapsed = Duration::from_secs_f64(self.samples_out as f64 / output_rate); let timestamp = start.instant() + start_timestamp.duration_since(start) + elapsed; - self.samples_out += filtered.samples(); + let frame_samples = filtered.samples(); + let mut frame = AudioFrame::new(filtered, Timestamp::Instant(timestamp)); - if self - .output - .try_send(AudioFrame::new(filtered, Timestamp::Instant(timestamp))) - .is_err() - { - return Err(()); + loop { + match self.output.try_send(frame) { + Ok(()) => break, + Err(err) if err.is_full() => { + frame = err.into_inner(); + std::thread::sleep(Duration::from_millis(1)); + } + Err(_) => return Err(()), + } } + self.samples_out += frame_samples; filtered = ffmpeg::frame::Audio::empty(); } diff --git a/crates/recording/src/sources/microphone.rs b/crates/recording/src/sources/microphone.rs index 4d645737bb..3bcd84022c 100644 --- a/crates/recording/src/sources/microphone.rs +++ b/crates/recording/src/sources/microphone.rs @@ -4,8 +4,11 @@ use crate::{ }; use anyhow::anyhow; use cap_media_info::AudioInfo; +use cpal::SampleFormat; use futures::{SinkExt, channel::mpsc}; -use std::sync::Arc; +use std::{borrow::Cow, sync::Arc}; + +const MICROPHONE_TARGET_CHANNELS: u16 = 1; pub struct Microphone { info: AudioInfo, @@ -25,7 +28,10 @@ impl AudioSource for Microphone { Self: Sized, { async move { - let audio_info = feed_lock.audio_info(); + let source_info = feed_lock.audio_info(); + let audio_info = source_info.with_max_channels(MICROPHONE_TARGET_CHANNELS); + let source_channels = source_info.channels; + let target_channels = audio_info.channels; let (tx, rx) = flume::bounded(8); feed_lock @@ -35,9 +41,16 @@ impl AudioSource for Microphone { tokio::spawn(async move { while let Ok(frame) = rx.recv_async().await { + let packed = maybe_downmix_channels( + &frame.data, + frame.format, + source_channels, + target_channels, + ); + let _ = audio_tx .send(AudioFrame::new( - audio_info.wrap_frame_with_max_channels(&frame.data, 2), + audio_info.wrap_frame(packed.as_ref()), frame.timestamp, )) .await; @@ -45,7 +58,7 @@ impl AudioSource for Microphone { }); Ok(Self { - info: audio_info.with_max_channels(2), + info: audio_info, _lock: feed_lock, }) } @@ -55,3 +68,167 @@ impl AudioSource for Microphone { self.info } } + +fn maybe_downmix_channels<'a>( + data: &'a [u8], + format: SampleFormat, + source_channels: usize, + target_channels: usize, +) -> Cow<'a, [u8]> { + if target_channels == 0 || source_channels == 0 || target_channels >= source_channels { + return Cow::Borrowed(data); + } + + if target_channels == 1 { + if let Some(samples) = downmix_to_mono(data, format, source_channels) { + Cow::Owned(samples) + } else { + Cow::Borrowed(data) + } + } else { + Cow::Borrowed(data) + } +} + +fn downmix_to_mono(data: &[u8], format: SampleFormat, source_channels: usize) -> Option> { + let sample_size = sample_format_size(format)?; + + let frame_size = sample_size.checked_mul(source_channels)?; + if frame_size == 0 || !data.len().is_multiple_of(frame_size) { + return None; + } + + let frame_count = data.len() / frame_size; + let mut out = vec![0u8; frame_count * sample_size]; + + for (frame_idx, frame) in data.chunks(frame_size).enumerate() { + let mono = average_frame_sample(format, frame, sample_size, source_channels)?; + let start = frame_idx * sample_size; + write_sample_from_f64(format, mono, &mut out[start..start + sample_size]); + } + + Some(out) +} + +fn sample_format_size(format: SampleFormat) -> Option { + Some(match format { + SampleFormat::U8 => 1, + SampleFormat::I16 => 2, + SampleFormat::I32 => 4, + SampleFormat::I64 => 8, + SampleFormat::F32 => 4, + SampleFormat::F64 => 8, + _ => return None, + }) +} + +fn average_frame_sample( + format: SampleFormat, + frame: &[u8], + sample_size: usize, + channels: usize, +) -> Option { + let mut sum = 0.0; + for ch in 0..channels { + let start = ch * sample_size; + let end = start + sample_size; + sum += sample_to_f64(format, &frame[start..end])?; + } + + Some(sum / channels as f64) +} + +fn sample_to_f64(format: SampleFormat, bytes: &[u8]) -> Option { + match format { + SampleFormat::U8 => bytes.first().copied().map(|v| v as f64), + SampleFormat::I16 => { + let mut buf = [0u8; 2]; + buf.copy_from_slice(bytes); + Some(i16::from_ne_bytes(buf) as f64) + } + SampleFormat::I32 => { + let mut buf = [0u8; 4]; + buf.copy_from_slice(bytes); + Some(i32::from_ne_bytes(buf) as f64) + } + SampleFormat::I64 => { + let mut buf = [0u8; 8]; + buf.copy_from_slice(bytes); + Some(i64::from_ne_bytes(buf) as f64) + } + SampleFormat::F32 => { + let mut buf = [0u8; 4]; + buf.copy_from_slice(bytes); + Some(f32::from_ne_bytes(buf) as f64) + } + SampleFormat::F64 => { + let mut buf = [0u8; 8]; + buf.copy_from_slice(bytes); + Some(f64::from_ne_bytes(buf)) + } + _ => None, + } +} + +fn write_sample_from_f64(format: SampleFormat, value: f64, out: &mut [u8]) { + match format { + SampleFormat::U8 => { + let sample = value.round().clamp(u8::MIN as f64, u8::MAX as f64) as u8; + out[0] = sample; + } + SampleFormat::I16 => { + let sample = value.round().clamp(i16::MIN as f64, i16::MAX as f64) as i16; + out.copy_from_slice(&sample.to_ne_bytes()); + } + SampleFormat::I32 => { + let sample = value.round().clamp(i32::MIN as f64, i32::MAX as f64) as i32; + out.copy_from_slice(&sample.to_ne_bytes()); + } + SampleFormat::I64 => { + let sample = value.round().clamp(i64::MIN as f64, i64::MAX as f64) as i64; + out.copy_from_slice(&sample.to_ne_bytes()); + } + SampleFormat::F32 => { + let sample = value as f32; + out.copy_from_slice(&sample.to_ne_bytes()); + } + SampleFormat::F64 => { + out.copy_from_slice(&value.to_ne_bytes()); + } + _ => {} + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn downmixes_stereo_f32_to_mono() { + let frames = [(0.5f32, -0.25f32), (1.0f32, 1.0f32)]; + let mut data = Vec::new(); + + for (left, right) in frames { + data.extend_from_slice(&left.to_ne_bytes()); + data.extend_from_slice(&right.to_ne_bytes()); + } + + let downmixed = maybe_downmix_channels(&data, SampleFormat::F32, 2, 1); + let owned = downmixed.into_owned(); + assert_eq!(owned.len(), frames.len() * std::mem::size_of::()); + + let first = f32::from_ne_bytes(owned[0..4].try_into().unwrap()); + let second = f32::from_ne_bytes(owned[4..8].try_into().unwrap()); + + assert!((first - 0.125).abs() < f32::EPSILON); + assert!((second - 1.0).abs() < f32::EPSILON); + } + + #[test] + fn leaves_mono_buffers_untouched() { + let sample = 0.75f32; + let data = sample.to_ne_bytes().to_vec(); + let result = maybe_downmix_channels(&data, SampleFormat::F32, 1, 1); + assert!(matches!(result, Cow::Borrowed(_))); + } +}