CapSoftware · richiemcilroy · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025
diff --git a/crates/recording/src/sources/audio_mixer.rs b/crates/recording/src/sources/audio_mixer.rs
@@ -114,10 +114,7 @@ impl AudioMixerBuilder {
         let mut amix = filter_graph.add(
             &ffmpeg::filter::find("amix").expect("Failed to find amix filter"),
             "amix",
-            &format!(
-                "inputs={}:duration=first:dropout_transition=0",
-                abuffers.len()
-            ),
+            &format!("inputs={}:duration=longest", abuffers.len()),
         )?;
 
         let aformat_args = format!(
@@ -388,16 +385,21 @@ impl AudioMixer {
             let elapsed = Duration::from_secs_f64(self.samples_out as f64 / output_rate);
             let timestamp = start.instant() + start_timestamp.duration_since(start) + elapsed;
 
-            self.samples_out += filtered.samples();
+            let frame_samples = filtered.samples();
+            let mut frame = AudioFrame::new(filtered, Timestamp::Instant(timestamp));
 
-            if self
-                .output
-                .try_send(AudioFrame::new(filtered, Timestamp::Instant(timestamp)))
-                .is_err()
-            {
-                return Err(());
+            loop {
+                match self.output.try_send(frame) {
+                    Ok(()) => break,
+                    Err(err) if err.is_full() => {
+                        frame = err.into_inner();
+                        std::thread::sleep(Duration::from_millis(1));
+                    }
+                    Err(_) => return Err(()),
+                }
             }
 
+            self.samples_out += frame_samples;
             filtered = ffmpeg::frame::Audio::empty();
         }
 

diff --git a/crates/recording/src/sources/microphone.rs b/crates/recording/src/sources/microphone.rs
@@ -4,8 +4,11 @@ use crate::{
 };
 use anyhow::anyhow;
 use cap_media_info::AudioInfo;
+use cpal::SampleFormat;
 use futures::{SinkExt, channel::mpsc};
-use std::sync::Arc;
+use std::{borrow::Cow, sync::Arc};
+
+const MICROPHONE_TARGET_CHANNELS: u16 = 1;
 
 pub struct Microphone {
     info: AudioInfo,
@@ -25,7 +28,10 @@ impl AudioSource for Microphone {
         Self: Sized,
     {
         async move {
-            let audio_info = feed_lock.audio_info();
+            let source_info = feed_lock.audio_info();
+            let audio_info = source_info.with_max_channels(MICROPHONE_TARGET_CHANNELS);
+            let source_channels = source_info.channels;
+            let target_channels = audio_info.channels;
             let (tx, rx) = flume::bounded(8);
 
             feed_lock
@@ -35,17 +41,24 @@ impl AudioSource for Microphone {
 
             tokio::spawn(async move {
                 while let Ok(frame) = rx.recv_async().await {
+                    let packed = maybe_downmix_channels(
+                        &frame.data,
+                        frame.format,
+                        source_channels,
+                        target_channels,
+                    );
+
                     let _ = audio_tx
                         .send(AudioFrame::new(
-                            audio_info.wrap_frame_with_max_channels(&frame.data, 2),
+                            audio_info.wrap_frame(packed.as_ref()),
                             frame.timestamp,
                         ))
                         .await;
                 }
             });
 
             Ok(Self {
-                info: audio_info.with_max_channels(2),
+                info: audio_info,
                 _lock: feed_lock,
             })
         }
@@ -55,3 +68,167 @@ impl AudioSource for Microphone {
         self.info
     }
 }
+
+fn maybe_downmix_channels<'a>(
+    data: &'a [u8],
+    format: SampleFormat,
+    source_channels: usize,
+    target_channels: usize,
+) -> Cow<'a, [u8]> {
+    if target_channels == 0 || source_channels == 0 || target_channels >= source_channels {
+        return Cow::Borrowed(data);
+    }
+
+    if target_channels == 1 {
+        if let Some(samples) = downmix_to_mono(data, format, source_channels) {
+            Cow::Owned(samples)
+        } else {
+            Cow::Borrowed(data)
+        }
+    } else {
+        Cow::Borrowed(data)
+    }
+}
+
+fn downmix_to_mono(data: &[u8], format: SampleFormat, source_channels: usize) -> Option<Vec<u8>> {
+    let sample_size = sample_format_size(format)?;
+
+    let frame_size = sample_size.checked_mul(source_channels)?;
+    if frame_size == 0 || !data.len().is_multiple_of(frame_size) {
+        return None;
+    }
+
+    let frame_count = data.len() / frame_size;
+    let mut out = vec![0u8; frame_count * sample_size];
+
+    for (frame_idx, frame) in data.chunks(frame_size).enumerate() {
+        let mono = average_frame_sample(format, frame, sample_size, source_channels)?;
+        let start = frame_idx * sample_size;
+        write_sample_from_f64(format, mono, &mut out[start..start + sample_size]);
+    }
+
+    Some(out)
+}
+
+fn sample_format_size(format: SampleFormat) -> Option<usize> {
+    Some(match format {
+        SampleFormat::U8 => 1,
+        SampleFormat::I16 => 2,
+        SampleFormat::I32 => 4,
+        SampleFormat::I64 => 8,
+        SampleFormat::F32 => 4,
+        SampleFormat::F64 => 8,
+        _ => return None,
+    })
+}
+
+fn average_frame_sample(
+    format: SampleFormat,
+    frame: &[u8],
+    sample_size: usize,
+    channels: usize,
+) -> Option<f64> {
+    let mut sum = 0.0;
+    for ch in 0..channels {
+        let start = ch * sample_size;
+        let end = start + sample_size;
+        sum += sample_to_f64(format, &frame[start..end])?;
+    }
+
+    Some(sum / channels as f64)
+}
+
+fn sample_to_f64(format: SampleFormat, bytes: &[u8]) -> Option<f64> {
+    match format {
+        SampleFormat::U8 => bytes.first().copied().map(|v| v as f64),
+        SampleFormat::I16 => {
+            let mut buf = [0u8; 2];
+            buf.copy_from_slice(bytes);
+            Some(i16::from_ne_bytes(buf) as f64)
+        }
+        SampleFormat::I32 => {
+            let mut buf = [0u8; 4];
+            buf.copy_from_slice(bytes);
+            Some(i32::from_ne_bytes(buf) as f64)
+        }
+        SampleFormat::I64 => {
+            let mut buf = [0u8; 8];
+            buf.copy_from_slice(bytes);
+            Some(i64::from_ne_bytes(buf) as f64)
+        }
+        SampleFormat::F32 => {
+            let mut buf = [0u8; 4];
+            buf.copy_from_slice(bytes);
+            Some(f32::from_ne_bytes(buf) as f64)
+        }
+        SampleFormat::F64 => {
+            let mut buf = [0u8; 8];
+            buf.copy_from_slice(bytes);
+            Some(f64::from_ne_bytes(buf))
+        }
+        _ => None,
+    }
+}
+
+fn write_sample_from_f64(format: SampleFormat, value: f64, out: &mut [u8]) {
+    match format {
+        SampleFormat::U8 => {
+            let sample = value.round().clamp(u8::MIN as f64, u8::MAX as f64) as u8;
+            out[0] = sample;
+        }
+        SampleFormat::I16 => {
+            let sample = value.round().clamp(i16::MIN as f64, i16::MAX as f64) as i16;
+            out.copy_from_slice(&sample.to_ne_bytes());
+        }
+        SampleFormat::I32 => {
+            let sample = value.round().clamp(i32::MIN as f64, i32::MAX as f64) as i32;
+            out.copy_from_slice(&sample.to_ne_bytes());
+        }
+        SampleFormat::I64 => {
+            let sample = value.round().clamp(i64::MIN as f64, i64::MAX as f64) as i64;
+            out.copy_from_slice(&sample.to_ne_bytes());
+        }
+        SampleFormat::F32 => {
+            let sample = value as f32;
+            out.copy_from_slice(&sample.to_ne_bytes());
+        }
+        SampleFormat::F64 => {
+            out.copy_from_slice(&value.to_ne_bytes());
+        }
+        _ => {}
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn downmixes_stereo_f32_to_mono() {
+        let frames = [(0.5f32, -0.25f32), (1.0f32, 1.0f32)];
+        let mut data = Vec::new();
+
+        for (left, right) in frames {
+            data.extend_from_slice(&left.to_ne_bytes());
+            data.extend_from_slice(&right.to_ne_bytes());
+        }
+
+        let downmixed = maybe_downmix_channels(&data, SampleFormat::F32, 2, 1);
+        let owned = downmixed.into_owned();
+        assert_eq!(owned.len(), frames.len() * std::mem::size_of::<f32>());
+
+        let first = f32::from_ne_bytes(owned[0..4].try_into().unwrap());
+        let second = f32::from_ne_bytes(owned[4..8].try_into().unwrap());
+
+        assert!((first - 0.125).abs() < f32::EPSILON);
+        assert!((second - 1.0).abs() < f32::EPSILON);
+    }
+
+    #[test]
+    fn leaves_mono_buffers_untouched() {
+        let sample = 0.75f32;
+        let data = sample.to_ne_bytes().to_vec();
+        let result = maybe_downmix_channels(&data, SampleFormat::F32, 1, 1);
+        assert!(matches!(result, Cow::Borrowed(_)));
+    }
+}