Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 105 additions & 2 deletions stackvox/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@

from __future__ import annotations

import ctypes
import ctypes.util
import json
import logging
import os
import queue
import signal
import socket
import socketserver
import sys
import threading

import sounddevice as sd
Expand All @@ -38,14 +41,26 @@
RECV_BYTES = 1024


# Set when PortAudio's cached default-output device is suspected stale and
# needs Pa_Terminate / Pa_Initialize before the next playback. The event
# starts out set so the first playback always refreshes; the macOS device
# watcher sets it again on real device changes; the worker also sets it again
# on playback failure as a belt-and-suspenders retry path.
_audio_dirty = threading.Event()
_audio_dirty.set()

# Holds CoreAudio callback references so they aren't garbage collected.
_ca_refs: list = []


def _refresh_audio_devices() -> None:
"""Reset PortAudio so the next play picks up the current system default.

PortAudio caches the default output device at init time; without this the
daemon keeps playing to whatever was default when it started (e.g. the
built-in speakers after the user swapped to Bluetooth). Terminating and
re-initialising is the only portable way to refresh that cache. Costs
~10-50ms per call, which is invisible next to synthesis time.
~10-50ms per call.
"""
try:
sd._terminate()
Expand All @@ -54,21 +69,106 @@ def _refresh_audio_devices() -> None:
logger.exception("failed to refresh audio devices")


def _start_device_watcher() -> None:
    """macOS only: mark `_audio_dirty` when the default output device changes.

    Avoids reinitialising PortAudio on every playback (the simpler approach,
    which adds 10-50ms of latency before each speech). macOS notifies the
    property listener on more than just real device changes — playback start,
    volume changes, and other side effects all fire it — so we compare the
    current default-output device ID against the last seen one and only mark
    dirty on actual changes.

    No-ops on non-macOS; the dirty flag stays at its initial state (set), so
    the first playback refreshes once and subsequent playbacks reuse the
    PortAudio context. Device changes on those platforms are handled by the
    worker's failure-retry path.

    The watcher is strictly best-effort: if the frameworks cannot be located
    or loaded, a required symbol is missing, or CoreAudio refuses the listener
    registration, the reason is logged at debug level and the function returns
    without starting the watcher thread. It never raises for those cases.
    """
    if sys.platform != "darwin":
        return

    class _PropAddr(ctypes.Structure):
        # Mirrors CoreAudio's AudioObjectPropertyAddress.
        _fields_ = [
            ("mSelector", ctypes.c_uint32),
            ("mScope", ctypes.c_uint32),
            ("mElement", ctypes.c_uint32),
        ]

    _ListenerProc = ctypes.CFUNCTYPE(
        ctypes.c_int32,
        ctypes.c_uint32,  # inObjectID
        ctypes.c_uint32,  # inNumberAddresses
        ctypes.POINTER(_PropAddr),
        ctypes.c_void_p,
    )

    try:
        ca_path = ctypes.util.find_library("CoreAudio")
        cf_path = ctypes.util.find_library("CoreFoundation")
        if ca_path is None or cf_path is None:
            # Fail here instead of handing "" to CDLL, which could load
            # something other than the framework and defer the failure to an
            # unguarded symbol lookup.
            raise OSError("CoreAudio/CoreFoundation framework not found")
        ca = ctypes.CDLL(ca_path)
        cf = ctypes.CDLL(cf_path)
        # Resolve every symbol inside the guarded section so a missing one
        # disables the watcher rather than raising AttributeError to callers.
        get_property = ca.AudioObjectGetPropertyData
        add_listener = ca.AudioObjectAddPropertyListener
        run_loop = cf.CFRunLoopRun
    except (OSError, AttributeError):
        logger.debug("CoreAudio unavailable; device watcher disabled")
        return

    # Declare the foreign signatures explicitly so ctypes converts and checks
    # arguments instead of falling back to its default C-int handling.
    get_property.argtypes = [
        ctypes.c_uint32,  # inObjectID
        ctypes.POINTER(_PropAddr),  # inAddress
        ctypes.c_uint32,  # inQualifierDataSize
        ctypes.c_void_p,  # inQualifierData
        ctypes.POINTER(ctypes.c_uint32),  # ioDataSize
        ctypes.POINTER(ctypes.c_uint32),  # outData (an AudioObjectID here)
    ]
    get_property.restype = ctypes.c_int32  # OSStatus
    add_listener.argtypes = [
        ctypes.c_uint32,  # inObjectID
        ctypes.POINTER(_PropAddr),  # inAddress
        _ListenerProc,  # inListener
        ctypes.c_void_p,  # inClientData
    ]
    add_listener.restype = ctypes.c_int32  # OSStatus
    run_loop.argtypes = []
    run_loop.restype = None

    prop = _PropAddr(
        0x644F7574,  # kAudioHardwarePropertyDefaultOutputDevice 'dOut'
        0x676C6F62,  # kAudioObjectPropertyScopeGlobal 'glob'
        0,  # kAudioObjectPropertyElementMain
    )

    def _read_default_device() -> int:
        """Return the current default output device ID, or 0 on failure."""
        device = ctypes.c_uint32(0)
        size = ctypes.c_uint32(ctypes.sizeof(device))
        status = get_property(
            1,  # kAudioObjectSystemObject
            ctypes.byref(prop),
            0,
            None,
            ctypes.byref(size),
            ctypes.byref(device),
        )
        return device.value if status == 0 else 0

    # One-element list so the callback closure can rebind the last-seen ID.
    last_device = [_read_default_device()]

    def _on_device_change(obj_id: int, n: int, addrs, data) -> int:
        # Never let an exception escape into the native caller; always
        # report success (0) back to CoreAudio.
        try:
            current = _read_default_device()
            if current and current != last_device[0]:
                last_device[0] = current
                _audio_dirty.set()
        except Exception:
            logger.debug("device-change callback error", exc_info=True)
        return 0

    cb = _ListenerProc(_on_device_change)
    status = add_listener(1, ctypes.byref(prop), cb, None)
    if status != 0:
        # Without a registered listener the watcher thread would be useless
        # and the "started" log line below would be misleading.
        logger.debug(
            "failed to register audio device listener (OSStatus %d); "
            "device watcher disabled",
            status,
        )
        return
    _ca_refs.append(cb)  # prevent GC while CoreAudio holds the pointer

    threading.Thread(
        target=run_loop,
        daemon=True,
        name="audio-device-watcher",
    ).start()
    logger.debug("audio device watcher started")


class _DaemonState:
def __init__(self, voice: str, speed: float, lang: str) -> None:
self.tts = Stackvox(voice=voice, speed=speed, lang=lang)
self.queue: queue.Queue[dict] = queue.Queue(maxsize=MAX_QUEUE)
self.stop_event = threading.Event()
self.worker = threading.Thread(target=self._worker, daemon=True)
self.worker.start()
_start_device_watcher()

def _worker(self) -> None:
while not self.stop_event.is_set():
try:
req = self.queue.get(timeout=WORKER_POLL_SECONDS)
except queue.Empty:
continue
_refresh_audio_devices()
if _audio_dirty.is_set():
_refresh_audio_devices()
_audio_dirty.clear()
Comment on lines +169 to +171
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Race condition: clear the dirty flag before refreshing, not after. If a device change fires during _refresh_audio_devices() (10-50 ms window), the subsequent clear() swallows it and the next playback targets the wrong device.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At stackvox/daemon.py, line 169:

<comment>Race condition: clear the dirty flag *before* refreshing, not after. If a device change fires during `_refresh_audio_devices()` (10-50 ms window), the subsequent `clear()` swallows it and the next playback targets the wrong device.</comment>

<file context>
@@ -54,21 +69,106 @@ def _refresh_audio_devices() -> None:
             except queue.Empty:
                 continue
-            _refresh_audio_devices()
+            if _audio_dirty.is_set():
+                _refresh_audio_devices()
+                _audio_dirty.clear()
</file context>
Suggested change
if _audio_dirty.is_set():
_refresh_audio_devices()
_audio_dirty.clear()
if _audio_dirty.is_set():
_audio_dirty.clear()
_refresh_audio_devices()

try:
self.tts.speak(
req["text"],
Expand All @@ -78,6 +178,9 @@ def _worker(self) -> None:
)
except Exception:
logger.exception("playback error")
# Failed playback might be a stale audio context; mark dirty
# so the next request refreshes before trying again.
_audio_dirty.set()

def submit(self, req: dict) -> bool:
try:
Expand Down
36 changes: 30 additions & 6 deletions tests/test_daemon_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,42 @@ def test_full_queue_returns_busy(server: ServerHarness):
assert reply in {"ok", "busy"}


def test_worker_refreshes_audio_devices_before_each_play(server: ServerHarness):
"""PortAudio is reset before every play so device switches are picked up."""
def test_worker_refreshes_audio_only_when_dirty(server: ServerHarness):
"""PortAudio is refreshed once per dirty cycle, not before every play.

Initial dirty state at startup → first playback refreshes. A second
playback with no device change → no extra refresh. Re-marking dirty
(simulating a default-output-device change) → next playback refreshes
again.
"""
import time

from stackvox import daemon

# Ensure a known starting state regardless of prior-test ordering: dirty
# so the first playback in this test refreshes once.
daemon._audio_dirty.set()

def wait_for_speak_count(target: int) -> None:
deadline = time.monotonic() + 1.0
while server.tts.speak.call_count < target and time.monotonic() < deadline:
time.sleep(0.01)

# First playback consumes the dirty flag.
assert _roundtrip(server.sock, json.dumps({"text": "a"}) + "\n") == "ok"
wait_for_speak_count(1)
assert server.refresh.call_count == 1

# Second playback with no device change → no additional refresh.
assert _roundtrip(server.sock, json.dumps({"text": "b"}) + "\n") == "ok"
wait_for_speak_count(2)
assert server.refresh.call_count == 1

deadline = time.monotonic() + 1.0
while server.tts.speak.call_count < 2 and time.monotonic() < deadline:
time.sleep(0.01)
# Simulate a real device change.
daemon._audio_dirty.set()

assert server.tts.speak.call_count == 2
assert _roundtrip(server.sock, json.dumps({"text": "c"}) + "\n") == "ok"
wait_for_speak_count(3)
assert server.refresh.call_count == 2


Expand Down