From ff7161c03bcd328ee50a9b90cda6f35906d38d26 Mon Sep 17 00:00:00 2001
From: Alexander <alexander@awideweb.com>
Date: Thu, 21 May 2026 00:14:10 -0400
Subject: [PATCH] fix(v1): redact ffmpeg argv + 400 on missing model for
 /v1/audio (#14 #18)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two harness findings on the OpenAI-compatible audio routes:

- #14: POST /v1/audio/transcriptions with a non-audio body could leak
  the moonshine container's ffmpeg argv + tempfile path through the
  proxy when an out-of-tree / older upstream returned a raw
  CalledProcessError repr. The dispatcher forwards upstream bodies
  verbatim by design, so the audio route now scrubs the response for
  the leak markers ("ffmpeg" / "CalledProcessError") and substitutes a
  clean 415 envelope with code=audio.unsupported_format. New
  UnsupportedMediaType subclass added to hal0.errors so other 415 call
  sites have an ergonomic on-ramp.

- #18: POST /v1/audio/speech with a body missing the 'model' field
  previously fell through the dispatcher's default-model + no-route
  path and surfaced a misleading 404. The route now raises BadRequest
  up front with code=request.missing_model so OpenAI clients see the
  real problem (matching field added on /v1/audio/transcriptions for
  consistency). Existing tests' code assertion updated from
  validation.invalid to request.missing_model.

Tests
=====
- tests/api/test_v1_audio.py (new) covers:
  * non-audio multipart → 415 audio.unsupported_format, no "ffmpeg" or
    "CalledProcessError" substring in the response body
  * clean upstream 5xx body passes through untouched (scrubber narrowness)
  * /v1/audio/speech missing model → 400 request.missing_model
  * /v1/audio/speech empty/whitespace model → 400 request.missing_model
  * happy-path STT + TTS via httpx.MockTransport round-trip
  * sanity: scrubber leaves non-audio routes alone

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hal0/api/routes/v1.py      |  72 +++++++-
 src/hal0/errors.py             |  22 +++
 tests/api/test_typed_errors.py |   2 +
 tests/api/test_v1_audio.py     | 299 +++++++++++++++++++++++++++++++++
 tests/api/test_v1_dispatch.py  |  13 +-
 5 files changed, 393 insertions(+), 15 deletions(-)
 create mode 100644 tests/api/test_v1_audio.py

diff --git a/src/hal0/api/routes/v1.py b/src/hal0/api/routes/v1.py
index 47481bac..c62b54d0 100644
--- a/src/hal0/api/routes/v1.py
+++ b/src/hal0/api/routes/v1.py
@@ -270,10 +270,11 @@ async def audio_transcriptions(request: Request, dispatcher: DispatcherDep) -> R
     # WAV payload.
     #
     # Per OpenAI's contract the ``model`` form field is required. We surface
-    # the missing-model case as 400 (validation.invalid) rather than letting
-    # it fall through to the dispatcher's default-model + no-route 404,
-    # which obscured the real problem (issue #34).
-    return await _forward_multipart(request, dispatcher, require_model=True)
+    # the missing-model case as 400 (request.missing_model) rather than
+    # letting it fall through to the dispatcher's default-model + no-route
+    # 404, which obscured the real problem (issue #34).
+    response = await _forward_multipart(request, dispatcher, require_model=True)
+    return _scrub_audio_decoder_leakage(response)
 
 
 @router.post("/audio/speech")
@@ -282,16 +283,16 @@ async def audio_speech(request: Request, dispatcher: DispatcherDep) -> Response:
     # ({"model": "...", "input": "...", "voice": "..."}). Standard path,
     # but the ``model`` field is required by the OpenAI contract; raise
     # 400 explicitly so the caller doesn't see a misleading 404 from the
-    # dispatcher's default-model fallback path (issue #34).
+    # dispatcher's default-model fallback path (issue #34 / harness #18).
     from hal0.errors import BadRequest
 
     body = await _read_json_body(request)
     model = body.get("model")
     if not isinstance(model, str) or not model.strip():
         raise BadRequest(
-            "Request body field 'model' is required",
+            "missing required field 'model'",
             details={"field": "model", "path": "/v1/audio/speech"},
-            code="validation.invalid",
+            code="request.missing_model",
         )
     return await _dispatch_and_forward(request, dispatcher, body=body)
 
@@ -437,6 +438,59 @@ class _ImageModelNotCurated(Hal0Error):
     )
 
 
+# Sentinel substrings whose presence in an upstream error body signals
+# that the audio decoder (ffmpeg) leaked its argv or CalledProcessError
+# repr through. Older / out-of-tree moonshine builds didn't redact this
+# before returning a 5xx, so the proxy must scrub defensively. Issue #14
+# (tests/harness/FINDINGS.md §14) — the hal0 envelope contract forbids
+# echoing subprocess argv or tempfile paths to clients.
+_AUDIO_DECODER_LEAK_MARKERS = (b"CalledProcessError", b"ffmpeg", b"FFmpeg", b"FFMPEG")
+
+
+def _scrub_audio_decoder_leakage(response: Response) -> Response:
+    """Replace a leaky upstream STT error with a clean hal0 415 envelope.
+
+    The moonshine container in this repo already converts ffmpeg decode
+    failures to a 415 with the ``audio.unsupported_format`` envelope and
+    no ``ffmpeg`` substring (see ``packaging/toolbox/moonshine/moonshine_server.py``).
+    But the proxy can't assume every reachable upstream is on that build
+    — older deployed images, third-party STT containers, or operator-side
+    decoders may still surface a ``CalledProcessError`` repr that includes
+    the subprocess argv and the user-supplied tempfile path. When we see
+    those markers, swap the response for a synthetic 415 carrying the hal0
+    envelope shape so callers never see implementation detail.
+
+    Only inspects non-streaming responses with a readable ``body`` attr;
+    StreamingResponse passes through untouched (STT responses aren't
+    streamed today, but if a future upstream does, the scrub is a no-op
+    rather than a body-drain).
+    """
+    if isinstance(response, StreamingResponse):
+        return response
+    body = getattr(response, "body", None)
+    if not isinstance(body, (bytes, bytearray)) or not body:
+        return response
+    # The upstream's bad path is by definition a non-2xx; ignore 2xx bodies
+    # that happen to mention ffmpeg in a metadata field.
+    if 200 <= response.status_code < 300:
+        return response
+    if not any(marker in body for marker in _AUDIO_DECODER_LEAK_MARKERS):
+        return response
+
+    envelope = {
+        "error": {
+            "code": "audio.unsupported_format",
+            "message": "unsupported audio format; expected wav/mp3/flac/ogg/m4a/webm",
+            "details": {"upstream_status": response.status_code},
+        }
+    }
+    return Response(
+        content=json.dumps(envelope).encode("utf-8"),
+        status_code=415,
+        media_type="application/json",
+    )
+
+
 _MODEL_FIELD_RE = re.compile(
     rb'Content-Disposition:\s*form-data;\s*name="model"\s*\r\n\r\n([^\r\n]+)',
     re.IGNORECASE,
@@ -496,9 +550,9 @@ async def _forward_multipart(
 
     if require_model and not model_value:
         raise BadRequest(
-            "Request body field 'model' is required",
+            "missing required field 'model'",
             details={"field": "model", "path": request.url.path},
-            code="validation.invalid",
+            code="request.missing_model",
         )
 
     call = await dispatcher.dispatch(request, body={"model": model_value} if model_value else {})
diff --git a/src/hal0/errors.py b/src/hal0/errors.py
index 54a76971..c9ffdefc 100644
--- a/src/hal0/errors.py
+++ b/src/hal0/errors.py
@@ -149,6 +149,27 @@ class Conflict(Hal0Error):
     status = 409
 
 
+class UnsupportedMediaType(Hal0Error):
+    """415 — the request body's media type is not one the route can handle.
+
+    Use this when the route received bytes labelled as one type but couldn't
+    decode them (e.g. ``/v1/audio/transcriptions`` got a multipart upload
+    whose file part wasn't actually audio the upstream could decode). The
+    default ``code`` lives in the ``audio.*`` namespace because that's the
+    only current call site; override per-raise for other media surfaces.
+
+    Example::
+
+        raise UnsupportedMediaType(
+            "unsupported audio format; expected wav/mp3/flac/ogg/m4a/webm",
+            code="audio.unsupported_format",
+        )
+    """
+
+    code = "audio.unsupported_format"
+    status = 415
+
+
 class UnprocessableEntity(Hal0Error):
     """422 — the request was well-formed but failed business-rule validation.
 
@@ -182,4 +203,5 @@ class UnprocessableEntity(Hal0Error):
     "NotFound",
     "Unauthorized",
     "UnprocessableEntity",
+    "UnsupportedMediaType",
 ]
diff --git a/tests/api/test_typed_errors.py b/tests/api/test_typed_errors.py
index 12d83121..c4aa3d53 100644
--- a/tests/api/test_typed_errors.py
+++ b/tests/api/test_typed_errors.py
@@ -33,6 +33,7 @@
     NotFound,
     Unauthorized,
     UnprocessableEntity,
+    UnsupportedMediaType,
 )
 
 # ── Fixtures ────────────────────────────────────────────────────────────────
@@ -66,6 +67,7 @@ def client(app: FastAPI) -> TestClient:
         (Forbidden, 403, "auth.forbidden"),
         (NotFound, 404, "resource.not_found"),
         (Conflict, 409, "resource.conflict"),
+        (UnsupportedMediaType, 415, "audio.unsupported_format"),
         (UnprocessableEntity, 422, "validation.unprocessable"),
     ],
 )
diff --git a/tests/api/test_v1_audio.py b/tests/api/test_v1_audio.py
new file mode 100644
index 00000000..9b5de883
--- /dev/null
+++ b/tests/api/test_v1_audio.py
@@ -0,0 +1,299 @@
+"""Wiring tests for ``/v1/audio/*`` envelope semantics.
+
+Covers two harness findings:
+
+  * **#14** — STT pipeline must not leak the moonshine container's ffmpeg
+    argv / ``CalledProcessError`` repr through ``POST /v1/audio/transcriptions``
+    when the upload isn't a decodable audio format. The proxy scrubs any
+    upstream body carrying those markers and re-emits a clean 415 with the
+    hal0 envelope ``code="audio.unsupported_format"``.
+  * **#18** — ``POST /v1/audio/speech`` with the OpenAI body missing the
+    ``model`` field must return 400 with ``code="request.missing_model"``,
+    not the dispatcher's misleading 404 from the default-model fallback
+    path.
+
+Happy-path STT + TTS round-trips use ``httpx.MockTransport`` injected into
+the dispatcher's client so we exercise the full route → dispatcher →
+forward path without a live upstream container.
+"""
+
+from __future__ import annotations
+
+import httpx
+import pytest
+from fastapi.testclient import TestClient
+
+from hal0.upstreams.registry import Upstream
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+
+def _seed_stt_upstream(client: TestClient, port: int = 8089) -> None:
+    """Register a fake STT slot the dispatcher's legacy fallback will land on.
+
+    The legacy heuristics in ``hal0.dispatcher.proxy`` don't have a rule for
+    ``/v1/audio/transcriptions`` model ids that aren't FLM tag-style, so an
+    arbitrary STT model name falls through to the ``primary`` slot. We
+    register under that name so the dispatch resolves cleanly in tests
+    without having to install a registry binding.
+    """
+    client.app.state.upstreams.upsert(
+        Upstream(
+            name="primary",
+            kind="slot",
+            url=f"http://127.0.0.1:{port}/v1",
+            slot_name="primary",
+            auth_style="none",
+        )
+    )
+
+
+def _seed_tts_upstream(client: TestClient, port: int = 8090) -> None:
+    """Register a fake TTS slot the dispatcher's legacy fallback will land on.
+
+    Same fallthrough logic as STT — the dispatcher routes unknown ``model``
+    ids to ``primary``, so we register there.
+    """
+    client.app.state.upstreams.upsert(
+        Upstream(
+            name="primary",
+            kind="slot",
+            url=f"http://127.0.0.1:{port}/v1",
+            slot_name="primary",
+            auth_style="none",
+        )
+    )
+
+
+def _install_mock_transport(client: TestClient, handler: httpx.MockTransport | object) -> None:
+    """Swap the dispatcher's httpx client for one backed by ``handler``.
+
+    The dispatcher lazily constructs its own client on first use, so we
+    install ours BEFORE any /v1 request — once the lifespan has finished
+    (which the ``client`` fixture guarantees) but before the test fires.
+    """
+    if not isinstance(handler, httpx.MockTransport):
+        handler = httpx.MockTransport(handler)  # type: ignore[arg-type]
+    dispatcher = client.app.state.dispatcher
+    # Close any existing client first so we don't leak sockets.
+    if getattr(dispatcher, "_http_client", None) is not None:
+        # Best-effort sync close — the test client's event loop is paused
+        # between requests so a fire-and-forget close is acceptable here.
+        try:
+            import asyncio
+
+            asyncio.get_event_loop().run_until_complete(dispatcher.aclose())
+        except Exception:
+            pass
+    dispatcher._http_client = httpx.AsyncClient(transport=handler)
+    dispatcher._owns_http_client = True
+
+
+# ── #14: STT envelope on non-audio uploads ────────────────────────────────────
+
+
+def test_v1_audio_transcriptions_redacts_ffmpeg_argv(client: TestClient) -> None:
+    """A non-audio multipart body must surface as 415 audio.unsupported_format.
+
+    Simulates an older / out-of-tree moonshine container that hasn't been
+    updated to redact its ``CalledProcessError`` — the upstream returns a
+    leaky 500 body, and the proxy must scrub it so the client never sees
+    the subprocess argv or the user-supplied tempfile path.
+    """
+    _seed_stt_upstream(client)
+
+    leaky_body = (
+        b"{\"detail\":\"Command '[\\'ffmpeg\\', \\'-y\\', \\'-i\\', "
+        b"'/tmp/tmpABC.bin']' returned non-zero exit status 1.\"}"
+    )
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        return httpx.Response(
+            500,
+            content=leaky_body,
+            headers={"content-type": "application/json"},
+        )
+
+    _install_mock_transport(client, handler)
+
+    files = {"file": ("not-audio.wav", b"this is not actually audio", "audio/wav")}
+    data = {"model": "moonshine-small-streaming-en"}
+    r = client.post("/v1/audio/transcriptions", files=files, data=data)
+
+    assert r.status_code == 415, r.text
+    body_text = r.text
+    body_lower = body_text.lower()
+    # Acceptance per harness #14: no subprocess argv leaks through the proxy.
+    assert "ffmpeg" not in body_lower, body_text
+    assert "calledprocesserror" not in body_lower, body_text
+    # And no tempfile path either (defence in depth — covered by the marker
+    # check, but we assert explicitly so a future regex regression breaks
+    # here rather than silently passing).
+    assert "/tmp/" not in body_text, body_text
+
+    payload = r.json()
+    assert payload["error"]["code"] == "audio.unsupported_format"
+    assert "unsupported audio format" in payload["error"]["message"].lower()
+
+
+def test_v1_audio_transcriptions_clean_upstream_error_passes_through(
+    client: TestClient,
+) -> None:
+    """An upstream 5xx body that doesn't mention ffmpeg passes through verbatim.
+
+    Regression guard for the scrubber's narrowness — we only replace the body
+    when the leak markers are present. Unrelated upstream failures should
+    still surface their original envelope so callers can debug them.
+    """
+    _seed_stt_upstream(client)
+
+    clean_body = b'{"detail":"model not loaded"}'
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        return httpx.Response(
+            503,
+            content=clean_body,
+            headers={"content-type": "application/json"},
+        )
+
+    _install_mock_transport(client, handler)
+
+    files = {"file": ("audio.wav", b"RIFF" + b"\x00" * 1024, "audio/wav")}
+    data = {"model": "moonshine-small-streaming-en"}
+    r = client.post("/v1/audio/transcriptions", files=files, data=data)
+
+    assert r.status_code == 503, r.text
+    assert r.json()["detail"] == "model not loaded"
+
+
+# ── #18: TTS missing-model envelope ───────────────────────────────────────────
+
+
+def test_v1_audio_speech_missing_model_returns_400_envelope(client: TestClient) -> None:
+    """POST /v1/audio/speech without ``model`` → 400 request.missing_model.
+
+    The dispatcher's no-route branch used to return 404 ('dispatch.no_route')
+    when no model was supplied — misleading because the route IS mounted, the
+    request just lacked a required field. Harness finding #18.
+    """
+    r = client.post(
+        "/v1/audio/speech",
+        json={"input": "hello world", "voice": "af_bella"},
+    )
+    assert r.status_code == 400, r.text
+    body = r.json()
+    assert body["error"]["code"] == "request.missing_model"
+    assert "model" in body["error"]["message"].lower()
+    # Defence in depth: the dispatcher's 404 envelope must not slip through.
+    assert body["error"]["code"] != "dispatch.no_route"
+
+
+def test_v1_audio_speech_empty_model_returns_400_envelope(client: TestClient) -> None:
+    """Whitespace-only ``model`` is treated as missing."""
+    r = client.post(
+        "/v1/audio/speech",
+        json={"model": "   ", "input": "hi", "voice": "af_bella"},
+    )
+    assert r.status_code == 400, r.text
+    assert r.json()["error"]["code"] == "request.missing_model"
+
+
+# ── Happy paths (mocked upstream) ─────────────────────────────────────────────
+
+
+def test_v1_audio_transcriptions_happy_path(client: TestClient) -> None:
+    """A well-formed multipart STT request reaches the upstream and returns its body."""
+    _seed_stt_upstream(client)
+
+    expected = {"text": "hello world"}
+
+    captured: dict[str, object] = {}
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        captured["url"] = str(req.url)
+        captured["method"] = req.method
+        captured["content_type"] = req.headers.get("content-type", "")
+        captured["body"] = req.content
+        return httpx.Response(200, json=expected)
+
+    _install_mock_transport(client, handler)
+
+    files = {"file": ("clip.wav", b"RIFF" + b"\x00" * 64, "audio/wav")}
+    data = {"model": "moonshine-small-streaming-en"}
+    r = client.post("/v1/audio/transcriptions", files=files, data=data)
+
+    assert r.status_code == 200, r.text
+    assert r.json() == expected
+    # The route forwards multipart bytes verbatim — content-type must
+    # preserve the multipart boundary or the upstream's parser would fail.
+    assert captured["content_type"].startswith("multipart/form-data")
+    # And the multipart body must reach the upstream un-rewritten (the
+    # request bytes carry the model field on the wire).
+    assert b'name="model"' in captured["body"]  # type: ignore[operator]
+
+
+def test_v1_audio_speech_happy_path(client: TestClient) -> None:
+    """A well-formed TTS request streams the upstream's audio bytes back."""
+    _seed_tts_upstream(client)
+
+    fake_wav = b"RIFF\x24\x00\x00\x00WAVEfmt " + b"\x00" * 32
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        return httpx.Response(
+            200,
+            content=fake_wav,
+            headers={"content-type": "audio/wav"},
+        )
+
+    _install_mock_transport(client, handler)
+
+    r = client.post(
+        "/v1/audio/speech",
+        json={"model": "tts", "input": "hello", "voice": "af_bella"},
+    )
+
+    assert r.status_code == 200, r.text
+    assert r.content == fake_wav
+    assert r.headers["content-type"].startswith("audio/wav")
+
+
+# ── Sanity: the scrubber leaves non-audio routes alone ────────────────────────
+
+
+@pytest.mark.parametrize(
+    "path,body",
+    [
+        ("/v1/chat/completions", {"model": "primary", "messages": []}),
+    ],
+)
+def test_scrubber_does_not_touch_non_audio_routes(
+    client: TestClient, path: str, body: dict[str, object]
+) -> None:
+    """The audio leakage scrubber is scoped to ``/v1/audio/transcriptions``.
+
+    A 500 body mentioning ffmpeg on a chat route (e.g. some upstream's stack
+    trace) must pass through untouched — only the STT route applies the
+    scrub. This protects callers that legitimately surface ffmpeg-adjacent
+    text in non-audio responses.
+    """
+    client.app.state.upstreams.upsert(
+        Upstream(
+            name="primary",
+            kind="slot",
+            url="http://127.0.0.1:8081/v1",
+            slot_name="primary",
+            auth_style="none",
+        )
+    )
+
+    leaky_body = b'{"detail":"trace mentions ffmpeg somewhere"}'
+
+    def handler(req: httpx.Request) -> httpx.Response:
+        return httpx.Response(500, content=leaky_body)
+
+    _install_mock_transport(client, handler)
+
+    r = client.post(path, json=body)
+    assert r.status_code == 500
+    # Body unchanged — the chat route never touches the scrubber.
+    assert "ffmpeg" in r.text
diff --git a/tests/api/test_v1_dispatch.py b/tests/api/test_v1_dispatch.py
index 88f67e70..0fd3c6c5 100644
--- a/tests/api/test_v1_dispatch.py
+++ b/tests/api/test_v1_dispatch.py
@@ -63,21 +63,22 @@ def test_v1_routes_are_no_longer_501_stubs(client: TestClient) -> None:
             assert r.json()["error"]["code"] != "system.not_implemented"
 
 
-# ── issue #34: missing-model on /v1/audio/* → 400, not 404 ─────────────────
+# ── issue #34 / harness #18: missing-model on /v1/audio/* → 400, not 404 ───
 
 
 def test_v1_audio_speech_missing_model_returns_400(client: TestClient) -> None:
-    """POST /v1/audio/speech without 'model' → 400 validation.invalid.
+    """POST /v1/audio/speech without 'model' → 400 request.missing_model.
 
     Pre-issue-#34 the dispatcher's default-model + no-route fallback
     surfaced a confusing 404 ('dispatch.no_route'). The route now raises
     BadRequest up front so OpenAI clients see a useful error message
-    naming the missing field.
+    naming the missing field. Code lives in the ``request.*`` namespace
+    per harness finding #18.
     """
     r = client.post("/v1/audio/speech", json={"input": "hello", "voice": "alloy"})
     assert r.status_code == 400, r.text
     body = r.json()
-    assert body["error"]["code"] == "validation.invalid"
+    assert body["error"]["code"] == "request.missing_model"
     assert "model" in body["error"]["message"].lower()
 
 
@@ -85,7 +86,7 @@ def test_v1_audio_speech_empty_model_returns_400(client: TestClient) -> None:
     """An empty / whitespace-only model field is treated as missing."""
     r = client.post("/v1/audio/speech", json={"model": "   ", "input": "hi"})
     assert r.status_code == 400, r.text
-    assert r.json()["error"]["code"] == "validation.invalid"
+    assert r.json()["error"]["code"] == "request.missing_model"
 
 
 def test_v1_audio_transcriptions_missing_model_returns_400(client: TestClient) -> None:
@@ -100,5 +101,5 @@ def test_v1_audio_transcriptions_missing_model_returns_400(client: TestClient) -
     r = client.post("/v1/audio/transcriptions", files=files)
     assert r.status_code == 400, r.text
     body = r.json()
-    assert body["error"]["code"] == "validation.invalid"
+    assert body["error"]["code"] == "request.missing_model"
     assert "model" in body["error"]["message"].lower()