From ff7161c03bcd328ee50a9b90cda6f35906d38d26 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 21 May 2026 00:14:10 -0400 Subject: [PATCH] fix(v1): redact ffmpeg argv + 400 on missing model for /v1/audio (#14 #18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two harness findings on the OpenAI-compatible audio routes: - #14: POST /v1/audio/transcriptions with a non-audio body could leak the moonshine container's ffmpeg argv + tempfile path through the proxy when an out-of-tree / older upstream returned a raw CalledProcessError repr. The dispatcher forwards upstream bodies verbatim by design, so the audio route now scrubs the response for the leak markers ("ffmpeg" / "CalledProcessError") and substitutes a clean 415 envelope with code=audio.unsupported_format. New UnsupportedMediaType subclass added to hal0.errors so other 415 call sites have an ergonomic on-ramp. - #18: POST /v1/audio/speech with a body missing the 'model' field previously fell through the dispatcher's default-model + no-route path and surfaced a misleading 404. The route now raises BadRequest up front with code=request.missing_model so OpenAI clients see the real problem (matching field added on /v1/audio/transcriptions for consistency). Existing tests' code assertion updated from validation.invalid to request.missing_model. Tests ===== - tests/api/test_v1_audio.py (new) covers: * non-audio multipart → 415 audio.unsupported_format, no "ffmpeg" or "CalledProcessError" substring in the response body * clean upstream 5xx body passes through untouched (scrubber narrowness) * /v1/audio/speech missing model → 400 request.missing_model * /v1/audio/speech empty/whitespace model → 400 request.missing_model * happy-path STT + TTS via httpx.MockTransport round-trip * sanity: scrubber leaves non-audio routes alone Co-Authored-By: Claude Opus 4.7 (1M context) --- src/hal0/api/routes/v1.py | 72 +++++++- src/hal0/errors.py | 22 +++ tests/api/test_typed_errors.py | 2 + tests/api/test_v1_audio.py | 299 +++++++++++++++++++++++++++++++++ tests/api/test_v1_dispatch.py | 13 +- 5 files changed, 393 insertions(+), 15 deletions(-) create mode 100644 tests/api/test_v1_audio.py diff --git a/src/hal0/api/routes/v1.py b/src/hal0/api/routes/v1.py index 47481bac..c62b54d0 100644 --- a/src/hal0/api/routes/v1.py +++ b/src/hal0/api/routes/v1.py @@ -270,10 +270,11 @@ async def audio_transcriptions(request: Request, dispatcher: DispatcherDep) -> R # WAV payload. # # Per OpenAI's contract the ``model`` form field is required. We surface - # the missing-model case as 400 (validation.invalid) rather than letting - # it fall through to the dispatcher's default-model + no-route 404, - # which obscured the real problem (issue #34). - return await _forward_multipart(request, dispatcher, require_model=True) + # the missing-model case as 400 (request.missing_model) rather than + # letting it fall through to the dispatcher's default-model + no-route + # 404, which obscured the real problem (issue #34). + response = await _forward_multipart(request, dispatcher, require_model=True) + return _scrub_audio_decoder_leakage(response) @router.post("/audio/speech") @@ -282,16 +283,16 @@ async def audio_speech(request: Request, dispatcher: DispatcherDep) -> Response: # ({"model": "...", "input": "...", "voice": "..."}). Standard path, # but the ``model`` field is required by the OpenAI contract; raise # 400 explicitly so the caller doesn't see a misleading 404 from the - # dispatcher's default-model fallback path (issue #34). + # dispatcher's default-model fallback path (issue #34 / harness #18). from hal0.errors import BadRequest body = await _read_json_body(request) model = body.get("model") if not isinstance(model, str) or not model.strip(): raise BadRequest( - "Request body field 'model' is required", + "missing required field 'model'", details={"field": "model", "path": "/v1/audio/speech"}, - code="validation.invalid", + code="request.missing_model", ) return await _dispatch_and_forward(request, dispatcher, body=body) @@ -437,6 +438,59 @@ class _ImageModelNotCurated(Hal0Error): ) +# Sentinel substrings whose presence in an upstream error body signals +# that the audio decoder (ffmpeg) leaked its argv or CalledProcessError +# repr through. Older / out-of-tree moonshine builds didn't redact this +# before returning a 5xx, so the proxy must scrub defensively. Issue #14 +# (tests/harness/FINDINGS.md §14) — the hal0 envelope contract forbids +# echoing subprocess argv or tempfile paths to clients. +_AUDIO_DECODER_LEAK_MARKERS = (b"CalledProcessError", b"ffmpeg", b"FFmpeg", b"FFMPEG") + + +def _scrub_audio_decoder_leakage(response: Response) -> Response: + """Replace a leaky upstream STT error with a clean hal0 415 envelope. + + The moonshine container in this repo already converts ffmpeg decode + failures to a 415 with the ``audio.unsupported_format`` envelope and + no ``ffmpeg`` substring (see ``packaging/toolbox/moonshine/moonshine_server.py``). + But the proxy can't assume every reachable upstream is on that build + — older deployed images, third-party STT containers, or operator-side + decoders may still surface a ``CalledProcessError`` repr that includes + the subprocess argv and the user-supplied tempfile path. When we see + those markers, swap the response for a synthetic 415 carrying the hal0 + envelope shape so callers never see implementation detail. + + Only inspects non-streaming responses with a readable ``body`` attr; + StreamingResponse passes through untouched (STT responses aren't + streamed today, but if a future upstream does, the scrub is a no-op + rather than a body-drain). + """ + if isinstance(response, StreamingResponse): + return response + body = getattr(response, "body", None) + if not isinstance(body, (bytes, bytearray)) or not body: + return response + # The upstream's bad path is by definition a non-2xx; ignore 2xx bodies + # that happen to mention ffmpeg in a metadata field. + if 200 <= response.status_code < 300: + return response + if not any(marker in body for marker in _AUDIO_DECODER_LEAK_MARKERS): + return response + + envelope = { + "error": { + "code": "audio.unsupported_format", + "message": "unsupported audio format; expected wav/mp3/flac/ogg/m4a/webm", + "details": {"upstream_status": response.status_code}, + } + } + return Response( + content=json.dumps(envelope).encode("utf-8"), + status_code=415, + media_type="application/json", + ) + + _MODEL_FIELD_RE = re.compile( rb'Content-Disposition:\s*form-data;\s*name="model"\s*\r\n\r\n([^\r\n]+)', re.IGNORECASE, @@ -496,9 +550,9 @@ async def _forward_multipart( if require_model and not model_value: raise BadRequest( - "Request body field 'model' is required", + "missing required field 'model'", details={"field": "model", "path": request.url.path}, - code="validation.invalid", + code="request.missing_model", ) call = await dispatcher.dispatch(request, body={"model": model_value} if model_value else {}) diff --git a/src/hal0/errors.py b/src/hal0/errors.py index 54a76971..c9ffdefc 100644 --- a/src/hal0/errors.py +++ b/src/hal0/errors.py @@ -149,6 +149,27 @@ class Conflict(Hal0Error): status = 409 +class UnsupportedMediaType(Hal0Error): + """415 — the request body's media type is not one the route can handle. + + Use this when the route received bytes labelled as one type but couldn't + decode them (e.g. ``/v1/audio/transcriptions`` got a multipart upload + whose file part wasn't actually audio the upstream could decode). The + default ``code`` lives in the ``audio.*`` namespace because that's the + only current call site; override per-raise for other media surfaces. + + Example:: + + raise UnsupportedMediaType( + "unsupported audio format; expected wav/mp3/flac/ogg/m4a/webm", + code="audio.unsupported_format", + ) + """ + + code = "audio.unsupported_format" + status = 415 + + class UnprocessableEntity(Hal0Error): """422 — the request was well-formed but failed business-rule validation. @@ -182,4 +203,5 @@ class UnprocessableEntity(Hal0Error): "NotFound", "Unauthorized", "UnprocessableEntity", + "UnsupportedMediaType", ] diff --git a/tests/api/test_typed_errors.py b/tests/api/test_typed_errors.py index 12d83121..c4aa3d53 100644 --- a/tests/api/test_typed_errors.py +++ b/tests/api/test_typed_errors.py @@ -33,6 +33,7 @@ NotFound, Unauthorized, UnprocessableEntity, + UnsupportedMediaType, ) # ── Fixtures ──────────────────────────────────────────────────────────────── @@ -66,6 +67,7 @@ def client(app: FastAPI) -> TestClient: (Forbidden, 403, "auth.forbidden"), (NotFound, 404, "resource.not_found"), (Conflict, 409, "resource.conflict"), + (UnsupportedMediaType, 415, "audio.unsupported_format"), (UnprocessableEntity, 422, "validation.unprocessable"), ], ) diff --git a/tests/api/test_v1_audio.py b/tests/api/test_v1_audio.py new file mode 100644 index 00000000..9b5de883 --- /dev/null +++ b/tests/api/test_v1_audio.py @@ -0,0 +1,299 @@ +"""Wiring tests for ``/v1/audio/*`` envelope semantics. + +Covers two harness findings: + + * **#14** — STT pipeline must not leak the moonshine container's ffmpeg + argv / ``CalledProcessError`` repr through ``POST /v1/audio/transcriptions`` + when the upload isn't a decodable audio format. The proxy scrubs any + upstream body carrying those markers and re-emits a clean 415 with the + hal0 envelope ``code="audio.unsupported_format"``. + * **#18** — ``POST /v1/audio/speech`` with the OpenAI body missing the + ``model`` field must return 400 with ``code="request.missing_model"``, + not the dispatcher's misleading 404 from the default-model fallback + path. + +Happy-path STT + TTS round-trips use ``httpx.MockTransport`` injected into +the dispatcher's client so we exercise the full route → dispatcher → +forward path without a live upstream container. +""" + +from __future__ import annotations + +import httpx +import pytest +from fastapi.testclient import TestClient + +from hal0.upstreams.registry import Upstream + +# ── Helpers ─────────────────────────────────────────────────────────────────── + + +def _seed_stt_upstream(client: TestClient, port: int = 8089) -> None: + """Register a fake STT slot the dispatcher's legacy fallback will land on. + + The legacy heuristics in ``hal0.dispatcher.proxy`` don't have a rule for + ``/v1/audio/transcriptions`` model ids that aren't FLM tag-style, so an + arbitrary STT model name falls through to the ``primary`` slot. We + register under that name so the dispatch resolves cleanly in tests + without having to install a registry binding. + """ + client.app.state.upstreams.upsert( + Upstream( + name="primary", + kind="slot", + url=f"http://127.0.0.1:{port}/v1", + slot_name="primary", + auth_style="none", + ) + ) + + +def _seed_tts_upstream(client: TestClient, port: int = 8090) -> None: + """Register a fake TTS slot the dispatcher's legacy fallback will land on. + + Same fallthrough logic as STT — the dispatcher routes unknown ``model`` + ids to ``primary``, so we register there. + """ + client.app.state.upstreams.upsert( + Upstream( + name="primary", + kind="slot", + url=f"http://127.0.0.1:{port}/v1", + slot_name="primary", + auth_style="none", + ) + ) + + +def _install_mock_transport(client: TestClient, handler: httpx.MockTransport | object) -> None: + """Swap the dispatcher's httpx client for one backed by ``handler``. + + The dispatcher lazily constructs its own client on first use, so we + install ours BEFORE any /v1 request — once the lifespan has finished + (which the ``client`` fixture guarantees) but before the test fires. + """ + if not isinstance(handler, httpx.MockTransport): + handler = httpx.MockTransport(handler) # type: ignore[arg-type] + dispatcher = client.app.state.dispatcher + # Close any existing client first so we don't leak sockets. + if getattr(dispatcher, "_http_client", None) is not None: + # Best-effort sync close — the test client's event loop is paused + # between requests so a fire-and-forget close is acceptable here. + try: + import asyncio + + asyncio.get_event_loop().run_until_complete(dispatcher.aclose()) + except Exception: + pass + dispatcher._http_client = httpx.AsyncClient(transport=handler) + dispatcher._owns_http_client = True + + +# ── #14: STT envelope on non-audio uploads ──────────────────────────────────── + + +def test_v1_audio_transcriptions_redacts_ffmpeg_argv(client: TestClient) -> None: + """A non-audio multipart body must surface as 415 audio.unsupported_format. + + Simulates an older / out-of-tree moonshine container that hasn't been + updated to redact its ``CalledProcessError`` — the upstream returns a + leaky 500 body, and the proxy must scrub it so the client never sees + the subprocess argv or the user-supplied tempfile path. + """ + _seed_stt_upstream(client) + + leaky_body = ( + b"{\"detail\":\"Command '[\\'ffmpeg\\', \\'-y\\', \\'-i\\', " + b"'/tmp/tmpABC.bin']' returned non-zero exit status 1.\"}" + ) + + def handler(req: httpx.Request) -> httpx.Response: + return httpx.Response( + 500, + content=leaky_body, + headers={"content-type": "application/json"}, + ) + + _install_mock_transport(client, handler) + + files = {"file": ("not-audio.wav", b"this is not actually audio", "audio/wav")} + data = {"model": "moonshine-small-streaming-en"} + r = client.post("/v1/audio/transcriptions", files=files, data=data) + + assert r.status_code == 415, r.text + body_text = r.text + body_lower = body_text.lower() + # Acceptance per harness #14: no subprocess argv leaks through the proxy. + assert "ffmpeg" not in body_lower, body_text + assert "calledprocesserror" not in body_lower, body_text + # And no tempfile path either (defence in depth — covered by the marker + # check, but we assert explicitly so a future regex regression breaks + # here rather than silently passing). + assert "/tmp/" not in body_text, body_text + + payload = r.json() + assert payload["error"]["code"] == "audio.unsupported_format" + assert "unsupported audio format" in payload["error"]["message"].lower() + + +def test_v1_audio_transcriptions_clean_upstream_error_passes_through( + client: TestClient, +) -> None: + """An upstream 5xx body that doesn't mention ffmpeg passes through verbatim. + + Regression guard for the scrubber's narrowness — we only replace the body + when the leak markers are present. Unrelated upstream failures should + still surface their original envelope so callers can debug them. + """ + _seed_stt_upstream(client) + + clean_body = b'{"detail":"model not loaded"}' + + def handler(req: httpx.Request) -> httpx.Response: + return httpx.Response( + 503, + content=clean_body, + headers={"content-type": "application/json"}, + ) + + _install_mock_transport(client, handler) + + files = {"file": ("audio.wav", b"RIFF" + b"\x00" * 1024, "audio/wav")} + data = {"model": "moonshine-small-streaming-en"} + r = client.post("/v1/audio/transcriptions", files=files, data=data) + + assert r.status_code == 503, r.text + assert r.json()["detail"] == "model not loaded" + + +# ── #18: TTS missing-model envelope ─────────────────────────────────────────── + + +def test_v1_audio_speech_missing_model_returns_400_envelope(client: TestClient) -> None: + """POST /v1/audio/speech without ``model`` → 400 request.missing_model. + + The dispatcher's no-route branch used to return 404 ('dispatch.no_route') + when no model was supplied — misleading because the route IS mounted, the + request just lacked a required field. Harness finding #18. + """ + r = client.post( + "/v1/audio/speech", + json={"input": "hello world", "voice": "af_bella"}, + ) + assert r.status_code == 400, r.text + body = r.json() + assert body["error"]["code"] == "request.missing_model" + assert "model" in body["error"]["message"].lower() + # Defence in depth: the dispatcher's 404 envelope must not slip through. + assert body["error"]["code"] != "dispatch.no_route" + + +def test_v1_audio_speech_empty_model_returns_400_envelope(client: TestClient) -> None: + """Whitespace-only ``model`` is treated as missing.""" + r = client.post( + "/v1/audio/speech", + json={"model": " ", "input": "hi", "voice": "af_bella"}, + ) + assert r.status_code == 400, r.text + assert r.json()["error"]["code"] == "request.missing_model" + + +# ── Happy paths (mocked upstream) ───────────────────────────────────────────── + + +def test_v1_audio_transcriptions_happy_path(client: TestClient) -> None: + """A well-formed multipart STT request reaches the upstream and returns its body.""" + _seed_stt_upstream(client) + + expected = {"text": "hello world"} + + captured: dict[str, object] = {} + + def handler(req: httpx.Request) -> httpx.Response: + captured["url"] = str(req.url) + captured["method"] = req.method + captured["content_type"] = req.headers.get("content-type", "") + captured["body"] = req.content + return httpx.Response(200, json=expected) + + _install_mock_transport(client, handler) + + files = {"file": ("clip.wav", b"RIFF" + b"\x00" * 64, "audio/wav")} + data = {"model": "moonshine-small-streaming-en"} + r = client.post("/v1/audio/transcriptions", files=files, data=data) + + assert r.status_code == 200, r.text + assert r.json() == expected + # The route forwards multipart bytes verbatim — content-type must + # preserve the multipart boundary or the upstream's parser would fail. + assert captured["content_type"].startswith("multipart/form-data") + # And the multipart body must reach the upstream un-rewritten (the + # request bytes carry the model field on the wire). + assert b'name="model"' in captured["body"] # type: ignore[operator] + + +def test_v1_audio_speech_happy_path(client: TestClient) -> None: + """A well-formed TTS request streams the upstream's audio bytes back.""" + _seed_tts_upstream(client) + + fake_wav = b"RIFF\x24\x00\x00\x00WAVEfmt " + b"\x00" * 32 + + def handler(req: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + content=fake_wav, + headers={"content-type": "audio/wav"}, + ) + + _install_mock_transport(client, handler) + + r = client.post( + "/v1/audio/speech", + json={"model": "tts", "input": "hello", "voice": "af_bella"}, + ) + + assert r.status_code == 200, r.text + assert r.content == fake_wav + assert r.headers["content-type"].startswith("audio/wav") + + +# ── Sanity: the scrubber leaves non-audio routes alone ──────────────────────── + + +@pytest.mark.parametrize( + "path,body", + [ + ("/v1/chat/completions", {"model": "primary", "messages": []}), + ], +) +def test_scrubber_does_not_touch_non_audio_routes( + client: TestClient, path: str, body: dict[str, object] +) -> None: + """The audio leakage scrubber is scoped to ``/v1/audio/transcriptions``. + + A 500 body mentioning ffmpeg on a chat route (e.g. some upstream's stack + trace) must pass through untouched — only the STT route applies the + scrub. This protects callers that legitimately surface ffmpeg-adjacent + text in non-audio responses. + """ + client.app.state.upstreams.upsert( + Upstream( + name="primary", + kind="slot", + url="http://127.0.0.1:8081/v1", + slot_name="primary", + auth_style="none", + ) + ) + + leaky_body = b'{"detail":"trace mentions ffmpeg somewhere"}' + + def handler(req: httpx.Request) -> httpx.Response: + return httpx.Response(500, content=leaky_body) + + _install_mock_transport(client, handler) + + r = client.post(path, json=body) + assert r.status_code == 500 + # Body unchanged — the chat route never touches the scrubber. + assert "ffmpeg" in r.text diff --git a/tests/api/test_v1_dispatch.py b/tests/api/test_v1_dispatch.py index 88f67e70..0fd3c6c5 100644 --- a/tests/api/test_v1_dispatch.py +++ b/tests/api/test_v1_dispatch.py @@ -63,21 +63,22 @@ def test_v1_routes_are_no_longer_501_stubs(client: TestClient) -> None: assert r.json()["error"]["code"] != "system.not_implemented" -# ── issue #34: missing-model on /v1/audio/* → 400, not 404 ───────────────── +# ── issue #34 / harness #18: missing-model on /v1/audio/* → 400, not 404 ─── def test_v1_audio_speech_missing_model_returns_400(client: TestClient) -> None: - """POST /v1/audio/speech without 'model' → 400 validation.invalid. + """POST /v1/audio/speech without 'model' → 400 request.missing_model. Pre-issue-#34 the dispatcher's default-model + no-route fallback surfaced a confusing 404 ('dispatch.no_route'). The route now raises BadRequest up front so OpenAI clients see a useful error message - naming the missing field. + naming the missing field. Code lives in the ``request.*`` namespace + per harness finding #18. """ r = client.post("/v1/audio/speech", json={"input": "hello", "voice": "alloy"}) assert r.status_code == 400, r.text body = r.json() - assert body["error"]["code"] == "validation.invalid" + assert body["error"]["code"] == "request.missing_model" assert "model" in body["error"]["message"].lower() @@ -85,7 +86,7 @@ def test_v1_audio_speech_empty_model_returns_400(client: TestClient) -> None: """An empty / whitespace-only model field is treated as missing.""" r = client.post("/v1/audio/speech", json={"model": " ", "input": "hi"}) assert r.status_code == 400, r.text - assert r.json()["error"]["code"] == "validation.invalid" + assert r.json()["error"]["code"] == "request.missing_model" def test_v1_audio_transcriptions_missing_model_returns_400(client: TestClient) -> None: @@ -100,5 +101,5 @@ def test_v1_audio_transcriptions_missing_model_returns_400(client: TestClient) - r = client.post("/v1/audio/transcriptions", files=files) assert r.status_code == 400, r.text body = r.json() - assert body["error"]["code"] == "validation.invalid" + assert body["error"]["code"] == "request.missing_model" assert "model" in body["error"]["message"].lower()