Hal0ai · thinmintdev · May 17, 2026 · May 16, 2026 · May 16, 2026 · May 16, 2026
diff --git a/src/hal0/api/middleware/auth.py b/src/hal0/api/middleware/auth.py
@@ -38,7 +38,7 @@
 
   - ``GET  /api/health/system``        — liveness probe
   - ``GET  /api/status``               — dashboard liveness ping
-  - ``GET  /api/metrics``              — prometheus / dashboard scrape
+  - ``GET  /api/metrics``              — JSON metrics / dashboard scrape
   - ``GET  /api/features``             — feature-flag inspection
   - ``GET  /api/install/state``        — first-run gating
   - ``POST /api/install/complete``     — first-run sentinel write
@@ -127,10 +127,16 @@
     {
         # Liveness / observability — must be reachable for monitoring
         # tools that pre-date a per-deployment token.
+        #
+        # NOTE (issue #36): /api/metrics/prometheus used to live here but
+        # no Prometheus exposition route ever shipped. Listing a 404 path
+        # as "public" was confusing for scraper operators following the
+        # documented bypass list. Add it back if/when a real exporter
+        # ships — keep this list in lockstep with packaging/caddy's
+        # @public matcher.
         "/api/health/system",
         "/api/status",
         "/api/metrics",
-        "/api/metrics/prometheus",
         "/api/features",
         # First-run wizard gating — the dashboard hits these before the
         # user has any chance to mint a token.

diff --git a/src/hal0/api/routes/health.py b/src/hal0/api/routes/health.py
@@ -4,9 +4,12 @@
   GET  /api/status            — overall liveness + summary (dashboard polls this)
   GET  /api/health/system     — deep health (slots, disk, ram)
   GET  /api/metrics           — JSON metrics
-  GET  /api/metrics/prometheus — text/plain prometheus exposition
   GET  /api/features          — feature flags
   PUT  /api/features/{name}   — toggle feature flag
+
+Note (issue #36): a /api/metrics/prometheus route was advertised in this
+docstring and in PUBLIC_PATHS but was never implemented. Both stubs are
+removed until a real prometheus_client-backed exporter ships.
 """
 
 from __future__ import annotations

diff --git a/src/hal0/api/routes/updater.py b/src/hal0/api/routes/updater.py
@@ -191,7 +191,7 @@ async def check_updates(request: Request) -> dict[str, Any]:
 # ── /apply ─────────────────────────────────────────────────────────────────
 
 
-@router.post("/apply", dependencies=_writer)
+@router.post("/apply", status_code=202, dependencies=_writer)
 async def apply_update(request: Request) -> dict[str, Any]:
     """Kick off an update job in the background; return a job id.
 
@@ -200,8 +200,13 @@ async def apply_update(request: Request) -> dict[str, Any]:
         {"version": "0.1.0"}   # pin a specific version; omit for latest
 
     The actual update work happens in ``_run_apply_job`` which calls
-    ``Updater.apply()``. The route returns immediately with the job id;
-    poll ``/api/updates/status/{job_id}`` for state transitions.
+    ``Updater.apply()``. The route returns immediately with the queued-job
+    snapshot; poll ``/api/updates/status/{job_id}`` for state transitions.
+
+    Returns **202 Accepted** because the work is queued, not completed —
+    matches ``/api/models/{id}/pull`` and the rest of hal0's async-job
+    endpoints (issue #37). Failure paths still raise typed 4xx envelopes
+    via the middleware.
     """
     try:
         body = await request.json()

diff --git a/src/hal0/api/routes/v1.py b/src/hal0/api/routes/v1.py
@@ -131,8 +131,10 @@ async def _read_json_body(request: Request) -> dict[str, Any]:
 async def _dispatch_and_forward(
     request: Request,
     dispatcher: DispatcherDep,
+    body: dict[str, Any] | None = None,
 ) -> Response:
-    body = await _read_json_body(request)
+    if body is None:
+        body = await _read_json_body(request)
     call = await dispatcher.dispatch(request, body=body)
     # Remember the most recent model we sent to this upstream so the
     # dashboard's synthetic slot reflects what's actually being used,
@@ -228,14 +230,32 @@ async def audio_transcriptions(request: Request, dispatcher: DispatcherDep) -> R
     # raw multipart bytes unchanged so the upstream's own multipart parser
     # works. JSON re-encoding (the default dispatch path) would corrupt the
     # WAV payload.
-    return await _forward_multipart(request, dispatcher)
+    #
+    # Per OpenAI's contract the ``model`` form field is required. We surface
+    # the missing-model case as 400 (validation.invalid) rather than letting
+    # it fall through to the dispatcher's default-model + no-route 404,
+    # which obscured the real problem (issue #34).
+    return await _forward_multipart(request, dispatcher, require_model=True)
 
 
 @router.post("/audio/speech")
 async def audio_speech(request: Request, dispatcher: DispatcherDep) -> Response:
     # /v1/audio/speech is the TTS input direction — body is JSON
-    # ({"model": "...", "input": "...", "voice": "..."}). Standard path.
-    return await _dispatch_and_forward(request, dispatcher)
+    # ({"model": "...", "input": "...", "voice": "..."}). Standard path,
+    # but the ``model`` field is required by the OpenAI contract; raise
+    # 400 explicitly so the caller doesn't see a misleading 404 from the
+    # dispatcher's default-model fallback path (issue #34).
+    from hal0.errors import BadRequest
+
+    body = await _read_json_body(request)
+    model = body.get("model")
+    if not isinstance(model, str) or not model.strip():
+        raise BadRequest(
+            "Request body field 'model' is required",
+            details={"field": "model", "path": "/v1/audio/speech"},
+            code="validation.invalid",
+        )
+    return await _dispatch_and_forward(request, dispatcher, body=body)
 
 
 # ── /v1/images/generations (ComfyUI provider, hal0-managed translation) ────
@@ -403,7 +423,12 @@ def _extract_multipart_model(raw_body: bytes) -> str:
         return ""
 
 
-async def _forward_multipart(request: Request, dispatcher: DispatcherDep) -> Response:
+async def _forward_multipart(
+    request: Request,
+    dispatcher: DispatcherDep,
+    *,
+    require_model: bool = False,
+) -> Response:
     """Route a multipart request without re-serialising its body.
 
     The dispatcher's normal _remap_model path JSON-encodes the body, which
@@ -417,14 +442,27 @@ async def _forward_multipart(request: Request, dispatcher: DispatcherDep) -> Res
        so its route resolution still works.
     4. After dispatch picks an upstream, overwrite call.body with the
        original raw bytes + content-type header so httpx forwards verbatim.
+
+    When ``require_model`` is True, missing ``model`` raises a 400 BadRequest
+    instead of falling through to the dispatcher's default-model path
+    (issue #34 — surfaces a useful error to OpenAI clients that forgot it).
     """
     import httpx
 
+    from hal0.errors import BadRequest
+
     raw_body = await request.body()
     headers = dict(request.headers)
     content_type = headers.get("content-type") or "multipart/form-data"
     model_value = _extract_multipart_model(raw_body)
 
+    if require_model and not model_value:
+        raise BadRequest(
+            "Request body field 'model' is required",
+            details={"field": "model", "path": request.url.path},
+            code="validation.invalid",
+        )
+
     call = await dispatcher.dispatch(request, body={"model": model_value} if model_value else {})
 
     last_used = getattr(request.app.state, "last_used_model", None)

diff --git a/src/hal0/slots/manager.py b/src/hal0/slots/manager.py
@@ -1218,9 +1218,15 @@ async def _load_slot_config(self, slot_name: str) -> dict[str, Any]:
             # state.json record.  Real callers should always have a TOML.
             rec = self._states.get(slot_name)
             if rec is None:
-                raise SlotConfigError(
-                    f"slot config {path} not found and no in-memory state",
-                    details={"slot": slot_name},
+                # Issue #35: no TOML and no in-memory state means the slot
+                # simply doesn't exist — raise the 404-shaped SlotNotFound so
+                # the API surfaces 'slot.not_found' instead of the misleading
+                # 400 'slot.config_error'. A real config-parse failure on an
+                # existing slot still raises SlotConfigError below.
+                raise SlotNotFound(
+                    f"slot {slot_name!r} is not configured "
+                    f"(no config at {path} and no in-memory state)",
+                    details={"slot": slot_name, "path": str(path)},
                 )
             return {
                 "name": slot_name,

diff --git a/tests/api/test_slots_routes.py b/tests/api/test_slots_routes.py
@@ -306,6 +306,45 @@ def test_load_unknown_slot_returns_typed_envelope(
     assert r.json()["error"]["code"] == "slot.not_found"
 
 
+# ── issue #35: get_config on unknown slot → 404 not 400 ─────────────────────
+
+
+def test_get_config_unknown_slot_returns_404(
+    slot_root: Path,
+    isolated_client: TestClient,
+) -> None:
+    """GET /api/slots/doesntexist/config → 404 slot.not_found.
+
+    Pre-issue-#35 the route surfaced 400 'slot.config_error' because the
+    underlying _load_slot_config conflated "missing config file" with a
+    parse error. A UI distinguishing "slot doesn't exist" from "slot
+    exists but config is bad" got ambiguous signals. SlotManager.get_config
+    now raises SlotNotFound (404) when neither the TOML nor an in-memory
+    state record exists for the slot name.
+    """
+    r = isolated_client.get("/api/slots/doesntexist/config")
+    assert r.status_code == 404, r.text
+    assert r.json()["error"]["code"] == "slot.not_found"
+
+
+def test_get_config_invalid_toml_still_returns_400(
+    slot_root: Path,
+    isolated_client: TestClient,
+) -> None:
+    """An EXISTING slot with malformed TOML still surfaces 400 slot.config_error.
+
+    Acceptance for issue #35: the SlotNotFound mapping only applies to the
+    "no config and no state" branch. A real parse failure on a config that
+    is present on disk is still the operator's problem to fix, and the 400
+    code communicates "this slot exists but its config is broken".
+    """
+    # Write a syntactically broken TOML for a real slot name.
+    (slot_root / "broken.toml").write_text("name = \nport = not_an_int [oops", encoding="utf-8")
+    r = isolated_client.get("/api/slots/broken/config")
+    assert r.status_code == 400, r.text
+    assert r.json()["error"]["code"] == "slot.config_error"
+
+
 def test_unload_after_load_transitions_to_offline(
     slot_root: Path,
     systemctl_stub: dict[str, Any],

diff --git a/tests/api/test_updater_routes.py b/tests/api/test_updater_routes.py
@@ -105,9 +105,13 @@ def test_check_missing_manifest_returns_envelope(
 
 
 def test_apply_creates_a_queued_job_returning_id(isolated_client: TestClient) -> None:
-    """POST /api/updates/apply returns a job with id + state, runs in background."""
+    """POST /api/updates/apply returns a job with id + state, runs in background.
+
+    Status code is 202 Accepted (issue #37) — the route queues background
+    work and returns immediately, matching /api/models/{id}/pull's shape.
+    """
     r = isolated_client.post("/api/updates/apply", json={})
-    assert r.status_code == 200, r.text
+    assert r.status_code == 202, r.text
     body = r.json()
     assert "id" in body and isinstance(body["id"], str)
     assert body["state"] in ("queued", "running", "failed")

diff --git a/tests/api/test_v1_dispatch.py b/tests/api/test_v1_dispatch.py
@@ -61,3 +61,44 @@ def test_v1_routes_are_no_longer_501_stubs(client: TestClient) -> None:
         assert r.status_code != 501, f"{method} {path}: still a stub"
         if r.status_code >= 400:
             assert r.json()["error"]["code"] != "system.not_implemented"
+
+
+# ── issue #34: missing-model on /v1/audio/* → 400, not 404 ─────────────────
+
+
+def test_v1_audio_speech_missing_model_returns_400(client: TestClient) -> None:
+    """POST /v1/audio/speech without 'model' → 400 validation.invalid.
+
+    Pre-issue-#34 the dispatcher's default-model + no-route fallback
+    surfaced a confusing 404 ('dispatch.no_route'). The route now raises
+    BadRequest up front so OpenAI clients see a useful error message
+    naming the missing field.
+    """
+    r = client.post("/v1/audio/speech", json={"input": "hello", "voice": "alloy"})
+    assert r.status_code == 400, r.text
+    body = r.json()
+    assert body["error"]["code"] == "validation.invalid"
+    assert "model" in body["error"]["message"].lower()
+
+
+def test_v1_audio_speech_empty_model_returns_400(client: TestClient) -> None:
+    """An empty / whitespace-only model field is treated as missing."""
+    r = client.post("/v1/audio/speech", json={"model": "   ", "input": "hi"})
+    assert r.status_code == 400, r.text
+    assert r.json()["error"]["code"] == "validation.invalid"
+
+
+def test_v1_audio_transcriptions_missing_model_returns_400(client: TestClient) -> None:
+    """POST /v1/audio/transcriptions without a model form field → 400.
+
+    Multipart variant of the same contract — the regex-extracted model is
+    empty so the route raises BadRequest before dispatching to a default
+    that wouldn't route anyway.
+    """
+    # Minimal multipart body with a fake audio file but NO model field.
+    files = {"file": ("clip.wav", b"RIFFfake", "audio/wav")}
+    r = client.post("/v1/audio/transcriptions", files=files)
+    assert r.status_code == 400, r.text
+    body = r.json()
+    assert body["error"]["code"] == "validation.invalid"
+    assert "model" in body["error"]["message"].lower()