diff --git a/src/hal0/api/middleware/auth.py b/src/hal0/api/middleware/auth.py index 599bb2d4..21c99bab 100644 --- a/src/hal0/api/middleware/auth.py +++ b/src/hal0/api/middleware/auth.py @@ -38,7 +38,7 @@ - ``GET /api/health/system`` — liveness probe - ``GET /api/status`` — dashboard liveness ping - - ``GET /api/metrics`` — prometheus / dashboard scrape + - ``GET /api/metrics`` — JSON metrics / dashboard scrape - ``GET /api/features`` — feature-flag inspection - ``GET /api/install/state`` — first-run gating - ``POST /api/install/complete`` — first-run sentinel write @@ -127,10 +127,16 @@ { # Liveness / observability — must be reachable for monitoring # tools that pre-date a per-deployment token. + # + # NOTE (issue #36): /api/metrics/prometheus used to live here but + # no Prometheus exposition route ever shipped. Listing a 404 path + # as "public" was confusing for scraper operators following the + # documented bypass list. Add it back if/when a real exporter + # ships — keep this list in lockstep with packaging/caddy's + # @public matcher. "/api/health/system", "/api/status", "/api/metrics", - "/api/metrics/prometheus", "/api/features", # First-run wizard gating — the dashboard hits these before the # user has any chance to mint a token. diff --git a/src/hal0/api/routes/health.py b/src/hal0/api/routes/health.py index 039d0a31..6547c04e 100644 --- a/src/hal0/api/routes/health.py +++ b/src/hal0/api/routes/health.py @@ -4,9 +4,12 @@ GET /api/status — overall liveness + summary (dashboard polls this) GET /api/health/system — deep health (slots, disk, ram) GET /api/metrics — JSON metrics - GET /api/metrics/prometheus — text/plain prometheus exposition GET /api/features — feature flags PUT /api/features/{name} — toggle feature flag + +Note (issue #36): a /api/metrics/prometheus route was advertised in this +docstring and in PUBLIC_PATHS but was never implemented. Both stubs are +removed until a real prometheus_client-backed exporter ships. """ from __future__ import annotations diff --git a/src/hal0/api/routes/updater.py b/src/hal0/api/routes/updater.py index e13507f4..0504be4c 100644 --- a/src/hal0/api/routes/updater.py +++ b/src/hal0/api/routes/updater.py @@ -191,7 +191,7 @@ async def check_updates(request: Request) -> dict[str, Any]: # ── /apply ───────────────────────────────────────────────────────────────── -@router.post("/apply", dependencies=_writer) +@router.post("/apply", status_code=202, dependencies=_writer) async def apply_update(request: Request) -> dict[str, Any]: """Kick off an update job in the background; return a job id. @@ -200,8 +200,13 @@ async def apply_update(request: Request) -> dict[str, Any]: {"version": "0.1.0"} # pin a specific version; omit for latest The actual update work happens in ``_run_apply_job`` which calls - ``Updater.apply()``. The route returns immediately with the job id; - poll ``/api/updates/status/{job_id}`` for state transitions. + ``Updater.apply()``. The route returns immediately with the queued-job + snapshot; poll ``/api/updates/status/{job_id}`` for state transitions. + + Returns **202 Accepted** because the work is queued, not completed — + matches ``/api/models/{id}/pull`` and the rest of hal0's async-job + endpoints (issue #37). Failure paths still raise typed 4xx envelopes + via the middleware. """ try: body = await request.json() diff --git a/src/hal0/api/routes/v1.py b/src/hal0/api/routes/v1.py index adcc0bf8..589dc356 100644 --- a/src/hal0/api/routes/v1.py +++ b/src/hal0/api/routes/v1.py @@ -131,8 +131,10 @@ async def _read_json_body(request: Request) -> dict[str, Any]: async def _dispatch_and_forward( request: Request, dispatcher: DispatcherDep, + body: dict[str, Any] | None = None, ) -> Response: - body = await _read_json_body(request) + if body is None: + body = await _read_json_body(request) call = await dispatcher.dispatch(request, body=body) # Remember the most recent model we sent to this upstream so the # dashboard's synthetic slot reflects what's actually being used, @@ -228,14 +230,32 @@ async def audio_transcriptions(request: Request, dispatcher: DispatcherDep) -> R # raw multipart bytes unchanged so the upstream's own multipart parser # works. JSON re-encoding (the default dispatch path) would corrupt the # WAV payload. - return await _forward_multipart(request, dispatcher) + # + # Per OpenAI's contract the ``model`` form field is required. We surface + # the missing-model case as 400 (validation.invalid) rather than letting + # it fall through to the dispatcher's default-model + no-route 404, + # which obscured the real problem (issue #34). + return await _forward_multipart(request, dispatcher, require_model=True) @router.post("/audio/speech") async def audio_speech(request: Request, dispatcher: DispatcherDep) -> Response: # /v1/audio/speech is the TTS input direction — body is JSON - # ({"model": "...", "input": "...", "voice": "..."}). Standard path. - return await _dispatch_and_forward(request, dispatcher) + # ({"model": "...", "input": "...", "voice": "..."}). Standard path, + # but the ``model`` field is required by the OpenAI contract; raise + # 400 explicitly so the caller doesn't see a misleading 404 from the + # dispatcher's default-model fallback path (issue #34). + from hal0.errors import BadRequest + + body = await _read_json_body(request) + model = body.get("model") + if not isinstance(model, str) or not model.strip(): + raise BadRequest( + "Request body field 'model' is required", + details={"field": "model", "path": "/v1/audio/speech"}, + code="validation.invalid", + ) + return await _dispatch_and_forward(request, dispatcher, body=body) # ── /v1/images/generations (ComfyUI provider, hal0-managed translation) ──── @@ -403,7 +423,12 @@ def _extract_multipart_model(raw_body: bytes) -> str: return "" -async def _forward_multipart(request: Request, dispatcher: DispatcherDep) -> Response: +async def _forward_multipart( + request: Request, + dispatcher: DispatcherDep, + *, + require_model: bool = False, +) -> Response: """Route a multipart request without re-serialising its body. The dispatcher's normal _remap_model path JSON-encodes the body, which @@ -417,14 +442,27 @@ async def _forward_multipart(request: Request, dispatcher: DispatcherDep) -> Res so its route resolution still works. 4. After dispatch picks an upstream, overwrite call.body with the original raw bytes + content-type header so httpx forwards verbatim. + + When ``require_model`` is True, missing ``model`` raises a 400 BadRequest + instead of falling through to the dispatcher's default-model path + (issue #34 — surfaces a useful error to OpenAI clients that forgot it). """ import httpx + from hal0.errors import BadRequest + raw_body = await request.body() headers = dict(request.headers) content_type = headers.get("content-type") or "multipart/form-data" model_value = _extract_multipart_model(raw_body) + if require_model and not model_value: + raise BadRequest( + "Request body field 'model' is required", + details={"field": "model", "path": request.url.path}, + code="validation.invalid", + ) + call = await dispatcher.dispatch(request, body={"model": model_value} if model_value else {}) last_used = getattr(request.app.state, "last_used_model", None) diff --git a/src/hal0/slots/manager.py b/src/hal0/slots/manager.py index b981d162..7540c4e3 100644 --- a/src/hal0/slots/manager.py +++ b/src/hal0/slots/manager.py @@ -1218,9 +1218,15 @@ async def _load_slot_config(self, slot_name: str) -> dict[str, Any]: # state.json record. Real callers should always have a TOML. rec = self._states.get(slot_name) if rec is None: - raise SlotConfigError( - f"slot config {path} not found and no in-memory state", - details={"slot": slot_name}, + # Issue #35: no TOML and no in-memory state means the slot + # simply doesn't exist — raise the 404-shaped SlotNotFound so + # the API surfaces 'slot.not_found' instead of the misleading + # 400 'slot.config_error'. A real config-parse failure on an + # existing slot still raises SlotConfigError below. + raise SlotNotFound( + f"slot {slot_name!r} is not configured " + f"(no config at {path} and no in-memory state)", + details={"slot": slot_name, "path": str(path)}, ) return { "name": slot_name, diff --git a/tests/api/test_slots_routes.py b/tests/api/test_slots_routes.py index d99f0dc4..51dbcbb1 100644 --- a/tests/api/test_slots_routes.py +++ b/tests/api/test_slots_routes.py @@ -306,6 +306,45 @@ def test_load_unknown_slot_returns_typed_envelope( assert r.json()["error"]["code"] == "slot.not_found" +# ── issue #35: get_config on unknown slot → 404 not 400 ───────────────────── + + +def test_get_config_unknown_slot_returns_404( + slot_root: Path, + isolated_client: TestClient, +) -> None: + """GET /api/slots/doesntexist/config → 404 slot.not_found. + + Pre-issue-#35 the route surfaced 400 'slot.config_error' because the + underlying _load_slot_config conflated "missing config file" with a + parse error. A UI distinguishing "slot doesn't exist" from "slot + exists but config is bad" got ambiguous signals. SlotManager.get_config + now raises SlotNotFound (404) when neither the TOML nor an in-memory + state record exists for the slot name. + """ + r = isolated_client.get("/api/slots/doesntexist/config") + assert r.status_code == 404, r.text + assert r.json()["error"]["code"] == "slot.not_found" + + +def test_get_config_invalid_toml_still_returns_400( + slot_root: Path, + isolated_client: TestClient, +) -> None: + """An EXISTING slot with malformed TOML still surfaces 400 slot.config_error. + + Acceptance for issue #35: the SlotNotFound mapping only applies to the + "no config and no state" branch. A real parse failure on a config that + is present on disk is still the operator's problem to fix, and the 400 + code communicates "this slot exists but its config is broken". + """ + # Write a syntactically broken TOML for a real slot name. + (slot_root / "broken.toml").write_text("name = \nport = not_an_int [oops", encoding="utf-8") + r = isolated_client.get("/api/slots/broken/config") + assert r.status_code == 400, r.text + assert r.json()["error"]["code"] == "slot.config_error" + + def test_unload_after_load_transitions_to_offline( slot_root: Path, systemctl_stub: dict[str, Any], diff --git a/tests/api/test_updater_routes.py b/tests/api/test_updater_routes.py index 3609d9cc..aca15854 100644 --- a/tests/api/test_updater_routes.py +++ b/tests/api/test_updater_routes.py @@ -105,9 +105,13 @@ def test_check_missing_manifest_returns_envelope( def test_apply_creates_a_queued_job_returning_id(isolated_client: TestClient) -> None: - """POST /api/updates/apply returns a job with id + state, runs in background.""" + """POST /api/updates/apply returns a job with id + state, runs in background. + + Status code is 202 Accepted (issue #37) — the route queues background + work and returns immediately, matching /api/models/{id}/pull's shape. + """ r = isolated_client.post("/api/updates/apply", json={}) - assert r.status_code == 200, r.text + assert r.status_code == 202, r.text body = r.json() assert "id" in body and isinstance(body["id"], str) assert body["state"] in ("queued", "running", "failed") diff --git a/tests/api/test_v1_dispatch.py b/tests/api/test_v1_dispatch.py index 07ed9836..88f67e70 100644 --- a/tests/api/test_v1_dispatch.py +++ b/tests/api/test_v1_dispatch.py @@ -61,3 +61,44 @@ def test_v1_routes_are_no_longer_501_stubs(client: TestClient) -> None: assert r.status_code != 501, f"{method} {path}: still a stub" if r.status_code >= 400: assert r.json()["error"]["code"] != "system.not_implemented" + + +# ── issue #34: missing-model on /v1/audio/* → 400, not 404 ───────────────── + + +def test_v1_audio_speech_missing_model_returns_400(client: TestClient) -> None: + """POST /v1/audio/speech without 'model' → 400 validation.invalid. + + Pre-issue-#34 the dispatcher's default-model + no-route fallback + surfaced a confusing 404 ('dispatch.no_route'). The route now raises + BadRequest up front so OpenAI clients see a useful error message + naming the missing field. + """ + r = client.post("/v1/audio/speech", json={"input": "hello", "voice": "alloy"}) + assert r.status_code == 400, r.text + body = r.json() + assert body["error"]["code"] == "validation.invalid" + assert "model" in body["error"]["message"].lower() + + +def test_v1_audio_speech_empty_model_returns_400(client: TestClient) -> None: + """An empty / whitespace-only model field is treated as missing.""" + r = client.post("/v1/audio/speech", json={"model": " ", "input": "hi"}) + assert r.status_code == 400, r.text + assert r.json()["error"]["code"] == "validation.invalid" + + +def test_v1_audio_transcriptions_missing_model_returns_400(client: TestClient) -> None: + """POST /v1/audio/transcriptions without a model form field → 400. + + Multipart variant of the same contract — the regex-extracted model is + empty so the route raises BadRequest before dispatching to a default + that wouldn't route anyway. + """ + # Minimal multipart body with a fake audio file but NO model field. + files = {"file": ("clip.wav", b"RIFFfake", "audio/wav")} + r = client.post("/v1/audio/transcriptions", files=files) + assert r.status_code == 400, r.text + body = r.json() + assert body["error"]["code"] == "validation.invalid" + assert "model" in body["error"]["message"].lower()