From 42812f7927dd7d40e9dd00cbf4ffe5ca6d9abd3f Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 23 May 2026 19:03:25 -0400 Subject: [PATCH 1/2] fix(slots): POST normalizes Lemonade-shape model + auto-assigns port (#275 bugs 1+2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaced 2026-05-23 by the v3 dashboard CRUD sweep. Two compat hops at the POST /api/slots boundary: **Bug 1** — body accepts top-level `model: "name"` (Lemonade-shape, per the slots audit + the v3 dashboard's create-slot modal) but the serializer at slots.py:191 reads `cfg.get("model").get("default")` (the nested [model] table that the SlotConfig pydantic model and persistent TOML loaders both use). Result: `model_default` MISSING from /api/slots responses for any slot created via POST. Cards rendered with no model name despite the TOML having the model. Workaround was hand-writing TOMLs in nested shape. **Bug 2** — POST never auto-assigns a port. New slots persisted as `port=0`, dashboard card chips showed port=0 instead of a useable port. Fix: add `_normalize_create_body()` that runs before `sm.create()`: - Top-level string `model` → nested `{"default": "name"}`. - Missing/zero `port` → next free port in 8081-8099 via the new `_next_free_slot_port()` helper (walks existing slot TOMLs to find the lowest unclaimed port). Closes parts of #275 (bugs 1+2 of 7). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/hal0/api/routes/slots.py | 72 ++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/hal0/api/routes/slots.py b/src/hal0/api/routes/slots.py index 5cb0efb5..a23d3805 100644 --- a/src/hal0/api/routes/slots.py +++ b/src/hal0/api/routes/slots.py @@ -324,6 +324,70 @@ async def list_slots(request: Request) -> list[dict[str, object]]: return merged +def _next_free_slot_port(start: int = 8081, end: int = 8099) -> int: + """Return the next free port in the slots range (#275 bug 2). 
+ + Walks ``/etc/hal0/slots/*.toml`` collecting both top-level ``port`` + and nested ``[server] port`` values. Returns the lowest port in + ``[start, end]`` not already claimed. The 8081-8099 range matches + PLAN.md §2 ports table. + """ + import tomllib + + from hal0.config.paths import hal0_etc_dir + + used: set[int] = set() + slots_dir = hal0_etc_dir() / "slots" + if slots_dir.is_dir(): + for f in slots_dir.glob("*.toml"): + try: + with f.open("rb") as fh: + cfg = tomllib.load(fh) + except (OSError, tomllib.TOMLDecodeError): + continue + top = cfg.get("port") + if isinstance(top, int): + used.add(top) + srv = cfg.get("server") + if isinstance(srv, dict): + nested = srv.get("port") + if isinstance(nested, int): + used.add(nested) + for p in range(start, end + 1): + if p not in used: + return p + raise BadRequest( + f"no free port in {start}-{end} (all slots occupied)", + code="slot.no_free_port", + ) + + +def _normalize_create_body(body: dict[str, Any]) -> dict[str, Any]: + """Normalize a POST /api/slots body to the canonical nested shape. + + Two compat hops (#275 bugs 1 + 2): + + 1. Top-level ``model: "name"`` (Lemonade-shape) → ``model: {"default": + "name"}`` (nested [model] table). The serializer at slots.py:191 + reads ``cfg.get("model").get("default")`` and the SlotConfig + pydantic model has a nested ModelConfig — but the audit- + recommended Lemonade-shape body POSTs a top-level string. The + result was ``model_default`` MISSING from /api/slots responses + for any slot created via POST. + 2. Missing or zero ``port`` → auto-assign via + :func:`_next_free_slot_port`. Without this, new slots persist + ``port=0`` and the dashboard card shows ``port=0`` instead of a + useable port. 
+ """ + out = dict(body) + model_val = out.get("model") + if isinstance(model_val, str): + out["model"] = {"default": model_val} + if "port" not in out or not isinstance(out.get("port"), int) or out.get("port") in (0, None): + out["port"] = _next_free_slot_port() + return out + + @router.post("", status_code=201) async def create_slot(request: Request) -> dict[str, object]: """Create a new slot. Body: SlotConfig schema. @@ -331,6 +395,13 @@ async def create_slot(request: Request) -> dict[str, object]: Writes /etc/hal0/slots/.toml, the systemd drop-in override, the env file, and the initial state.json. Does NOT start the slot — the caller follows with POST /api/slots//load when ready. + + Accepts both the Lemonade-shape body (top-level ``model: "name"``, + ``device: "gpu-vulkan"``, no ``port``) and the legacy nested shape + (``[model] default = "name"``, ``[server] port = 8081``). The body + is normalized to the nested shape via :func:`_normalize_create_body` + before persistence so the serializer + persistent TOML loaders see + one canonical shape. """ sm = _get_slot_manager(request) try: @@ -351,6 +422,7 @@ async def create_slot(request: Request) -> dict[str, object]: code="slot.name_required", ) + body = _normalize_create_body(body) snap = await sm.create(name, body) return _slot_to_dict(snap, request) From b1b2be70e338ede8a2fab8e03b9469811f3b62d4 Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 23 May 2026 19:06:49 -0400 Subject: [PATCH 2/2] feat(cli): hal0 slot create gains --type + derives Lemonade device (#275 bug 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaced 2026-05-23 by the v3 dashboard CRUD sweep: the CLI couldn't create embed/rerank/transcription/tts slots because `hal0 slot create` had no `--type` flag and used the v0.1 hardware enum ([vulkan|rocm|cpu]) instead of the Lemonade `device` enum ([gpu-vulkan|gpu-rocm|cpu|npu]). 
Operators creating non-LLM slots had to bypass the CLI and POST to /api/slots directly. Adds: - `SlotType` enum (`llm | embedding | reranking | transcription | tts | image`) — the Lemonade type vocab. - `--type` / `-t` flag on `hal0 slot create`, defaults to `llm` for backward compat with the v0.1 chat-only create path. - Derives `device` from `--hardware` (vulkan→gpu-vulkan, rocm→gpu-rocm, cpu→cpu) so the body POST'd matches the audit-recommended Lemonade-shape SlotConfig. Keeps `--provider` + `--hardware` flags (now documented as legacy v0.1 compat). Pairs with PR #281 which normalizes the POST body server-side. Closes part of #275 (bug 3 of 7). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/hal0/cli/slot_commands.py | 48 +++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/src/hal0/cli/slot_commands.py b/src/hal0/cli/slot_commands.py index 70afb8b1..1e256ee5 100644 --- a/src/hal0/cli/slot_commands.py +++ b/src/hal0/cli/slot_commands.py @@ -48,6 +48,24 @@ class SlotProvider(StrEnum): SlotBackend = SlotProvider +class SlotType(StrEnum): + """Lemonade-shape slot type enum (#275 bug 3). + + Maps to the Lemonade vocab from PLAN.md §1 v0.2 slot model: + ``llm | embedding | reranking | transcription | tts | image``. + The dispatcher routes by ``type`` (per ADR-0008 §6); without this + flag the CLI couldn't create embedding/rerank/transcription/tts + slots at all. + """ + + llm = "llm" + embedding = "embedding" + reranking = "reranking" + transcription = "transcription" + tts = "tts" + image = "image" + + class SlotHardware(StrEnum): """Hardware backends valid for a slot (mirrors SlotConfig.backend). @@ -289,10 +307,24 @@ def slot_logs( @app.command("create") def slot_create( name: str = typer.Argument(..., help="Slot name (e.g. primary, embed, stt)"), + type_: SlotType = typer.Option( + SlotType.llm, + "--type", + "-t", + help=( + "Lemonade slot type: llm | embedding | reranking | transcription | tts | image. 
" + "Determines how the dispatcher routes requests (ADR-0008 §6)." + ), + case_sensitive=False, + ), provider: SlotProvider = typer.Option( "llama-server", "--provider", - help="Inference provider (engine) for the slot.", + help=( + "[Legacy v0.1] Inference provider (engine) for the slot. " + "Under Lemonade (v0.2+), provider is determined by --type; " + "this flag is preserved for backward-compat with older slot TOMLs." + ), case_sensitive=False, ), hardware: SlotHardware | None = typer.Option( @@ -300,7 +332,8 @@ def slot_create( "--hardware", help=( "Hardware backend: vulkan | rocm | cpu. " - "Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe)." + "Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe). " + "Lemonade-shape `device` is derived: vulkan→gpu-vulkan, rocm→gpu-rocm, cpu→cpu." ), case_sensitive=False, ), @@ -351,8 +384,19 @@ def slot_create( return hw = hardware.value if hardware is not None else _detect_default_hardware() + # Lemonade-shape `device` (gpu-vulkan / gpu-rocm / cpu / npu) derives + # from the v0.1 hardware enum: vulkan/rocm → gpu-vulkan/gpu-rocm; cpu + # stays cpu; npu has no v0.1 hardware equivalent (set --hardware via + # the legacy schema upgrade path). + device = { + "vulkan": "gpu-vulkan", + "rocm": "gpu-rocm", + "cpu": "cpu", + }.get(hw, hw) body: dict[str, Any] = { "name": name, + "type": type_.value, + "device": device, "backend": hw, # SlotConfig.backend = hardware target (vulkan/rocm/cpu/...) "provider": str(provider), "model": {"default": model, "context_size": ctx_size},