From 42812f7927dd7d40e9dd00cbf4ffe5ca6d9abd3f Mon Sep 17 00:00:00 2001
From: Alexander <alexander@awideweb.com>
Date: Sat, 23 May 2026 19:03:25 -0400
Subject: [PATCH 1/2] fix(slots): POST normalizes Lemonade-shape model +
 auto-assigns port (#275 bugs 1+2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Surfaced 2026-05-23 by the v3 dashboard CRUD sweep. Two compat hops at
the POST /api/slots boundary:

**Bug 1** — body accepts top-level `model: "name"` (Lemonade-shape, per
the slots audit + the v3 dashboard's create-slot modal) but the
serializer at slots.py:191 reads `cfg.get("model").get("default")` (the
nested [model] table that the SlotConfig pydantic model and persistent
TOML loaders both use). Result: `model_default` MISSING from /api/slots
responses for any slot created via POST. Cards rendered with no model
name despite the TOML having the model. Workaround was hand-writing
TOMLs in nested shape.

**Bug 2** — POST never auto-assigned a port. New slots were persisted
with `port=0`, so dashboard card chips showed `port=0` instead of a
usable port.

Fix: add `_normalize_create_body()`, which runs before `sm.create()`:
- Top-level string `model` → nested `{"default": <string>}`.
- Missing, zero, or non-integer `port` → next free port in 8081-8099
  via the new `_next_free_slot_port()` helper (walks existing slot
  TOMLs to find the lowest unclaimed port; rejects the request with
  `slot.no_free_port` when the range is exhausted).

Closes parts of #275 (bugs 1+2 of 7).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hal0/api/routes/slots.py | 72 ++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
diff --git a/src/hal0/api/routes/slots.py b/src/hal0/api/routes/slots.py
index 5cb0efb5..a23d3805 100644
--- a/src/hal0/api/routes/slots.py
+++ b/src/hal0/api/routes/slots.py
@@ -324,6 +324,70 @@ async def list_slots(request: Request) -> list[dict[str, object]]:
     return merged
 
 
+def _next_free_slot_port(start: int = 8081, end: int = 8099) -> int:
+    """Return the next free port in the slots range (#275 bug 2).
+
+    Walks ``/etc/hal0/slots/*.toml`` collecting both top-level ``port``
+    and nested ``[server] port`` values. Returns the lowest port in
+    ``[start, end]`` not already claimed. The 8081-8099 range matches
+    PLAN.md §2 ports table.
+    """
+    import tomllib
+
+    from hal0.config.paths import hal0_etc_dir
+
+    used: set[int] = set()
+    slots_dir = hal0_etc_dir() / "slots"
+    if slots_dir.is_dir():
+        for f in slots_dir.glob("*.toml"):
+            try:
+                with f.open("rb") as fh:
+                    cfg = tomllib.load(fh)
+            except (OSError, tomllib.TOMLDecodeError):
+                continue
+            top = cfg.get("port")
+            if isinstance(top, int):
+                used.add(top)
+            srv = cfg.get("server")
+            if isinstance(srv, dict):
+                nested = srv.get("port")
+                if isinstance(nested, int):
+                    used.add(nested)
+    for p in range(start, end + 1):
+        if p not in used:
+            return p
+    raise BadRequest(
+        f"no free port in {start}-{end} (all slots occupied)",
+        code="slot.no_free_port",
+    )
+
+
+def _normalize_create_body(body: dict[str, Any]) -> dict[str, Any]:
+    """Normalize a POST /api/slots body to the canonical nested shape.
+
+    Two compat hops (#275 bugs 1 + 2):
+
+    1. Top-level ``model: "name"`` (Lemonade-shape) → ``model: {"default":
+       "name"}`` (nested [model] table). The serializer at slots.py:191
+       reads ``cfg.get("model").get("default")`` and the SlotConfig
+       pydantic model has a nested ModelConfig — but the audit-
+       recommended Lemonade-shape body POSTs a top-level string. The
+       result was ``model_default`` MISSING from /api/slots responses
+       for any slot created via POST.
+    2. Missing or zero ``port`` → auto-assign via
+       :func:`_next_free_slot_port`. Without this, new slots persist
+       ``port=0`` and the dashboard card shows ``port=0`` instead of a
+       useable port.
+    """
+    out = dict(body)
+    model_val = out.get("model")
+    if isinstance(model_val, str):
+        out["model"] = {"default": model_val}
+    if "port" not in out or not isinstance(out.get("port"), int) or out.get("port") in (0, None):
+        out["port"] = _next_free_slot_port()
+    return out
+
+
 @router.post("", status_code=201)
 async def create_slot(request: Request) -> dict[str, object]:
     """Create a new slot. Body: SlotConfig schema.
@@ -331,6 +395,13 @@ async def create_slot(request: Request) -> dict[str, object]:
     Writes /etc/hal0/slots/<name>.toml, the systemd drop-in override, the
     env file, and the initial state.json. Does NOT start the slot — the
     caller follows with POST /api/slots/<name>/load when ready.
+
+    Accepts both the Lemonade-shape body (top-level ``model: "name"``,
+    ``device: "gpu-vulkan"``, no ``port``) and the legacy nested shape
+    (``[model] default = "name"``, ``[server] port = 8081``). The body
+    is normalized to the nested shape via :func:`_normalize_create_body`
+    before persistence so the serializer + persistent TOML loaders see
+    one canonical shape.
     """
     sm = _get_slot_manager(request)
     try:
@@ -351,6 +422,7 @@ async def create_slot(request: Request) -> dict[str, object]:
             code="slot.name_required",
         )
 
+    body = _normalize_create_body(body)
     snap = await sm.create(name, body)
     return _slot_to_dict(snap, request)
 

From b1b2be70e338ede8a2fab8e03b9469811f3b62d4 Mon Sep 17 00:00:00 2001
From: Alexander <alexander@awideweb.com>
Date: Sat, 23 May 2026 19:06:49 -0400
Subject: [PATCH 2/2] feat(cli): hal0 slot create gains --type + derives
 Lemonade device (#275 bug 3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Surfaced 2026-05-23 by the v3 dashboard CRUD sweep: the CLI couldn't
create embed/rerank/transcription/tts slots because `hal0 slot create`
had no `--type` flag and used the v0.1 hardware enum ([vulkan|rocm|cpu])
instead of the Lemonade `device` enum ([gpu-vulkan|gpu-rocm|cpu|npu]).
Operators creating non-LLM slots had to bypass the CLI and POST to
/api/slots directly.

Adds:
- `SlotType` enum (`llm | embedding | reranking | transcription | tts |
  image`) — the Lemonade type vocab.
- `--type` / `-t` flag on `hal0 slot create`, defaults to `llm` for
  backward compat with the v0.1 chat-only create path.
- Derives `device` from `--hardware` (vulkan→gpu-vulkan, rocm→gpu-rocm,
  cpu→cpu) so the body POST'd matches the audit-recommended
  Lemonade-shape SlotConfig.

Keeps the `--provider` and `--hardware` flags: `--provider` is now
documented as legacy v0.1 compat, and the `--hardware` help text notes
the derived Lemonade `device`. Pairs with PR #281, which normalizes the
POST body server-side.

Closes part of #275 (bug 3 of 7).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hal0/cli/slot_commands.py | 48 +++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/src/hal0/cli/slot_commands.py b/src/hal0/cli/slot_commands.py
index 70afb8b1..1e256ee5 100644
--- a/src/hal0/cli/slot_commands.py
+++ b/src/hal0/cli/slot_commands.py
@@ -48,6 +48,24 @@ class SlotProvider(StrEnum):
 SlotBackend = SlotProvider
 
 
+class SlotType(StrEnum):
+    """Lemonade-shape slot type enum (#275 bug 3).
+
+    Maps to the Lemonade vocab from PLAN.md §1 v0.2 slot model:
+    ``llm | embedding | reranking | transcription | tts | image``.
+    The dispatcher routes by ``type`` (per ADR-0008 §6); without this
+    flag the CLI couldn't create embedding/rerank/transcription/tts
+    slots at all.
+    """
+
+    llm = "llm"
+    embedding = "embedding"
+    reranking = "reranking"
+    transcription = "transcription"
+    tts = "tts"
+    image = "image"
+
+
 class SlotHardware(StrEnum):
     """Hardware backends valid for a slot (mirrors SlotConfig.backend).
 
@@ -289,10 +307,24 @@ def slot_logs(
 @app.command("create")
 def slot_create(
     name: str = typer.Argument(..., help="Slot name (e.g. primary, embed, stt)"),
+    type_: SlotType = typer.Option(
+        SlotType.llm,
+        "--type",
+        "-t",
+        help=(
+            "Lemonade slot type: llm | embedding | reranking | transcription | tts | image. "
+            "Determines how the dispatcher routes requests (ADR-0008 §6)."
+        ),
+        case_sensitive=False,
+    ),
     provider: SlotProvider = typer.Option(
         "llama-server",
         "--provider",
-        help="Inference provider (engine) for the slot.",
+        help=(
+            "[Legacy v0.1] Inference provider (engine) for the slot. "
+            "Under Lemonade (v0.2+), provider is determined by --type; "
+            "this flag is preserved for backward-compat with older slot TOMLs."
+        ),
         case_sensitive=False,
     ),
     hardware: SlotHardware | None = typer.Option(
@@ -300,7 +332,8 @@ def slot_create(
         "--hardware",
         help=(
             "Hardware backend: vulkan | rocm | cpu. "
-            "Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe)."
+            "Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe). "
+            "Lemonade-shape `device` is derived: vulkan→gpu-vulkan, rocm→gpu-rocm, cpu→cpu."
         ),
         case_sensitive=False,
     ),
@@ -351,8 +384,19 @@ def slot_create(
             return
 
     hw = hardware.value if hardware is not None else _detect_default_hardware()
+    # Lemonade-shape `device` (gpu-vulkan / gpu-rocm / cpu / npu) derives
+    # from the v0.1 hardware enum: vulkan/rocm → gpu-vulkan/gpu-rocm; cpu
+    # stays cpu; npu has no v0.1 hardware equivalent (set --hardware via
+    # the legacy schema upgrade path).
+    device = {
+        "vulkan": "gpu-vulkan",
+        "rocm": "gpu-rocm",
+        "cpu": "cpu",
+    }.get(hw, hw)
     body: dict[str, Any] = {
         "name": name,
+        "type": type_.value,
+        "device": device,
         "backend": hw,  # SlotConfig.backend = hardware target (vulkan/rocm/cpu/...)
         "provider": str(provider),
         "model": {"default": model, "context_size": ctx_size},