Hal0ai · thinmintdev · May 23, 2026 · May 23, 2026 · May 23, 2026 · May 23, 2026
diff --git a/src/hal0/api/routes/slots.py b/src/hal0/api/routes/slots.py
@@ -324,13 +324,84 @@ async def list_slots(request: Request) -> list[dict[str, object]]:
     return merged
 
 
+def _next_free_slot_port(start: int = 8081, end: int = 8099) -> int:
+    """Return the lowest unclaimed port in ``[start, end]`` (#275 bug 2).
+
+    Walks ``slots/*.toml`` under ``hal0_etc_dir()`` collecting both the
+    top-level ``port`` and the nested ``[server] port`` (bools ignored).
+    Unreadable, non-UTF-8 or malformed files are skipped. The 8081-8099
+    default matches the PLAN.md §2 ports table. Raises ``BadRequest``
+    (``slot.no_free_port``) when every port in the range is claimed.
+    """
+    import tomllib
+
+    from hal0.config.paths import hal0_etc_dir
+
+    used: set[int] = set()
+    slots_dir = hal0_etc_dir() / "slots"
+    if slots_dir.is_dir():
+        for f in slots_dir.glob("*.toml"):
+            try:
+                with f.open("rb") as fh:
+                    cfg = tomllib.load(fh)
+            except (OSError, UnicodeDecodeError, tomllib.TOMLDecodeError):
+                # Bad UTF-8 raises UnicodeDecodeError, not TOMLDecodeError.
+                continue
+            srv = cfg.get("server")
+            nested = srv.get("port") if isinstance(srv, dict) else None
+            for port in (cfg.get("port"), nested):
+                # bool subclasses int; `port = true` is not a port claim.
+                if isinstance(port, int) and not isinstance(port, bool):
+                    used.add(port)
+    for p in range(start, end + 1):
+        if p not in used:
+            return p
+    raise BadRequest(
+        f"no free port in {start}-{end} (all slots occupied)",
+        code="slot.no_free_port",
+    )
+
+
+def _normalize_create_body(body: dict[str, Any]) -> dict[str, Any]:
+    """Return a shallow copy of a POST /api/slots body in canonical shape.
+
+    Two compat hops (#275 bugs 1 + 2):
+
+    1. Top-level ``model: "name"`` (Lemonade-shape) → ``model: {"default":
+       "name"}`` (nested [model] table). The serializer at slots.py:191
+       reads ``cfg.get("model").get("default")`` and SlotConfig has a
+       nested ModelConfig, so a top-level string left ``model_default``
+       MISSING from /api/slots responses for slots created via POST.
+    2. Missing, zero or non-integer ``port`` → auto-assign via
+       :func:`_next_free_slot_port`. Without this, new slots persist
+       ``port=0`` and the dashboard card shows it. ``bool`` values are
+       rejected too (``bool`` subclasses ``int``), so ``port: true`` is
+       replaced instead of being persisted as ``True``.
+    """
+    out = dict(body)
+    model_val = out.get("model")
+    if isinstance(model_val, str):
+        out["model"] = {"default": model_val}
+    port = out.get("port")
+    if isinstance(port, bool) or not isinstance(port, int) or port == 0:
+        out["port"] = _next_free_slot_port()
+    return out
+
+
 @router.post("", status_code=201)
 async def create_slot(request: Request) -> dict[str, object]:
     """Create a new slot. Body: SlotConfig schema.
 
     Writes /etc/hal0/slots/<name>.toml, the systemd drop-in override, the
     env file, and the initial state.json. Does NOT start the slot — the
     caller follows with POST /api/slots/<name>/load when ready.
+
+    Accepts both the Lemonade-shape body (top-level ``model: "name"``,
+    ``device: "gpu-vulkan"``, no ``port``) and the legacy nested shape
+    (``[model] default = "name"``, ``[server] port = 8081``). The body
+    is normalized to the nested shape via :func:`_normalize_create_body`
+    before persistence so the serializer + persistent TOML loaders see
+    one canonical shape.
     """
     sm = _get_slot_manager(request)
     try:
@@ -351,6 +422,7 @@ async def create_slot(request: Request) -> dict[str, object]:
             code="slot.name_required",
         )
 
+    body = _normalize_create_body(body)
     snap = await sm.create(name, body)
     return _slot_to_dict(snap, request)
 

diff --git a/src/hal0/cli/slot_commands.py b/src/hal0/cli/slot_commands.py
@@ -48,6 +48,24 @@ class SlotProvider(StrEnum):
 SlotBackend = SlotProvider
 
 
+class SlotType(StrEnum):
+    """Lemonade-shape slot type enum (#275 bug 3).
+
+    Mirrors the Lemonade vocab from the PLAN.md §1 v0.2 slot model:
+    ``llm | embedding | reranking | transcription | tts | image``.
+    Backs the ``--type`` option of ``slot create``; the dispatcher routes
+    by ``type`` (per ADR-0008 §6), and without this flag the CLI couldn't
+    create embedding/reranking/transcription/tts slots at all.
+    """
+
+    llm = "llm"
+    embedding = "embedding"
+    reranking = "reranking"
+    transcription = "transcription"
+    tts = "tts"
+    image = "image"
+
+
 class SlotHardware(StrEnum):
     """Hardware backends valid for a slot (mirrors SlotConfig.backend).
 
@@ -289,18 +307,33 @@ def slot_logs(
 @app.command("create")
 def slot_create(
     name: str = typer.Argument(..., help="Slot name (e.g. primary, embed, stt)"),
+    type_: SlotType = typer.Option(
+        SlotType.llm,
+        "--type",
+        "-t",
+        help=(
+            "Lemonade slot type: llm | embedding | reranking | transcription | tts | image. "
+            "Determines how the dispatcher routes requests (ADR-0008 §6)."
+        ),
+        case_sensitive=False,
+    ),
     provider: SlotProvider = typer.Option(
         "llama-server",
         "--provider",
-        help="Inference provider (engine) for the slot.",
+        help=(
+            "[Legacy v0.1] Inference provider (engine) for the slot. "
+            "Under Lemonade (v0.2+), provider is determined by --type; "
+            "this flag is preserved for backward-compat with older slot TOMLs."
+        ),
         case_sensitive=False,
     ),
     hardware: SlotHardware | None = typer.Option(
         None,
         "--hardware",
         help=(
             "Hardware backend: vulkan | rocm | cpu. "
-            "Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe)."
+            "Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe). "
+            "Lemonade-shape `device` is derived: vulkan→gpu-vulkan, rocm→gpu-rocm, cpu→cpu."
         ),
         case_sensitive=False,
     ),
@@ -351,8 +384,19 @@ def slot_create(
             return
 
     hw = hardware.value if hardware is not None else _detect_default_hardware()
+    # Lemonade-shape `device` (gpu-vulkan / gpu-rocm / cpu / npu) derives
+    # from the v0.1 hardware enum: vulkan/rocm → gpu-vulkan/gpu-rocm; cpu
+    # stays cpu; npu has no v0.1 hardware equivalent (set --hardware via
+    # the legacy schema upgrade path).
+    device = {
+        "vulkan": "gpu-vulkan",
+        "rocm": "gpu-rocm",
+        "cpu": "cpu",
+    }.get(hw, hw)
     body: dict[str, Any] = {
         "name": name,
+        "type": type_.value,
+        "device": device,
         "backend": hw,  # SlotConfig.backend = hardware target (vulkan/rocm/cpu/...)
         "provider": str(provider),
         "model": {"default": model, "context_size": ctx_size},