Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions src/hal0/api/routes/slots.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,13 +324,84 @@ async def list_slots(request: Request) -> list[dict[str, object]]:
return merged


def _next_free_slot_port(start: int = 8081, end: int = 8099) -> int:
    """Pick the lowest unclaimed port in the slot range (#275 bug 2).

    Every ``*.toml`` file in the ``slots`` directory under
    ``hal0_etc_dir()`` is parsed, and each integer ``port`` found either
    at the top level or inside a ``server`` table is recorded as claimed.
    Files that cannot be opened or parsed are ignored. The default
    8081-8099 range matches the PLAN.md §2 ports table.

    Args:
        start: First port of the inclusive search range.
        end: Last port of the inclusive search range.

    Returns:
        The smallest port in ``[start, end]`` that no slot file claims.

    Raises:
        BadRequest: With code ``slot.no_free_port`` when every port in
            the range is already claimed.
    """
    import tomllib

    from hal0.config.paths import hal0_etc_dir

    claimed: set[int] = set()
    slots_root = hal0_etc_dir() / "slots"
    # A missing slots directory simply means nothing is claimed yet.
    toml_files = slots_root.glob("*.toml") if slots_root.is_dir() else ()
    for toml_path in toml_files:
        try:
            with toml_path.open("rb") as handle:
                parsed = tomllib.load(handle)
        except (OSError, tomllib.TOMLDecodeError):
            # Best effort: an unreadable or malformed file claims nothing.
            continue
        # Gather both spellings of the port: top-level and [server] port.
        candidates = [parsed.get("port")]
        server_table = parsed.get("server")
        if isinstance(server_table, dict):
            candidates.append(server_table.get("port"))
        claimed.update(value for value in candidates if isinstance(value, int))

    free_port = next(
        (port for port in range(start, end + 1) if port not in claimed),
        None,
    )
    if free_port is None:
        raise BadRequest(
            f"no free port in {start}-{end} (all slots occupied)",
            code="slot.no_free_port",
        )
    return free_port


def _normalize_create_body(body: dict[str, Any]) -> dict[str, Any]:
"""Normalize a POST /api/slots body to the canonical nested shape.

Two compat hops (#275 bugs 1 + 2):

1. Top-level ``model: "name"`` (Lemonade-shape) → ``model: {"default":
"name"}`` (nested [model] table). The serializer at slots.py:191
reads ``cfg.get("model").get("default")`` and the SlotConfig
pydantic model has a nested ModelConfig — but the audit-
recommended Lemonade-shape body POSTs a top-level string. The
result was ``model_default`` MISSING from /api/slots responses
for any slot created via POST.
2. Missing or zero ``port`` → auto-assign via
:func:`_next_free_slot_port`. Without this, new slots persist
``port=0`` and the dashboard card shows ``port=0`` instead of a
useable port.
"""
out = dict(body)
model_val = out.get("model")
if isinstance(model_val, str):
out["model"] = {"default": model_val}
if "port" not in out or not isinstance(out.get("port"), int) or out.get("port") in (0, None):
out["port"] = _next_free_slot_port()
return out


@router.post("", status_code=201)
async def create_slot(request: Request) -> dict[str, object]:
"""Create a new slot. Body: SlotConfig schema.

Writes /etc/hal0/slots/<name>.toml, the systemd drop-in override, the
env file, and the initial state.json. Does NOT start the slot — the
caller follows with POST /api/slots/<name>/load when ready.

Accepts both the Lemonade-shape body (top-level ``model: "name"``,
``device: "gpu-vulkan"``, no ``port``) and the legacy nested shape
(``[model] default = "name"``, ``[server] port = 8081``). The body
is normalized to the nested shape via :func:`_normalize_create_body`
before persistence so the serializer + persistent TOML loaders see
one canonical shape.
"""
sm = _get_slot_manager(request)
try:
Expand All @@ -351,6 +422,7 @@ async def create_slot(request: Request) -> dict[str, object]:
code="slot.name_required",
)

body = _normalize_create_body(body)
snap = await sm.create(name, body)
return _slot_to_dict(snap, request)

Expand Down
48 changes: 46 additions & 2 deletions src/hal0/cli/slot_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,24 @@ class SlotProvider(StrEnum):
SlotBackend = SlotProvider


class SlotType(StrEnum):
"""Lemonade-shape slot type enum (#275 bug 3).

Maps to the Lemonade vocab from PLAN.md §1 v0.2 slot model:
``llm | embedding | reranking | transcription | tts | image``.
The dispatcher routes by ``type`` (per ADR-0008 §6); without this
flag the CLI couldn't create embedding/rerank/transcription/tts
slots at all.
"""

llm = "llm"
embedding = "embedding"
reranking = "reranking"
transcription = "transcription"
tts = "tts"
image = "image"


class SlotHardware(StrEnum):
"""Hardware backends valid for a slot (mirrors SlotConfig.backend).

Expand Down Expand Up @@ -289,18 +307,33 @@ def slot_logs(
@app.command("create")
def slot_create(
name: str = typer.Argument(..., help="Slot name (e.g. primary, embed, stt)"),
type_: SlotType = typer.Option(
SlotType.llm,
"--type",
"-t",
help=(
"Lemonade slot type: llm | embedding | reranking | transcription | tts | image. "
"Determines how the dispatcher routes requests (ADR-0008 §6)."
),
case_sensitive=False,
),
provider: SlotProvider = typer.Option(
"llama-server",
"--provider",
help="Inference provider (engine) for the slot.",
help=(
"[Legacy v0.1] Inference provider (engine) for the slot. "
"Under Lemonade (v0.2+), provider is determined by --type; "
"this flag is preserved for backward-compat with older slot TOMLs."
),
case_sensitive=False,
),
hardware: SlotHardware | None = typer.Option(
None,
"--hardware",
help=(
"Hardware backend: vulkan | rocm | cpu. "
"Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe)."
"Default: auto-detected from /etc/hal0/hardware.json (vulkan if no probe). "
"Lemonade-shape `device` is derived: vulkan→gpu-vulkan, rocm→gpu-rocm, cpu→cpu."
),
case_sensitive=False,
),
Expand Down Expand Up @@ -351,8 +384,19 @@ def slot_create(
return

hw = hardware.value if hardware is not None else _detect_default_hardware()
# Lemonade-shape `device` (gpu-vulkan / gpu-rocm / cpu / npu) derives
# from the v0.1 hardware enum: vulkan/rocm → gpu-vulkan/gpu-rocm; cpu
# stays cpu; npu has no v0.1 hardware equivalent (set --hardware via
# the legacy schema upgrade path).
device = {
"vulkan": "gpu-vulkan",
"rocm": "gpu-rocm",
"cpu": "cpu",
}.get(hw, hw)
body: dict[str, Any] = {
"name": name,
"type": type_.value,
"device": device,
"backend": hw, # SlotConfig.backend = hardware target (vulkan/rocm/cpu/...)
"provider": str(provider),
"model": {"default": model, "context_size": ctx_size},
Expand Down
Loading