From 0e9967719b6040aea86a9c9ce59c89857353cb4a Mon Sep 17 00:00:00 2001
From: Alexander <alexander@awideweb.com>
Date: Wed, 27 May 2026 13:08:45 -0400
Subject: [PATCH 1/3] fix(dash): slot swap popover + create-slot modal read
 live /api/models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The InlineSwapPopover and CreateSlotModal were both reading from
HAL0_DATA.models (the dashboard's static seed in ui/src/dash/data.jsx),
which carries fictional ids like `qwen3.6-27b-mtp` and `qwen3-coder-30b`.
Clicking one tunneled into POST /api/slots/{name}/swap and the slot
orchestrator (correctly) bounced the bad id against the real registry:

  code=model.not_found message="model 'qwen3.6-27b-mtp' is not in
    the registry (slot 'hermes-agent' not touched)"

The comment at slot-modals.jsx:54 acknowledged the gap — "Model
catalogue still lives in HAL0_DATA — replaced when the models hook
ships (parallel teammate)" — but the wire-up never landed.

Changes
- Add normalizeApiModel() that maps the /api/models shape
  (capabilities, backends, size_bytes, name, hf_repo) onto the legacy
  HAL0_DATA shape (type, device, size, longName, repo) that the JSX
  expected. Done in JSX rather than at the API layer so models.jsx
  (Models view) still gets the unchanged response.
- CreateSlotModal + InlineSwapPopover now consume useModels() and
  useHardware() instead of HAL0_DATA.{models,host}.
- InlineSwapPopover: move `if (!open) return null` AFTER the hook calls
  to keep rules-of-hooks legal (useQuery's caching makes the cost ~zero
  when closed).
- CreateSlotModal device filter switched from single-string match
  (m.device === "rocm") to backend-list membership
  (m.backends.includes("rocm")) since /api/models advertises multiple
  backends per row.

Verified live on LXC 105: qwen3.6-27b-q5kxl swaps cleanly into
hermes-agent (idle, lemonade backend) — was the spike repro for #345.

Closes #345.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ui/src/dash/slot-modals.jsx | 68 +++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 7 deletions(-)

diff --git a/ui/src/dash/slot-modals.jsx b/ui/src/dash/slot-modals.jsx
index f95f1d8b..e8529151 100644
--- a/ui/src/dash/slot-modals.jsx
+++ b/ui/src/dash/slot-modals.jsx
@@ -11,10 +11,49 @@ import {
   useSlotDelete,
 } from '@/api/hooks/useSlots'
 import { useHardware } from '@/api/hooks/useHardware'
+import { useModels } from '@/api/hooks/useModels'
 import { ENDPOINTS } from '@/api/endpoints'
 
 const { useState: useStateSM, useEffect: useEffectSM, useRef: useRefSM } = React;
 
+// Map /api/models registry rows → the shape this file's swap popover and
+// create-slot modal grew up around (HAL0_DATA seed). Done in JSX rather
+// than at the API layer so the response stays identical to what the
+// Models view (models.jsx) already consumes. NEVER ship HAL0_DATA model
+// ids to the backend — they're fictional (`qwen3.6-27b-mtp` etc.) and
+// the slot orchestrator correctly rejects them against the real registry.
+function normalizeApiModel(m) {
+  const caps = Array.isArray(m.capabilities) ? m.capabilities : [];
+  const type =
+    caps.includes('chat') || caps.includes('coding') ? 'llm'
+    : caps.includes('rerank') ? 'reranking'
+    : caps.includes('embed') || caps.includes('embeddings') ? 'embedding'
+    : caps.includes('transcription') || caps.includes('asr') ? 'transcription'
+    : caps.includes('tts') ? 'tts'
+    : caps.includes('image') ? 'image'
+    : '';
+  const backends = Array.isArray(m.backends) ? m.backends : [];
+  const device =
+    backends.includes('rocm') ? 'rocm'
+    : backends.includes('vulkan') ? 'vulkan'
+    : backends.includes('cpu') ? 'cpu'
+    : backends[0] || '';
+  const b = m.size_bytes || 0;
+  const size = !b
+    ? '—'
+    : b < 1024 ** 2 ? `${(b / 1024).toFixed(1)} KB`
+    : b < 1024 ** 3 ? `${(b / 1024 ** 2).toFixed(1)} MB`
+    : `${(b / 1024 ** 3).toFixed(2)} GB`;
+  return {
+    ...m,
+    type,
+    device,
+    longName: m.name || m.id,
+    size,
+    repo: m.hf_repo || m.path || '',
+  };
+}
+
 // ─── Create-slot modal ──────────────────────────────────────────
 function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {
   const [name, setName] = useStateSM(defaults.name || "");
@@ -30,6 +69,7 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {
 
   const createMut = useSlotCreate();
   const hwQuery = useHardware();
+  const modelsQuery = useModels();
 
   useEffectSM(() => {
     if (open) {
@@ -51,11 +91,17 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {
   const nameInvalid = name && !/^[a-z][a-z0-9-]{0,30}$/.test(name);
   const nameError = nameCollision ? "name already in use" : nameInvalid ? "lowercase + dashes only" : null;
 
-  // Model catalogue still lives in HAL0_DATA — replaced when the models
-  // hook ships (parallel teammate). NPU availability is now live.
-  const compatible = HAL0_DATA.models.filter(m =>
+  // Live catalogue from /api/models (normalized to the legacy HAL0_DATA
+  // shape so the existing filter + render code keeps working). Sending a
+  // mock id like `qwen3.6-27b-mtp` here would tunnel into POST
+  // /api/slots/{name}/swap and the slot orchestrator would reject it
+  // against the real registry (model.not_found).
+  const allModels = (modelsQuery.data ?? []).map(normalizeApiModel);
+  const compatible = allModels.filter(m =>
     m.type === type &&
-    (device === "cpu" || m.device === (device || "cpu").replace("gpu-", "") || (device === "npu" && m.device === "npu"))
+    (device === "cpu"
+      || (Array.isArray(m.backends) && m.backends.includes((device || "cpu").replace("gpu-", "")))
+      || (device === "npu" && m.device === "npu"))
   );
 
   const npuAvailable = !!hwQuery.data?.npu?.present;
@@ -182,7 +228,7 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {
             ))}
           </select>
           {model && compatible.find(m => m.id === model) && (
-            <div className="ok">✓ fits in available memory ({HAL0_DATA.host.ram.free} GB free)</div>
+            <div className="ok">✓ fits in available memory ({hwQuery.data?.ram?.free ?? "?"} GB free)</div>
           )}
         </div>
       </div>
@@ -517,14 +563,22 @@ function ReadOnlyStrip({ k, v }) {
 
 // ─── Inline swap popover ────────────────────────────────────────
 function InlineSwapPopover({ slot, open, onClose, onPick }) {
+  // Hooks first — React rules-of-hooks forbid an early return before
+  // them. The popover is mounted unconditionally and toggles via `open`;
+  // useQuery's own caching means useModels() costs ~nothing when closed.
+  const modelsQuery = useModels();
+  const hwQuery = useHardware();
   if (!open) return null;
-  const compatible = HAL0_DATA.models.filter(m => m.type === slot.type);
+  const ramFreeGb = hwQuery.data?.ram?.free ?? 0;
+  const compatible = (modelsQuery.data ?? [])
+    .map(normalizeApiModel)
+    .filter(m => m.type === slot.type);
   return (
     <div className="swap-pop" onClick={e => e.stopPropagation()}>
       <div className="swap-pop-h">Swap model · type {slot.type}</div>
       {compatible.map(m => {
         const isCur = slot.model_id === m.id;
-        const fits = HAL0_DATA.host.ram.free > parseSizeGB(m.size);
+        const fits = ramFreeGb > parseSizeGB(m.size);
         return (
           // The whole row is a mouse-click target (convenience) but the
           // nested chevron button is the single keyboard/AT-accessible

From f9736422bfd5a7adf9754ac52b392c0d4e15475e Mon Sep 17 00:00:00 2001
From: Alexander <alexander@awideweb.com>
Date: Wed, 27 May 2026 13:12:07 -0400
Subject: [PATCH 2/3] =?UTF-8?q?test(=CE=B3):=20seed=20/api/models=20fixtur?=
 =?UTF-8?q?e=20for=20swap-popover=20suite?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The popover now reads from useModels(); without a fixture route the
apiMock catch-all returned {} → useModels()=[] → 0 .swap-pop-items →
slots-wireup-v3.spec.ts:135 toBeVisible() timed out.

Adds 4 representative MOCK_DATA.models rows (chat, coding, embed,
rerank) keyed by capabilities/backends so the JSX normalizer derives
matching slot.type values, and a /api/models route on
installDefaultMocks that fulfils them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ui/tests/e2e/fixtures/apiMock.ts   |  4 +++
 ui/tests/e2e/fixtures/mock-data.ts | 51 ++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/ui/tests/e2e/fixtures/apiMock.ts b/ui/tests/e2e/fixtures/apiMock.ts
index a473d6ea..e2750c36 100644
--- a/ui/tests/e2e/fixtures/apiMock.ts
+++ b/ui/tests/e2e/fixtures/apiMock.ts
@@ -25,6 +25,7 @@ export type MockState = {
   host: typeof MOCK_DATA.host
   lemond: typeof MOCK_DATA.lemond
   slots: typeof MOCK_DATA.slots
+  models: typeof MOCK_DATA.models
   backends: typeof MOCK_DATA.backends
   approvals: any[]
 }
@@ -66,6 +67,9 @@ export async function installDefaultMocks(page: Page, state: MockState) {
     }),
   )
   await page.route('**/api/hardware', (route) => json(route, state.host))
+  await page.route('**/api/models', (route) =>
+    json(route, { models: state.models, count: state.models.length }),
+  )
   await page.route('**/api/slots', (route) => json(route, { slots: state.slots }))
   await page.route('**/api/slots/metrics', (route) => json(route, {}))
   await page.route('**/api/backends', (route) => json(route, { backends: state.backends }))
diff --git a/ui/tests/e2e/fixtures/mock-data.ts b/ui/tests/e2e/fixtures/mock-data.ts
index 1f034005..33ab7a10 100644
--- a/ui/tests/e2e/fixtures/mock-data.ts
+++ b/ui/tests/e2e/fixtures/mock-data.ts
@@ -62,6 +62,57 @@ export const MOCK_DATA = {
     },
   ],
 
+  /** Subset of /api/models rows the swap popover + create-slot modal
+   *  now consume (PR fix(dash): slot swap popover reads live /api/models).
+   *  Shape matches the registry serializer: capabilities + backends drive
+   *  the JSX-side normalizer's type + device derivation. */
+  models: [
+    {
+      id: 'qwen3.6-27b-q5kxl',
+      name: 'Qwen3.6-27B UD-Q5_K_XL',
+      path: '/mnt/ai-models/qwen3.6-27b-q5kxl/Qwen3.6-27B-UD-Q5_K_XL.gguf',
+      size_bytes: 19_000_000_000,
+      capabilities: ['chat'],
+      backends: ['vulkan', 'rocm', 'cpu'],
+      hf_repo: 'unsloth/Qwen3.6-27B-GGUF',
+      installed: true,
+      ns: 'pulled',
+    },
+    {
+      id: 'qwen3-coder-next-q4kxl',
+      name: 'Qwen3-Coder-Next UD-Q4_K_XL',
+      path: '/mnt/ai-models/qwen3-coder-next-q4kxl/Qwen3-Coder-Next-UD-Q4_K_XL.gguf',
+      size_bytes: 19_000_000_000,
+      capabilities: ['chat', 'coding'],
+      backends: ['vulkan', 'rocm', 'cpu'],
+      hf_repo: 'unsloth/Qwen3-Coder-Next-GGUF',
+      installed: true,
+      ns: 'pulled',
+    },
+    {
+      id: 'nomic-embed-text-v1.5-q8',
+      name: 'nomic-embed-text-v1.5 Q8',
+      path: '/mnt/ai-models/nomic-embed-text-v1.5-q8/nomic-embed-text-v1.5.Q8_0.gguf',
+      size_bytes: 350_000_000,
+      capabilities: ['embed'],
+      backends: ['vulkan', 'rocm', 'cpu'],
+      hf_repo: 'nomic-ai/nomic-embed-text-v1.5-GGUF',
+      installed: true,
+      ns: 'blessed',
+    },
+    {
+      id: 'bge-reranker-v2-m3-q4_k_m',
+      name: 'BGE Reranker v2 M3 (Q4_K_M)',
+      path: '/mnt/ai-models/local/bge-reranker-v2-m3-q4_k_m/bge-reranker-v2-m3-Q4_K_M.gguf',
+      size_bytes: 438_376_864,
+      capabilities: ['rerank'],
+      backends: ['vulkan', 'rocm', 'cpu'],
+      hf_repo: 'gpustack/bge-reranker-v2-m3-GGUF',
+      installed: true,
+      ns: 'pulled',
+    },
+  ],
+
   backends: [
     { id: 'llamacpp:rocm', name: 'llamacpp:rocm', ver: 'v1.0 (b9253)', state: 'installed', recommended: true },
     { id: 'llamacpp:vulkan', name: 'llamacpp:vulkan', ver: 'v1.0 (b9253)', state: 'installed' },

From fafafd5f7588836bf934f9e7fd197f2d23f98e38 Mon Sep 17 00:00:00 2001
From: Alexander <alexander@awideweb.com>
Date: Wed, 27 May 2026 13:24:08 -0400
Subject: [PATCH 3/3] fix(dash): normalizer handles HAL0_DATA shape
 (labels/type) too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the Vite dev server's /api proxy target is down (or in CI before
the page.route handlers attach), src/api/mock.ts falls back to
HAL0_DATA.models — which use the legacy seed shape (labels + type +
device + size as strings) rather than the registry shape (capabilities
+ backends + size_bytes). The first version of normalizeApiModel only
read capabilities/backends/size_bytes, so every row got type='' and
the popover filter rejected all 14 entries → empty .swap-pop list.

Tolerate both shapes: prefer existing m.type/m.device/m.size if the
row already carries them, else derive from capabilities/backends/
size_bytes. Same for longName + repo.

Verified slots-wireup-v3 'Swap model — inline popover' passes locally
(1.3s, was failing at 5s timeout in CI).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ui/src/dash/slot-modals.jsx | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/ui/src/dash/slot-modals.jsx b/ui/src/dash/slot-modals.jsx
index e8529151..b9ae5d88 100644
--- a/ui/src/dash/slot-modals.jsx
+++ b/ui/src/dash/slot-modals.jsx
@@ -23,34 +23,46 @@ const { useState: useStateSM, useEffect: useEffectSM, useRef: useRefSM } = React
 // ids to the backend — they're fictional (`qwen3.6-27b-mtp` etc.) and
 // the slot orchestrator correctly rejects them against the real registry.
 function normalizeApiModel(m) {
-  const caps = Array.isArray(m.capabilities) ? m.capabilities : [];
-  const type =
-    caps.includes('chat') || caps.includes('coding') ? 'llm'
-    : caps.includes('rerank') ? 'reranking'
-    : caps.includes('embed') || caps.includes('embeddings') ? 'embedding'
-    : caps.includes('transcription') || caps.includes('asr') ? 'transcription'
-    : caps.includes('tts') ? 'tts'
-    : caps.includes('image') ? 'image'
+  // Accept both shapes: the registry/API shape (capabilities + backends +
+  // size_bytes + name + hf_repo) and the legacy HAL0_DATA seed shape
+  // (labels + device + size + longName + repo + type). Local dev without
+  // a backend falls back via src/api/mock.ts to HAL0_DATA.models, and the
+  // γ-suite hits that fallback when fetch fails before page.route catches
+  // (race + connection-refused on the Vite proxy target). Tolerating both
+  // shapes keeps the popover non-empty in every mock path.
+  const sourceCaps = Array.isArray(m.capabilities)
+    ? m.capabilities
+    : Array.isArray(m.labels) ? m.labels : [];
+  const derivedType =
+    sourceCaps.includes('chat') || sourceCaps.includes('coding') ? 'llm'
+    : sourceCaps.includes('rerank') || sourceCaps.includes('reranking') ? 'reranking'
+    : sourceCaps.includes('embed') || sourceCaps.includes('embeddings') ? 'embedding'
+    : sourceCaps.includes('transcription') || sourceCaps.includes('asr') ? 'transcription'
+    : sourceCaps.includes('tts') ? 'tts'
+    : sourceCaps.includes('image') ? 'image'
     : '';
+  const type = typeof m.type === 'string' && m.type ? m.type : derivedType;
   const backends = Array.isArray(m.backends) ? m.backends : [];
-  const device =
+  const derivedDevice =
     backends.includes('rocm') ? 'rocm'
     : backends.includes('vulkan') ? 'vulkan'
     : backends.includes('cpu') ? 'cpu'
     : backends[0] || '';
+  const device = typeof m.device === 'string' && m.device ? m.device : derivedDevice;
   const b = m.size_bytes || 0;
-  const size = !b
+  const derivedSize = !b
     ? '—'
     : b < 1024 ** 2 ? `${(b / 1024).toFixed(1)} KB`
     : b < 1024 ** 3 ? `${(b / 1024 ** 2).toFixed(1)} MB`
     : `${(b / 1024 ** 3).toFixed(2)} GB`;
+  const size = typeof m.size === 'string' && m.size ? m.size : derivedSize;
   return {
     ...m,
     type,
     device,
-    longName: m.name || m.id,
+    longName: m.longName || m.name || m.id,
     size,
-    repo: m.hf_repo || m.path || '',
+    repo: m.repo || m.hf_repo || m.path || '',
   };
 }