From 0e9967719b6040aea86a9c9ce59c89857353cb4a Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 27 May 2026 13:08:45 -0400 Subject: [PATCH 1/3] fix(dash): slot swap popover + create-slot modal read live /api/models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The InlineSwapPopover and CreateSlotModal were both reading from HAL0_DATA.models (the dashboard's static seed in ui/src/dash/data.jsx), which carries fictional ids like `qwen3.6-27b-mtp` and `qwen3-coder-30b`. Clicking one tunneled into POST /api/slots/{name}/swap and the slot orchestrator (correctly) bounced the bad id against the real registry: code=model.not_found message="model 'qwen3.6-27b-mtp' is not in the registry (slot 'hermes-agent' not touched)" The comment at slot-modals.jsx:54 acknowledged the gap — "Model catalogue still lives in HAL0_DATA — replaced when the models hook ships (parallel teammate)" — but the wire-up never landed. Changes - Add normalizeApiModel() that maps the /api/models shape (capabilities, backends, size_bytes, name, hf_repo) onto the legacy HAL0_DATA shape (type, device, size, longName, repo) that the JSX expected. Done in JSX rather than at the API layer so models.jsx (Models view) still gets the unchanged response. - CreateSlotModal + InlineSwapPopover now consume useModels() and useHardware() instead of HAL0_DATA.{models,host}. - InlineSwapPopover: move `if (!open) return null` AFTER the hook calls to keep rules-of-hooks legal (useQuery's caching makes the cost ~zero when closed). - CreateSlotModal device filter switched from single-string match (m.device === "rocm") to backend-list membership (m.backends.includes("rocm")) since /api/models advertises multiple backends per row. Verified live on LXC 105: qwen3.6-27b-q5kxl swaps cleanly into hermes-agent (idle, lemonade backend) — was the spike repro for #345. Closes #345. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ui/src/dash/slot-modals.jsx | 68 +++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/ui/src/dash/slot-modals.jsx b/ui/src/dash/slot-modals.jsx index f95f1d8b..e8529151 100644 --- a/ui/src/dash/slot-modals.jsx +++ b/ui/src/dash/slot-modals.jsx @@ -11,10 +11,49 @@ import { useSlotDelete, } from '@/api/hooks/useSlots' import { useHardware } from '@/api/hooks/useHardware' +import { useModels } from '@/api/hooks/useModels' import { ENDPOINTS } from '@/api/endpoints' const { useState: useStateSM, useEffect: useEffectSM, useRef: useRefSM } = React; +// Map /api/models registry rows → the shape this file's swap popover and +// create-slot modal grew up around (HAL0_DATA seed). Done in JSX rather +// than at the API layer so the response stays identical to what the +// Models view (models.jsx) already consumes. NEVER ship HAL0_DATA model +// ids to the backend — they're fictional (`qwen3.6-27b-mtp` etc.) and +// the slot orchestrator correctly rejects them against the real registry. +function normalizeApiModel(m) { + const caps = Array.isArray(m.capabilities) ? m.capabilities : []; + const type = + caps.includes('chat') || caps.includes('coding') ? 'llm' + : caps.includes('rerank') ? 'reranking' + : caps.includes('embed') || caps.includes('embeddings') ? 'embedding' + : caps.includes('transcription') || caps.includes('asr') ? 'transcription' + : caps.includes('tts') ? 'tts' + : caps.includes('image') ? 'image' + : ''; + const backends = Array.isArray(m.backends) ? m.backends : []; + const device = + backends.includes('rocm') ? 'rocm' + : backends.includes('vulkan') ? 'vulkan' + : backends.includes('cpu') ? 'cpu' + : backends[0] || ''; + const b = m.size_bytes || 0; + const size = !b + ? '—' + : b < 1024 ** 2 ? `${(b / 1024).toFixed(1)} KB` + : b < 1024 ** 3 ? 
`${(b / 1024 ** 2).toFixed(1)} MB` + : `${(b / 1024 ** 3).toFixed(2)} GB`; + return { + ...m, + type, + device, + longName: m.name || m.id, + size, + repo: m.hf_repo || m.path || '', + }; +} + // ─── Create-slot modal ────────────────────────────────────────── function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) { const [name, setName] = useStateSM(defaults.name || ""); @@ -30,6 +69,7 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) { const createMut = useSlotCreate(); const hwQuery = useHardware(); + const modelsQuery = useModels(); useEffectSM(() => { if (open) { @@ -51,11 +91,17 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) { const nameInvalid = name && !/^[a-z][a-z0-9-]{0,30}$/.test(name); const nameError = nameCollision ? "name already in use" : nameInvalid ? "lowercase + dashes only" : null; - // Model catalogue still lives in HAL0_DATA — replaced when the models - // hook ships (parallel teammate). NPU availability is now live. - const compatible = HAL0_DATA.models.filter(m => + // Live catalogue from /api/models (normalized to the legacy HAL0_DATA + // shape so the existing filter + render code keeps working). Sending a + // mock id like `qwen3.6-27b-mtp` here would tunnel into POST + // /api/slots/{name}/swap and the slot orchestrator would reject it + // against the real registry (model.not_found). + const allModels = (modelsQuery.data ?? 
[]).map(normalizeApiModel); + const compatible = allModels.filter(m => m.type === type && - (device === "cpu" || m.device === (device || "cpu").replace("gpu-", "") || (device === "npu" && m.device === "npu")) + (device === "cpu" + || (Array.isArray(m.backends) && m.backends.includes((device || "cpu").replace("gpu-", ""))) + || (device === "npu" && m.device === "npu")) ); const npuAvailable = !!hwQuery.data?.npu?.present; @@ -182,7 +228,7 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) { ))} {model && compatible.find(m => m.id === model) && ( -
✓ fits in available memory ({HAL0_DATA.host.ram.free} GB free)
+
✓ fits in available memory ({hwQuery.data?.ram?.free ?? "?"} GB free)
)} @@ -517,14 +563,22 @@ function ReadOnlyStrip({ k, v }) { // ─── Inline swap popover ──────────────────────────────────────── function InlineSwapPopover({ slot, open, onClose, onPick }) { + // Hooks first — React rules-of-hooks forbid an early return before + // them. The popover is mounted unconditionally and toggles via `open`; + // useQuery's own caching means useModels() costs ~nothing when closed. + const modelsQuery = useModels(); + const hwQuery = useHardware(); if (!open) return null; - const compatible = HAL0_DATA.models.filter(m => m.type === slot.type); + const ramFreeGb = hwQuery.data?.ram?.free ?? 0; + const compatible = (modelsQuery.data ?? []) + .map(normalizeApiModel) + .filter(m => m.type === slot.type); return (
e.stopPropagation()}>
Swap model · type {slot.type}
{compatible.map(m => { const isCur = slot.model_id === m.id; - const fits = HAL0_DATA.host.ram.free > parseSizeGB(m.size); + const fits = ramFreeGb > parseSizeGB(m.size); return ( // The whole row is a mouse-click target (convenience) but the // nested chevron button is the single keyboard/AT-accessible From f9736422bfd5a7adf9754ac52b392c0d4e15475e Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 27 May 2026 13:12:07 -0400 Subject: [PATCH 2/3] =?UTF-8?q?test(=CE=B3):=20seed=20/api/models=20fixtur?= =?UTF-8?q?e=20for=20swap-popover=20suite?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The popover now reads from useModels(); without a fixture route the apiMock catch-all returned {} → useModels()=[] → 0 .swap-pop-items → slots-wireup-v3.spec.ts:135 toBeVisible() timed out. Adds 4 representative MOCK_DATA.models rows (chat, coding, embed, rerank) keyed by capabilities/backends so the JSX normalizer derives matching slot.type values, and a /api/models route on installDefaultMocks that fulfils them. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ui/tests/e2e/fixtures/apiMock.ts | 4 +++ ui/tests/e2e/fixtures/mock-data.ts | 51 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/ui/tests/e2e/fixtures/apiMock.ts b/ui/tests/e2e/fixtures/apiMock.ts index a473d6ea..e2750c36 100644 --- a/ui/tests/e2e/fixtures/apiMock.ts +++ b/ui/tests/e2e/fixtures/apiMock.ts @@ -25,6 +25,7 @@ export type MockState = { host: typeof MOCK_DATA.host lemond: typeof MOCK_DATA.lemond slots: typeof MOCK_DATA.slots + models: typeof MOCK_DATA.models backends: typeof MOCK_DATA.backends approvals: any[] } @@ -66,6 +67,9 @@ export async function installDefaultMocks(page: Page, state: MockState) { }), ) await page.route('**/api/hardware', (route) => json(route, state.host)) + await page.route('**/api/models', (route) => + json(route, { models: state.models, count: state.models.length }), + ) await page.route('**/api/slots', (route) => json(route, { slots: state.slots })) await page.route('**/api/slots/metrics', (route) => json(route, {})) await page.route('**/api/backends', (route) => json(route, { backends: state.backends })) diff --git a/ui/tests/e2e/fixtures/mock-data.ts b/ui/tests/e2e/fixtures/mock-data.ts index 1f034005..33ab7a10 100644 --- a/ui/tests/e2e/fixtures/mock-data.ts +++ b/ui/tests/e2e/fixtures/mock-data.ts @@ -62,6 +62,57 @@ export const MOCK_DATA = { }, ], + /** Subset of /api/models rows the swap popover + create-slot modal + * now consume (PR fix(dash): slot swap popover reads live /api/models). + * Shape matches the registry serializer: capabilities + backends drive + * the JSX-side normalizer's type + device derivation. 
*/ + models: [ + { + id: 'qwen3.6-27b-q5kxl', + name: 'Qwen3.6-27B UD-Q5_K_XL', + path: '/mnt/ai-models/qwen3.6-27b-q5kxl/Qwen3.6-27B-UD-Q5_K_XL.gguf', + size_bytes: 19_000_000_000, + capabilities: ['chat'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'unsloth/Qwen3.6-27B-GGUF', + installed: true, + ns: 'pulled', + }, + { + id: 'qwen3-coder-next-q4kxl', + name: 'Qwen3-Coder-Next UD-Q4_K_XL', + path: '/mnt/ai-models/qwen3-coder-next-q4kxl/Qwen3-Coder-Next-UD-Q4_K_XL.gguf', + size_bytes: 19_000_000_000, + capabilities: ['chat', 'coding'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'unsloth/Qwen3-Coder-Next-GGUF', + installed: true, + ns: 'pulled', + }, + { + id: 'nomic-embed-text-v1.5-q8', + name: 'nomic-embed-text-v1.5 Q8', + path: '/mnt/ai-models/nomic-embed-text-v1.5-q8/nomic-embed-text-v1.5.Q8_0.gguf', + size_bytes: 350_000_000, + capabilities: ['embed'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'nomic-ai/nomic-embed-text-v1.5-GGUF', + installed: true, + ns: 'blessed', + }, + { + id: 'bge-reranker-v2-m3-q4_k_m', + name: 'BGE Reranker v2 M3 (Q4_K_M)', + path: '/mnt/ai-models/local/bge-reranker-v2-m3-q4_k_m/bge-reranker-v2-m3-Q4_K_M.gguf', + size_bytes: 438_376_864, + capabilities: ['rerank'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'gpustack/bge-reranker-v2-m3-GGUF', + installed: true, + ns: 'pulled', + }, + ], + backends: [ { id: 'llamacpp:rocm', name: 'llamacpp:rocm', ver: 'v1.0 (b9253)', state: 'installed', recommended: true }, { id: 'llamacpp:vulkan', name: 'llamacpp:vulkan', ver: 'v1.0 (b9253)', state: 'installed' }, From fafafd5f7588836bf934f9e7fd197f2d23f98e38 Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 27 May 2026 13:24:08 -0400 Subject: [PATCH 3/3] fix(dash): normalizer handles HAL0_DATA shape (labels/type) too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the Vite dev server's /api proxy target is down (or in CI before the page.route handlers attach), src/api/mock.ts 
falls back to HAL0_DATA.models — which use the legacy seed shape (labels + type + device + size as strings) rather than the registry shape (capabilities + backends + size_bytes). The first version of normalizeApiModel only read capabilities/backends/size_bytes, so every row got type='' and the popover filter rejected all 14 entries → empty .swap-pop list. Tolerate both shapes: prefer existing m.type/m.device/m.size if the row already carries them, else derive from capabilities/backends/ size_bytes. Same for longName + repo. Verified slots-wireup-v3 'Swap model — inline popover' passes locally (1.3s, was failing at 5s timeout in CI). Co-Authored-By: Claude Opus 4.7 (1M context) --- ui/src/dash/slot-modals.jsx | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/ui/src/dash/slot-modals.jsx b/ui/src/dash/slot-modals.jsx index e8529151..b9ae5d88 100644 --- a/ui/src/dash/slot-modals.jsx +++ b/ui/src/dash/slot-modals.jsx @@ -23,34 +23,46 @@ const { useState: useStateSM, useEffect: useEffectSM, useRef: useRefSM } = React // ids to the backend — they're fictional (`qwen3.6-27b-mtp` etc.) and // the slot orchestrator correctly rejects them against the real registry. function normalizeApiModel(m) { - const caps = Array.isArray(m.capabilities) ? m.capabilities : []; - const type = - caps.includes('chat') || caps.includes('coding') ? 'llm' - : caps.includes('rerank') ? 'reranking' - : caps.includes('embed') || caps.includes('embeddings') ? 'embedding' - : caps.includes('transcription') || caps.includes('asr') ? 'transcription' - : caps.includes('tts') ? 'tts' - : caps.includes('image') ? 'image' + // Accept both shapes: the registry/API shape (capabilities + backends + + // size_bytes + name + hf_repo) and the legacy HAL0_DATA seed shape + // (labels + device + size + longName + repo + type). 
Local dev without + // a backend falls back via src/api/mock.ts to HAL0_DATA.models, and the + // γ-suite hits that fallback when fetch fails before page.route catches + // (race + connection-refused on the Vite proxy target). Tolerating both + // shapes keeps the popover non-empty in every mock path. + const sourceCaps = Array.isArray(m.capabilities) + ? m.capabilities + : Array.isArray(m.labels) ? m.labels : []; + const derivedType = + sourceCaps.includes('chat') || sourceCaps.includes('coding') ? 'llm' + : sourceCaps.includes('rerank') || sourceCaps.includes('reranking') ? 'reranking' + : sourceCaps.includes('embed') || sourceCaps.includes('embeddings') ? 'embedding' + : sourceCaps.includes('transcription') || sourceCaps.includes('asr') ? 'transcription' + : sourceCaps.includes('tts') ? 'tts' + : sourceCaps.includes('image') ? 'image' : ''; + const type = typeof m.type === 'string' && m.type ? m.type : derivedType; const backends = Array.isArray(m.backends) ? m.backends : []; - const device = + const derivedDevice = backends.includes('rocm') ? 'rocm' : backends.includes('vulkan') ? 'vulkan' : backends.includes('cpu') ? 'cpu' : backends[0] || ''; + const device = typeof m.device === 'string' && m.device ? m.device : derivedDevice; const b = m.size_bytes || 0; - const size = !b + const derivedSize = !b ? '—' : b < 1024 ** 2 ? `${(b / 1024).toFixed(1)} KB` : b < 1024 ** 3 ? `${(b / 1024 ** 2).toFixed(1)} MB` : `${(b / 1024 ** 3).toFixed(2)} GB`; + const size = typeof m.size === 'string' && m.size ? m.size : derivedSize; return { ...m, type, device, - longName: m.name || m.id, + longName: m.longName || m.name || m.id, size, - repo: m.hf_repo || m.path || '', + repo: m.repo || m.hf_repo || m.path || '', }; }