// Map /api/models registry rows → the shape this file's swap popover and
// create-slot modal grew up around (HAL0_DATA seed). Done in JSX rather
// than at the API layer so the response stays identical to what the
// Models view (models.jsx) already consumes. NEVER ship HAL0_DATA model
// ids to the backend — they're fictional (`qwen3.6-27b-mtp` etc.) and
// the slot orchestrator correctly rejects them against the real registry.

/**
 * Normalize one model row so that both supported input shapes expose the
 * same fields to the render/filter code.
 *
 * Accepted shapes:
 *  - registry/API shape: `capabilities`, `backends`, `size_bytes`, `name`,
 *    `hf_repo`, `path`;
 *  - legacy HAL0_DATA seed shape: `labels`, `device`, `size`, `longName`,
 *    `repo`, `type`.
 *
 * Explicit legacy fields (`type`, `device`, `size`, `longName`, `repo`)
 * always win over values derived from the API fields.
 *
 * @param {object} m - A model row in either shape.
 * @returns {object} A shallow copy of `m` with `type`, `device`, `backends`,
 *   `longName`, `size` and `repo` guaranteed to be present.
 */
function normalizeApiModel(m) {
  // Non-empty string or fall back — used for every "legacy field wins" rule.
  const stringOr = (value, fallback) =>
    (typeof value === 'string' && value ? value : fallback);

  // Capability tags come from `capabilities` (API) or `labels` (legacy).
  const caps = Array.isArray(m.capabilities)
    ? m.capabilities
    : Array.isArray(m.labels) ? m.labels : [];

  // Ordered rules: the first rule with a matching tag decides the type.
  const typeRules = [
    [['chat', 'coding'], 'llm'],
    [['rerank', 'reranking'], 'reranking'],
    [['embed', 'embeddings'], 'embedding'],
    [['transcription', 'asr'], 'transcription'],
    [['tts'], 'tts'],
    [['image'], 'image'],
  ];
  const matchedRule = typeRules.find(([tags]) => tags.some((tag) => caps.includes(tag)));
  const type = stringOr(m.type, matchedRule ? matchedRule[1] : '');

  // Device preference when several backends are listed: rocm, then vulkan,
  // then cpu, otherwise whatever is listed first.
  const listedBackends = Array.isArray(m.backends) ? m.backends : [];
  const preferred = ['rocm', 'vulkan', 'cpu'].find((name) => listedBackends.includes(name));
  const device = stringOr(m.device, preferred ?? (listedBackends[0] || ''));

  // Legacy rows carry only `device`. Consumers in this file filter on
  // `backends.includes(...)`, so synthesize a one-element list for them;
  // otherwise legacy/mock rows silently drop out of every GPU device filter.
  const backends = listedBackends.length > 0
    ? listedBackends
    : (device ? [device] : []);

  // Human-readable size; anything that is not a positive finite byte count
  // is reported as unknown instead of being formatted into nonsense.
  const bytes = Number(m.size_bytes);
  let derivedSize = '—';
  if (Number.isFinite(bytes) && bytes > 0) {
    if (bytes < 1024 ** 2) {
      derivedSize = `${(bytes / 1024).toFixed(1)} KB`;
    } else if (bytes < 1024 ** 3) {
      derivedSize = `${(bytes / 1024 ** 2).toFixed(1)} MB`;
    } else {
      derivedSize = `${(bytes / 1024 ** 3).toFixed(2)} GB`;
    }
  }

  return {
    ...m,
    type,
    device,
    backends,
    longName: m.longName || m.name || m.id,
    size: stringOr(m.size, derivedSize),
    repo: m.repo || m.hf_repo || m.path || '',
  };
}
Sending a + // mock id like `qwen3.6-27b-mtp` here would tunnel into POST + // /api/slots/{name}/swap and the slot orchestrator would reject it + // against the real registry (slot.not_found). + const allModels = (modelsQuery.data ?? []).map(normalizeApiModel); + const compatible = allModels.filter(m => m.type === type && - (device === "cpu" || m.device === (device || "cpu").replace("gpu-", "") || (device === "npu" && m.device === "npu")) + (device === "cpu" + || (Array.isArray(m.backends) && m.backends.includes((device || "cpu").replace("gpu-", ""))) + || (device === "npu" && m.device === "npu")) ); const npuAvailable = !!hwQuery.data?.npu?.present; @@ -182,7 +240,7 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) { ))} {model && compatible.find(m => m.id === model) && ( -
✓ fits in available memory ({HAL0_DATA.host.ram.free} GB free)
+
✓ fits in available memory ({hwQuery.data?.ram?.free ?? "?"} GB free)
)} @@ -517,14 +575,22 @@ function ReadOnlyStrip({ k, v }) { // ─── Inline swap popover ──────────────────────────────────────── function InlineSwapPopover({ slot, open, onClose, onPick }) { + // Hooks first — React rules-of-hooks forbid an early return before + // them. The popover is mounted unconditionally and toggles via `open`; + // useQuery's own caching means useModels() costs ~nothing when closed. + const modelsQuery = useModels(); + const hwQuery = useHardware(); if (!open) return null; - const compatible = HAL0_DATA.models.filter(m => m.type === slot.type); + const ramFreeGb = hwQuery.data?.ram?.free ?? 0; + const compatible = (modelsQuery.data ?? []) + .map(normalizeApiModel) + .filter(m => m.type === slot.type); return (
e.stopPropagation()}>
Swap model · type {slot.type}
{compatible.map(m => { const isCur = slot.model_id === m.id; - const fits = HAL0_DATA.host.ram.free > parseSizeGB(m.size); + const fits = ramFreeGb > parseSizeGB(m.size); return ( // The whole row is a mouse-click target (convenience) but the // nested chevron button is the single keyboard/AT-accessible diff --git a/ui/tests/e2e/fixtures/apiMock.ts b/ui/tests/e2e/fixtures/apiMock.ts index a473d6ea..e2750c36 100644 --- a/ui/tests/e2e/fixtures/apiMock.ts +++ b/ui/tests/e2e/fixtures/apiMock.ts @@ -25,6 +25,7 @@ export type MockState = { host: typeof MOCK_DATA.host lemond: typeof MOCK_DATA.lemond slots: typeof MOCK_DATA.slots + models: typeof MOCK_DATA.models backends: typeof MOCK_DATA.backends approvals: any[] } @@ -66,6 +67,9 @@ export async function installDefaultMocks(page: Page, state: MockState) { }), ) await page.route('**/api/hardware', (route) => json(route, state.host)) + await page.route('**/api/models', (route) => + json(route, { models: state.models, count: state.models.length }), + ) await page.route('**/api/slots', (route) => json(route, { slots: state.slots })) await page.route('**/api/slots/metrics', (route) => json(route, {})) await page.route('**/api/backends', (route) => json(route, { backends: state.backends })) diff --git a/ui/tests/e2e/fixtures/mock-data.ts b/ui/tests/e2e/fixtures/mock-data.ts index 1f034005..33ab7a10 100644 --- a/ui/tests/e2e/fixtures/mock-data.ts +++ b/ui/tests/e2e/fixtures/mock-data.ts @@ -62,6 +62,57 @@ export const MOCK_DATA = { }, ], + /** Subset of /api/models rows the swap popover + create-slot modal + * now consume (PR fix(dash): slot swap popover reads live /api/models). + * Shape matches the registry serializer: capabilities + backends drive + * the JSX-side normalizer's type + device derivation. 
*/ + models: [ + { + id: 'qwen3.6-27b-q5kxl', + name: 'Qwen3.6-27B UD-Q5_K_XL', + path: '/mnt/ai-models/qwen3.6-27b-q5kxl/Qwen3.6-27B-UD-Q5_K_XL.gguf', + size_bytes: 19_000_000_000, + capabilities: ['chat'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'unsloth/Qwen3.6-27B-GGUF', + installed: true, + ns: 'pulled', + }, + { + id: 'qwen3-coder-next-q4kxl', + name: 'Qwen3-Coder-Next UD-Q4_K_XL', + path: '/mnt/ai-models/qwen3-coder-next-q4kxl/Qwen3-Coder-Next-UD-Q4_K_XL.gguf', + size_bytes: 19_000_000_000, + capabilities: ['chat', 'coding'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'unsloth/Qwen3-Coder-Next-GGUF', + installed: true, + ns: 'pulled', + }, + { + id: 'nomic-embed-text-v1.5-q8', + name: 'nomic-embed-text-v1.5 Q8', + path: '/mnt/ai-models/nomic-embed-text-v1.5-q8/nomic-embed-text-v1.5.Q8_0.gguf', + size_bytes: 350_000_000, + capabilities: ['embed'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'nomic-ai/nomic-embed-text-v1.5-GGUF', + installed: true, + ns: 'blessed', + }, + { + id: 'bge-reranker-v2-m3-q4_k_m', + name: 'BGE Reranker v2 M3 (Q4_K_M)', + path: '/mnt/ai-models/local/bge-reranker-v2-m3-q4_k_m/bge-reranker-v2-m3-Q4_K_M.gguf', + size_bytes: 438_376_864, + capabilities: ['rerank'], + backends: ['vulkan', 'rocm', 'cpu'], + hf_repo: 'gpustack/bge-reranker-v2-m3-GGUF', + installed: true, + ns: 'pulled', + }, + ], + backends: [ { id: 'llamacpp:rocm', name: 'llamacpp:rocm', ver: 'v1.0 (b9253)', state: 'installed', recommended: true }, { id: 'llamacpp:vulkan', name: 'llamacpp:vulkan', ver: 'v1.0 (b9253)', state: 'installed' },