Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 73 additions & 7 deletions ui/src/dash/slot-modals.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,61 @@ import {
useSlotDelete,
} from '@/api/hooks/useSlots'
import { useHardware } from '@/api/hooks/useHardware'
import { useModels } from '@/api/hooks/useModels'
import { ENDPOINTS } from '@/api/endpoints'

const { useState: useStateSM, useEffect: useEffectSM, useRef: useRefSM } = React;

// Map /api/models registry rows → the shape this file's swap popover and
// create-slot modal grew up around (HAL0_DATA seed). Done in JSX rather
// than at the API layer so the response stays identical to what the
// Models view (models.jsx) already consumes. NEVER ship HAL0_DATA model
// ids to the backend — they're fictional (`qwen3.6-27b-mtp` etc.) and
// the slot orchestrator correctly rejects them against the real registry.
/**
 * Normalize one model row into the shape the swap popover and create-slot
 * modal render and filter on.
 *
 * Two input shapes are accepted:
 *   - registry/API rows: `capabilities`, `backends`, `size_bytes`, `name`,
 *     `hf_repo`, `path`
 *   - legacy seed rows:  `labels`, `device`, `size`, `longName`, `repo`, `type`
 *
 * Explicit legacy fields (`type`, `device`, `size`, `longName`, `repo`) win
 * when present and non-empty; otherwise the value is derived from the
 * registry fields.
 *
 * @param m Raw model row in either shape.
 * @returns A copy of `m` with `type`, `device`, `backends`, `longName`,
 *          `size` and `repo` always populated (possibly with '' / [] / '—').
 */
function normalizeApiModel(m) {
  // Capabilities drive the type; legacy rows carry them under `labels`.
  const sourceCaps = Array.isArray(m.capabilities)
    ? m.capabilities
    : Array.isArray(m.labels) ? m.labels : [];
  const hasCap = (...names) => names.some((n) => sourceCaps.includes(n));

  // First match wins, so a row tagged both 'chat' and 'embed' is an llm.
  const derivedType =
    hasCap('chat', 'coding') ? 'llm'
    : hasCap('rerank', 'reranking') ? 'reranking'
    : hasCap('embed', 'embeddings') ? 'embedding'
    : hasCap('transcription', 'asr') ? 'transcription'
    : hasCap('tts') ? 'tts'
    : hasCap('image') ? 'image'
    : '';
  const type = typeof m.type === 'string' && m.type ? m.type : derivedType;

  // Preferred device order is rocm > vulkan > cpu, then whatever is listed first.
  const listedBackends = Array.isArray(m.backends) ? m.backends : [];
  const derivedDevice =
    listedBackends.includes('rocm') ? 'rocm'
    : listedBackends.includes('vulkan') ? 'vulkan'
    : listedBackends.includes('cpu') ? 'cpu'
    : listedBackends[0] || '';
  const device = typeof m.device === 'string' && m.device ? m.device : derivedDevice;

  // Legacy rows have a `device` but no `backends` array. Consumers that
  // filter on `backends.includes(...)` would silently drop every such row,
  // so synthesize a one-element list from the resolved device. A `backends`
  // array supplied by the row (even an empty one) is passed through as-is.
  const backends = Array.isArray(m.backends)
    ? m.backends
    : device ? [device] : [];

  // Human-readable size; '—' when the byte count is missing or zero.
  const b = m.size_bytes || 0;
  const derivedSize = !b
    ? '—'
    : b < 1024 ** 2 ? `${(b / 1024).toFixed(1)} KB`
    : b < 1024 ** 3 ? `${(b / 1024 ** 2).toFixed(1)} MB`
    : `${(b / 1024 ** 3).toFixed(2)} GB`;
  const size = typeof m.size === 'string' && m.size ? m.size : derivedSize;

  return {
    ...m,
    type,
    device,
    backends,
    longName: m.longName || m.name || m.id,
    size,
    repo: m.repo || m.hf_repo || m.path || '',
  };
}

// ─── Create-slot modal ──────────────────────────────────────────
function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {
const [name, setName] = useStateSM(defaults.name || "");
Expand All @@ -30,6 +81,7 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {

const createMut = useSlotCreate();
const hwQuery = useHardware();
const modelsQuery = useModels();

useEffectSM(() => {
if (open) {
Expand All @@ -51,11 +103,17 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {
const nameInvalid = name && !/^[a-z][a-z0-9-]{0,30}$/.test(name);
const nameError = nameCollision ? "name already in use" : nameInvalid ? "lowercase + dashes only" : null;

// Model catalogue still lives in HAL0_DATA — replaced when the models
// hook ships (parallel teammate). NPU availability is now live.
const compatible = HAL0_DATA.models.filter(m =>
// Live catalogue from /api/models (normalized to the legacy HAL0_DATA
// shape so the existing filter + render code keeps working). Sending a
// mock id like `qwen3.6-27b-mtp` here would tunnel into POST
// /api/slots/{name}/swap and the slot orchestrator would reject it
// against the real registry (slot.not_found).
const allModels = (modelsQuery.data ?? []).map(normalizeApiModel);
const compatible = allModels.filter(m =>
m.type === type &&
(device === "cpu" || m.device === (device || "cpu").replace("gpu-", "") || (device === "npu" && m.device === "npu"))
(device === "cpu"
|| (Array.isArray(m.backends) && m.backends.includes((device || "cpu").replace("gpu-", "")))
|| (device === "npu" && m.device === "npu"))
);

const npuAvailable = !!hwQuery.data?.npu?.present;
Expand Down Expand Up @@ -182,7 +240,7 @@ function CreateSlotModal({ open, onClose, defaults = {}, existingSlots = [] }) {
))}
</select>
{model && compatible.find(m => m.id === model) && (
<div className="ok">✓ fits in available memory ({HAL0_DATA.host.ram.free} GB free)</div>
<div className="ok">✓ fits in available memory ({hwQuery.data?.ram?.free ?? "?"} GB free)</div>
)}
</div>
</div>
Expand Down Expand Up @@ -517,14 +575,22 @@ function ReadOnlyStrip({ k, v }) {

// ─── Inline swap popover ────────────────────────────────────────
function InlineSwapPopover({ slot, open, onClose, onPick }) {
// Hooks first — React rules-of-hooks forbid an early return before
// them. The popover is mounted unconditionally and toggles via `open`;
// useQuery's own caching means useModels() costs ~nothing when closed.
const modelsQuery = useModels();
const hwQuery = useHardware();
if (!open) return null;
const compatible = HAL0_DATA.models.filter(m => m.type === slot.type);
const ramFreeGb = hwQuery.data?.ram?.free ?? 0;
const compatible = (modelsQuery.data ?? [])
.map(normalizeApiModel)
.filter(m => m.type === slot.type);
return (
<div className="swap-pop" onClick={e => e.stopPropagation()}>
<div className="swap-pop-h">Swap model · type {slot.type}</div>
{compatible.map(m => {
const isCur = slot.model_id === m.id;
const fits = HAL0_DATA.host.ram.free > parseSizeGB(m.size);
const fits = ramFreeGb > parseSizeGB(m.size);
return (
// The whole row is a mouse-click target (convenience) but the
// nested chevron button is the single keyboard/AT-accessible
Expand Down
4 changes: 4 additions & 0 deletions ui/tests/e2e/fixtures/apiMock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export type MockState = {
host: typeof MOCK_DATA.host
lemond: typeof MOCK_DATA.lemond
slots: typeof MOCK_DATA.slots
models: typeof MOCK_DATA.models
backends: typeof MOCK_DATA.backends
approvals: any[]
}
Expand Down Expand Up @@ -66,6 +67,9 @@ export async function installDefaultMocks(page: Page, state: MockState) {
}),
)
await page.route('**/api/hardware', (route) => json(route, state.host))
await page.route('**/api/models', (route) =>
json(route, { models: state.models, count: state.models.length }),
)
await page.route('**/api/slots', (route) => json(route, { slots: state.slots }))
await page.route('**/api/slots/metrics', (route) => json(route, {}))
await page.route('**/api/backends', (route) => json(route, { backends: state.backends }))
Expand Down
51 changes: 51 additions & 0 deletions ui/tests/e2e/fixtures/mock-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,57 @@ export const MOCK_DATA = {
},
],

/** Subset of /api/models rows the swap popover + create-slot modal
* now consume (PR fix(dash): slot swap popover reads live /api/models).
* Shape matches the registry serializer: capabilities + backends drive
* the JSX-side normalizer's type + device derivation. */
models: [
{
id: 'qwen3.6-27b-q5kxl',
name: 'Qwen3.6-27B UD-Q5_K_XL',
path: '/mnt/ai-models/qwen3.6-27b-q5kxl/Qwen3.6-27B-UD-Q5_K_XL.gguf',
size_bytes: 19_000_000_000,
capabilities: ['chat'],
backends: ['vulkan', 'rocm', 'cpu'],
hf_repo: 'unsloth/Qwen3.6-27B-GGUF',
installed: true,
ns: 'pulled',
},
{
id: 'qwen3-coder-next-q4kxl',
name: 'Qwen3-Coder-Next UD-Q4_K_XL',
path: '/mnt/ai-models/qwen3-coder-next-q4kxl/Qwen3-Coder-Next-UD-Q4_K_XL.gguf',
size_bytes: 19_000_000_000,
capabilities: ['chat', 'coding'],
backends: ['vulkan', 'rocm', 'cpu'],
hf_repo: 'unsloth/Qwen3-Coder-Next-GGUF',
installed: true,
ns: 'pulled',
},
{
id: 'nomic-embed-text-v1.5-q8',
name: 'nomic-embed-text-v1.5 Q8',
path: '/mnt/ai-models/nomic-embed-text-v1.5-q8/nomic-embed-text-v1.5.Q8_0.gguf',
size_bytes: 350_000_000,
capabilities: ['embed'],
backends: ['vulkan', 'rocm', 'cpu'],
hf_repo: 'nomic-ai/nomic-embed-text-v1.5-GGUF',
installed: true,
ns: 'blessed',
},
{
id: 'bge-reranker-v2-m3-q4_k_m',
name: 'BGE Reranker v2 M3 (Q4_K_M)',
path: '/mnt/ai-models/local/bge-reranker-v2-m3-q4_k_m/bge-reranker-v2-m3-Q4_K_M.gguf',
size_bytes: 438_376_864,
capabilities: ['rerank'],
backends: ['vulkan', 'rocm', 'cpu'],
hf_repo: 'gpustack/bge-reranker-v2-m3-GGUF',
installed: true,
ns: 'pulled',
},
],

backends: [
{ id: 'llamacpp:rocm', name: 'llamacpp:rocm', ver: 'v1.0 (b9253)', state: 'installed', recommended: true },
{ id: 'llamacpp:vulkan', name: 'llamacpp:vulkan', ver: 'v1.0 (b9253)', state: 'installed' },
Expand Down
Loading