From 5d84e67926e1c8d2a6a01ac40a3445d46e42162b Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Fri, 15 May 2026 00:45:17 +0200 Subject: [PATCH] =?UTF-8?q?refactor(core):=20rename=20benchmark=20?= =?UTF-8?q?=E2=86=92=20project=20for=20registry=20+=20sync?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal-only rename (PR 1 of 4). The user-facing "benchmark" terminology in HTTP routes (/api/benchmarks/...), JSON field names (benchmark_id, benchmark_name), CLI flags, Studio components, and docs is unchanged in this PR — those land in PR 2 (HTTP API), PR 3 (Studio frontend), and PR 4 (docs). Renamed: - packages/core/src/benchmarks.ts → projects.ts - packages/core/src/benchmark-sync.ts → project-sync.ts - BenchmarkEntry → ProjectEntry, BenchmarkSource → ProjectSource, BenchmarkRegistry → ProjectRegistry - loadBenchmarkRegistry → loadProjectRegistry, saveBenchmarkRegistry → saveProjectRegistry, addBenchmark → addProject, removeBenchmark → removeProject, getBenchmark → getProject, touchBenchmark → touchProject, discoverBenchmarks → discoverProjects, deriveBenchmarkId → deriveProjectId, getBenchmarksRegistryPath → getProjectsRegistryPath, syncBenchmark → syncProject, syncBenchmarks → syncProjects - ~/.agentv/benchmarks.yaml → projects.yaml, top-level key `benchmarks:` → `projects:` One-time migration: - loadProjectRegistry() calls migrateLegacyBenchmarksFile() before reading the registry. If only benchmarks.yaml exists, it is read, transformed (top-level key rewritten), written to a temp file, atomically renamed to projects.yaml, and the legacy file is unlinked. If both files exist, projects.yaml wins and a warning is logged. Idempotent: subsequent loads are a no-op. Rationale: 5 of 6 LLM observability tools (Phoenix, Langfuse, Braintrust, W&B Weave, LangSmith) use "project" for the container that holds eval runs, traces, datasets, and other telemetry. 
agentv is adding trace/span/latency capture alongside eval runs, making "benchmark" too narrow. The rename also disambiguates from the academic "benchmark = eval suite" usage that survives in example directory names (benchmark-tooling, multi-model-benchmark, etc.). Co-Authored-By: Claude Opus 4.7 --- apps/cli/src/commands/results/serve.ts | 66 +-- apps/cli/test/commands/results/serve.test.ts | 6 +- packages/core/src/benchmarks.ts | 292 ------------- packages/core/src/index.ts | 28 +- .../{benchmark-sync.ts => project-sync.ts} | 28 +- packages/core/src/projects.ts | 393 ++++++++++++++++++ packages/core/test/benchmarks.test.ts | 157 ------- ...mark-sync.test.ts => project-sync.test.ts} | 26 +- packages/core/test/projects.test.ts | 277 ++++++++++++ 9 files changed, 747 insertions(+), 526 deletions(-) delete mode 100644 packages/core/src/benchmarks.ts rename packages/core/src/{benchmark-sync.ts => project-sync.ts} (53%) create mode 100644 packages/core/src/projects.ts delete mode 100644 packages/core/test/benchmarks.test.ts rename packages/core/test/{benchmark-sync.test.ts => project-sync.test.ts} (80%) create mode 100644 packages/core/test/projects.test.ts diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts index ea679a85..31050575 100644 --- a/apps/cli/src/commands/results/serve.ts +++ b/apps/cli/src/commands/results/serve.ts @@ -40,12 +40,12 @@ import { command, flag, number, option, optional, positional, string } from 'cmd import { DEFAULT_CATEGORY, type EvaluationResult, - addBenchmark, - getBenchmark, - loadBenchmarkRegistry, + addProject, + getProject, loadConfig, - removeBenchmark, - syncBenchmarks, + loadProjectRegistry, + removeProject, + syncProjects, } from '@agentv/core'; import type { Context } from 'hono'; import { Hono } from 'hono'; @@ -989,13 +989,13 @@ export function createApp( c: C, handler: (c: C, ctx: DataContext) => Response | Promise, ): Response | Promise { - const benchmark = 
getBenchmark(c.req.param('benchmarkId') ?? ''); - if (!benchmark || !existsSync(benchmark.path)) { + const project = getProject(c.req.param('benchmarkId') ?? ''); + if (!project || !existsSync(project.path)) { return c.json({ error: 'Benchmark not found' }, 404); } return handler(c, { - searchDir: benchmark.path, - agentvDir: path.join(benchmark.path, '.agentv'), + searchDir: project.path, + agentvDir: path.join(project.path, '.agentv'), }); } @@ -1021,7 +1021,7 @@ export function createApp( // ── Benchmark management endpoints ─────────────────────────────────── - /** Convert a BenchmarkEntry to snake_case wire format. */ + /** Convert a ProjectEntry to snake_case wire format. */ function benchmarkEntryToWire(entry: { id: string; name: string; @@ -1039,9 +1039,9 @@ export function createApp( } app.get('/api/benchmarks', async (c) => { - const registry = loadBenchmarkRegistry(); + const registry = loadProjectRegistry(); const benchmarks = await Promise.all( - registry.benchmarks.map(async (p) => { + registry.projects.map(async (p) => { let runCount = 0; let passRate = 0; let lastRun: string | null = null; @@ -1074,7 +1074,7 @@ export function createApp( try { const body = await c.req.json<{ path: string }>(); if (!body.path) return c.json({ error: 'Missing path' }, 400); - const entry = addBenchmark(body.path); + const entry = addProject(body.path); return c.json(benchmarkEntryToWire(entry), 201); } catch (err) { return c.json({ error: (err as Error).message }, 400); @@ -1082,17 +1082,17 @@ export function createApp( }); app.get('/api/benchmarks/:benchmarkId/summary', async (c) => { - const benchmark = getBenchmark(c.req.param('benchmarkId') ?? ''); - if (!benchmark) return c.json({ error: 'Benchmark not found' }, 404); + const project = getProject(c.req.param('benchmarkId') ?? 
''); + if (!project) return c.json({ error: 'Benchmark not found' }, 404); try { - const { runs: metas } = await listMergedResultFiles(benchmark.path); + const { runs: metas } = await listMergedResultFiles(project.path); const runCount = metas.length; const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0; const lastRun = metas.length > 0 ? metas[0].timestamp : null; return c.json({ - id: benchmark.id, - name: benchmark.name, - path: benchmark.path, + id: project.id, + name: project.name, + path: project.path, run_count: runCount, pass_rate: passRate, last_run: lastRun, @@ -1104,7 +1104,7 @@ export function createApp( /** Aggregate runs from all registered benchmarks, sorted by timestamp descending. */ app.get('/api/benchmarks/all-runs', async (c) => { - const registry = loadBenchmarkRegistry(); + const registry = loadProjectRegistry(); const allRuns: Array<{ filename: string; display_name: string; @@ -1121,7 +1121,7 @@ export function createApp( benchmark_name: string; }> = []; - for (const p of registry.benchmarks) { + for (const p of registry.projects) { try { const { runs: metas } = await listMergedResultFiles(p.path); for (const m of metas) { @@ -1165,7 +1165,7 @@ export function createApp( if (readOnly) { return c.json({ error: 'Studio is running in read-only mode' }, 403); } - const removed = removeBenchmark(c.req.param('benchmarkId') ?? ''); + const removed = removeProject(c.req.param('benchmarkId') ?? 
''); if (!removed) return c.json({ error: 'Benchmark not found' }, 404); return c.json({ ok: true }); }); @@ -1351,8 +1351,8 @@ export function createApp( // For benchmark-scoped routes, resolve to benchmark path; otherwise use searchDir const benchmarkId = c.req.param('benchmarkId'); if (benchmarkId) { - const benchmark = getBenchmark(benchmarkId); - if (benchmark) return benchmark.path; + const project = getProject(benchmarkId); + if (project) return project.path; } return searchDir; }, @@ -1492,7 +1492,7 @@ export const resultsServeCommand = command({ // ── Benchmark management commands (non-server) ─────────────────── if (add) { try { - const entry = addBenchmark(add); + const entry = addProject(add); console.log(`Registered benchmark: ${entry.name} (${entry.id}) at ${entry.path}`); } catch (err) { console.error(`Error: ${(err as Error).message}`); @@ -1502,7 +1502,7 @@ export const resultsServeCommand = command({ } if (remove) { - const removed = removeBenchmark(remove); + const removed = removeProject(remove); if (removed) { console.log(`Unregistered benchmark: ${remove}`); } else { @@ -1525,15 +1525,15 @@ export const resultsServeCommand = command({ } // ── Determine multi-benchmark mode ─────────────────────────────── - const registry = loadBenchmarkRegistry(); - const { isMultiBenchmark, showMultiWarning } = resolveDashboardMode( - registry.benchmarks.length, - { multi, single }, - ); + const registry = loadProjectRegistry(); + const { isMultiBenchmark, showMultiWarning } = resolveDashboardMode(registry.projects.length, { + multi, + single, + }); // ── Benchmark sync preflight ───────────────────────────────────── // Clone or pull any benchmark entries that declare a source. 
- await syncBenchmarks(registry.benchmarks); + await syncProjects(registry.projects); try { let results: EvaluationResult[] = []; @@ -1575,7 +1575,7 @@ export const resultsServeCommand = command({ } if (isMultiBenchmark) { - console.log(`Multi-benchmark mode: ${registry.benchmarks.length} benchmark(s) registered`); + console.log(`Multi-benchmark mode: ${registry.projects.length} benchmark(s) registered`); } else if (results.length > 0 && sourceFile) { console.log(`Serving ${results.length} result(s) from ${sourceFile}`); } else { diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts index 30c04ee6..c6d811c7 100644 --- a/apps/cli/test/commands/results/serve.test.ts +++ b/apps/cli/test/commands/results/serve.test.ts @@ -3,7 +3,7 @@ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync import { tmpdir } from 'node:os'; import path from 'node:path'; -import { addBenchmark } from '@agentv/core'; +import { addProject } from '@agentv/core'; import { createApp, @@ -577,7 +577,7 @@ describe('serve app', () => { ); mkdirSync(runDir, { recursive: true }); writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(RESULT_A)); - const benchmark = addBenchmark(benchmarkDir); + const project = addProject(benchmarkDir); const app = createApp([], tempDir, tempDir, undefined, { studioDir }); const res = await app.request('/api/benchmarks/all-runs'); @@ -588,7 +588,7 @@ describe('serve app', () => { }; expect(data.runs).toHaveLength(1); expect(data.runs[0]).toMatchObject({ - benchmark_id: benchmark.id, + benchmark_id: project.id, experiment: 'issue-1198-benchmark', target: 'gpt-4o', }); diff --git a/packages/core/src/benchmarks.ts b/packages/core/src/benchmarks.ts deleted file mode 100644 index a220c36c..00000000 --- a/packages/core/src/benchmarks.ts +++ /dev/null @@ -1,292 +0,0 @@ -/** - * Benchmark registry for AgentV Studio multi-benchmark support. 
- * - * A Benchmark = any directory containing a `.agentv/` folder. - * The registry lives at `~/.agentv/benchmarks.yaml` and is the single source of - * truth for which benchmarks Studio shows. Studio re-reads the file on every - * `/api/benchmarks` request, so edits (direct, via POST /api/benchmarks, via - * the CLI's --add/--remove, or via a Kubernetes ConfigMap mount) are reflected - * without restarting `agentv serve`. - * - * YAML format (all keys snake_case per AGENTS.md §"Wire Format Convention"): - * benchmarks: - * - id: my-app - * name: My App - * path: /home/user/projects/my-app - * source: - * url: ${{ BENCHMARK_REPO_URL }} - * ref: ${{ BENCHMARK_REPO_REF:-main }} - * added_at: "2026-03-20T10:00:00Z" - * last_opened_at: "2026-03-30T14:00:00Z" - * - * The optional `source` field enables remote sync via syncBenchmarks(): - * first run — git clone --depth 1 --filter=blob:none - * subsequent runs — git pull --ff-only - * - * Concurrency: the registry assumes a single writer. All mutating calls - * (add/remove/touchBenchmark) do read-modify-write on benchmarks.yaml - * without a lock. Studio's HTTP handlers are serialized by Node's - * single-threaded event loop, which satisfies the 24/7 deployment case. - * Run only one `agentv` process against a given home at a time. - * - * To extend: - * - CRUD: loadBenchmarkRegistry() / saveBenchmarkRegistry() + the - * add/remove/touch helpers. - * - discoverBenchmarks() is a one-shot filesystem utility for bulk - * registration; it does not run in the request path. 
- */ - -import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs'; -import path from 'node:path'; - -import { stringify as stringifyYaml } from 'yaml'; - -import { interpolateEnv } from './evaluation/interpolation.js'; -import { parseYamlValue } from './evaluation/yaml-loader.js'; -import { getAgentvConfigDir } from './paths.js'; - -// ── Types ─────────────────────────────────────────────────────────────── - -export interface BenchmarkSource { - url: string; - ref: string; -} - -export interface BenchmarkEntry { - id: string; - name: string; - path: string; - addedAt: string; - lastOpenedAt: string; - source?: BenchmarkSource; -} - -export interface BenchmarkRegistry { - benchmarks: BenchmarkEntry[]; -} - -// ── Registry path ─────────────────────────────────────────────────────── - -export function getBenchmarksRegistryPath(): string { - return path.join(getAgentvConfigDir(), 'benchmarks.yaml'); -} - -// ── Load / Save ───────────────────────────────────────────────────────── -// YAML uses snake_case per AGENTS.md §"Wire Format Convention"; TypeScript -// internals stay camelCase. fromYaml / toYaml handle the translation; every -// other function in this module works in camelCase only. - -interface BenchmarkSourceYaml { - url: string; - ref: string; -} - -interface BenchmarkEntryYaml { - id: string; - name: string; - path: string; - added_at: string; - last_opened_at: string; - source?: BenchmarkSourceYaml; -} - -function fromYaml(raw: unknown): BenchmarkEntry | null { - if (!raw || typeof raw !== 'object') return null; - const e = raw as Partial; - if (typeof e.id !== 'string' || typeof e.name !== 'string' || typeof e.path !== 'string') { - return null; - } - const entry: BenchmarkEntry = { - id: e.id, - name: e.name, - path: e.path, - addedAt: typeof e.added_at === 'string' ? e.added_at : '', - lastOpenedAt: typeof e.last_opened_at === 'string' ? 
e.last_opened_at : '', - }; - if (e.source && typeof e.source === 'object') { - const s = e.source as Partial; - if (typeof s.url === 'string' && typeof s.ref === 'string') { - entry.source = { url: s.url, ref: s.ref }; - } - } - return entry; -} - -function toYaml(entry: BenchmarkEntry): BenchmarkEntryYaml { - const yaml: BenchmarkEntryYaml = { - id: entry.id, - name: entry.name, - path: entry.path, - added_at: entry.addedAt, - last_opened_at: entry.lastOpenedAt, - }; - if (entry.source) { - yaml.source = { url: entry.source.url, ref: entry.source.ref }; - } - return yaml; -} - -export function loadBenchmarkRegistry(): BenchmarkRegistry { - const registryPath = getBenchmarksRegistryPath(); - if (!existsSync(registryPath)) { - return { benchmarks: [] }; - } - try { - const raw = readFileSync(registryPath, 'utf-8'); - const parsed = parseYamlValue(raw) as { benchmarks?: unknown } | null | undefined; - if (!parsed || typeof parsed !== 'object') { - return { benchmarks: [] }; - } - const env = process.env as Record; - const benchmarks = Array.isArray(parsed.benchmarks) - ? (parsed.benchmarks as unknown[]) - .map((e) => fromYaml(interpolateEnv(e, env))) - .filter((e): e is BenchmarkEntry => e !== null) - : []; - return { benchmarks }; - } catch { - return { benchmarks: [] }; - } -} - -export function saveBenchmarkRegistry(registry: BenchmarkRegistry): void { - const registryPath = getBenchmarksRegistryPath(); - const dir = path.dirname(registryPath); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); - } - const payload = { benchmarks: registry.benchmarks.map(toYaml) }; - writeFileSync(registryPath, stringifyYaml(payload), 'utf-8'); -} - -// ── CRUD operations ───────────────────────────────────────────────────── - -/** - * Derive a URL-safe benchmark ID from a directory path. - * Uses the directory basename, lowercased, with non-alphanumeric chars replaced by hyphens. - * Appends a numeric suffix if the ID already exists in the registry. 
- */ -export function deriveBenchmarkId(dirPath: string, existingIds: string[]): string { - const base = path - .basename(dirPath) - .toLowerCase() - .replace(/[^a-z0-9-]/g, '-') - .replace(/-+/g, '-') - .replace(/^-|-$/g, ''); - let candidate = base || 'benchmark'; - let suffix = 2; - while (existingIds.includes(candidate)) { - candidate = `${base}-${suffix}`; - suffix++; - } - return candidate; -} - -/** - * Register a benchmark by path. Returns the new entry, or the existing one if already registered. - * Validates that the path exists and contains a `.agentv/` directory. - */ -export function addBenchmark(benchmarkPath: string): BenchmarkEntry { - const absPath = path.resolve(benchmarkPath); - if (!existsSync(absPath)) { - throw new Error(`Directory not found: ${absPath}`); - } - if (!existsSync(path.join(absPath, '.agentv'))) { - throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`); - } - - const registry = loadBenchmarkRegistry(); - const existing = registry.benchmarks.find((p) => p.path === absPath); - if (existing) { - return existing; - } - - const now = new Date().toISOString(); - const entry: BenchmarkEntry = { - id: deriveBenchmarkId( - absPath, - registry.benchmarks.map((p) => p.id), - ), - name: path.basename(absPath), - path: absPath, - addedAt: now, - lastOpenedAt: now, - }; - registry.benchmarks.push(entry); - saveBenchmarkRegistry(registry); - return entry; -} - -/** - * Remove a benchmark by ID. Returns true if removed, false if not found. - */ -export function removeBenchmark(benchmarkId: string): boolean { - const registry = loadBenchmarkRegistry(); - const idx = registry.benchmarks.findIndex((p) => p.id === benchmarkId); - if (idx < 0) return false; - registry.benchmarks.splice(idx, 1); - saveBenchmarkRegistry(registry); - return true; -} - -/** - * Look up a benchmark by ID. Returns undefined if not found. 
- */ -export function getBenchmark(benchmarkId: string): BenchmarkEntry | undefined { - return loadBenchmarkRegistry().benchmarks.find((p) => p.id === benchmarkId); -} - -/** - * Update lastOpenedAt for a benchmark. - */ -export function touchBenchmark(benchmarkId: string): void { - const registry = loadBenchmarkRegistry(); - const entry = registry.benchmarks.find((p) => p.id === benchmarkId); - if (entry) { - entry.lastOpenedAt = new Date().toISOString(); - saveBenchmarkRegistry(registry); - } -} - -// ── Discovery utility ─────────────────────────────────────────────────── - -/** - * Scan a directory tree (up to maxDepth levels) for directories containing `.agentv/`. - * Returns absolute paths of discovered benchmark directories, sorted for - * deterministic iteration. This is a one-shot helper for bulk registration; - * Studio does not scan at request time. - */ -export function discoverBenchmarks(rootDir: string, maxDepth = 2): string[] { - const absRoot = path.resolve(rootDir); - if (!existsSync(absRoot) || !statSync(absRoot).isDirectory()) { - return []; - } - - const results: string[] = []; - - function scan(dir: string, depth: number) { - if (depth > maxDepth) return; - - // Check if this directory itself is a benchmark - if (existsSync(path.join(dir, '.agentv'))) { - results.push(dir); - return; // Don't scan subdirectories of a benchmark - } - - if (depth === maxDepth) return; - - try { - const entries = readdirSync(dir, { withFileTypes: true }); - for (const entry of entries) { - if (!entry.isDirectory()) continue; - if (entry.name.startsWith('.') || entry.name === 'node_modules') continue; - scan(path.join(dir, entry.name), depth + 1); - } - } catch { - // Permission denied or other FS errors — skip - } - } - - scan(absRoot, 0); - return results.sort(); -} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 48690d2e..870c0c5f 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -88,20 +88,20 @@ export { 
getWorkspacePoolRoot, } from './paths.js'; export { - type BenchmarkEntry, - type BenchmarkSource, - type BenchmarkRegistry, - loadBenchmarkRegistry, - saveBenchmarkRegistry, - addBenchmark, - removeBenchmark, - getBenchmark, - touchBenchmark, - discoverBenchmarks, - deriveBenchmarkId, - getBenchmarksRegistryPath, -} from './benchmarks.js'; -export { syncBenchmark, syncBenchmarks } from './benchmark-sync.js'; + type ProjectEntry, + type ProjectSource, + type ProjectRegistry, + loadProjectRegistry, + saveProjectRegistry, + addProject, + removeProject, + getProject, + touchProject, + discoverProjects, + deriveProjectId, + getProjectsRegistryPath, +} from './projects.js'; +export { syncProject, syncProjects } from './project-sync.js'; export { trimBaselineResult } from './evaluation/baseline.js'; export { DEFAULT_CATEGORY, deriveCategory } from './evaluation/category.js'; export * from './observability/index.js'; diff --git a/packages/core/src/benchmark-sync.ts b/packages/core/src/project-sync.ts similarity index 53% rename from packages/core/src/benchmark-sync.ts rename to packages/core/src/project-sync.ts index c83db317..b470fc42 100644 --- a/packages/core/src/benchmark-sync.ts +++ b/packages/core/src/project-sync.ts @@ -1,32 +1,32 @@ /** - * Benchmark sync — pulls remote git repos to the local path declared in the - * benchmark registry before Studio/eval startup. + * Project sync — pulls remote git repos to the local path declared in the + * project registry before Studio/eval startup. * * Sync is oneshot only, triggered by the Studio UI "Sync" button or the - * `agentv benchmark sync` CLI command. There is no daemon or continuous mode. + * `agentv project sync` CLI command. There is no daemon or continuous mode. 
* * First run — git clone --depth 1 --filter=blob:none --branch * Subsequent — git pull --ff-only (when /.git already exists) * * Usage: - * import { syncBenchmarks } from './benchmark-sync.js'; - * await syncBenchmarks(registry.benchmarks); + * import { syncProjects } from './project-sync.js'; + * await syncProjects(registry.projects); */ import * as childProcess from 'node:child_process'; import { existsSync } from 'node:fs'; -import type { BenchmarkEntry } from './benchmarks.js'; +import type { ProjectEntry } from './projects.js'; /** - * Clone or pull a single benchmark entry from its declared source. + * Clone or pull a single project entry from its declared source. * - No .git present: shallow clone into entry.path. * - .git present: git pull --ff-only to update in place. * Throws on git error or missing source. */ -export async function syncBenchmark(entry: BenchmarkEntry): Promise { +export async function syncProject(entry: ProjectEntry): Promise { if (!entry.source) { - throw new Error(`Benchmark '${entry.id}' has no source defined`); + throw new Error(`Project '${entry.id}' has no source defined`); } const { url, ref } = entry.source; const dest = entry.path; @@ -43,14 +43,14 @@ export async function syncBenchmark(entry: BenchmarkEntry): Promise { } /** - * Iterate benchmark entries and sync any that have a source declared. + * Iterate project entries and sync any that have a source declared. * Entries without source are skipped silently. 
*/ -export async function syncBenchmarks(entries: BenchmarkEntry[]): Promise { +export async function syncProjects(entries: ProjectEntry[]): Promise { for (const entry of entries) { if (!entry.source) continue; - console.log(`Syncing benchmark '${entry.id}' from ${entry.source.url}...`); - await syncBenchmark(entry); - console.log(`Benchmark '${entry.id}' synced.`); + console.log(`Syncing project '${entry.id}' from ${entry.source.url}...`); + await syncProject(entry); + console.log(`Project '${entry.id}' synced.`); } } diff --git a/packages/core/src/projects.ts b/packages/core/src/projects.ts new file mode 100644 index 00000000..7377f595 --- /dev/null +++ b/packages/core/src/projects.ts @@ -0,0 +1,393 @@ +/** + * Project registry for AgentV Studio multi-project support. + * + * A Project = any directory containing a `.agentv/` folder. Projects hold + * eval runs, and (incrementally) traces, spans, and other telemetry — + * matching the "project" terminology used by Arize Phoenix, Langfuse, + * Braintrust, W&B Weave, and LangSmith. + * + * The registry lives at `~/.agentv/projects.yaml` and is the single source + * of truth for which projects Studio shows. Studio re-reads the file on every + * `/api/projects` request, so edits (direct, via POST /api/projects, via + * the CLI's --add/--remove, or via a Kubernetes ConfigMap mount) are reflected + * without restarting `agentv serve`. + * + * YAML format (all keys snake_case per AGENTS.md §"Wire Format Convention"): + * projects: + * - id: my-app + * name: My App + * path: /home/user/projects/my-app + * source: + * url: ${{ PROJECT_REPO_URL }} + * ref: ${{ PROJECT_REPO_REF:-main }} + * added_at: "2026-03-20T10:00:00Z" + * last_opened_at: "2026-03-30T14:00:00Z" + * + * The optional `source` field enables remote sync via syncProjects(): + * first run — git clone --depth 1 --filter=blob:none + * subsequent runs — git pull --ff-only + * + * Concurrency: the registry assumes a single writer. 
All mutating calls + * (add/remove/touchProject) do read-modify-write on projects.yaml + * without a lock. Studio's HTTP handlers are serialized by Node's + * single-threaded event loop, which satisfies the 24/7 deployment case. + * Run only one `agentv` process against a given home at a time. + * + * Legacy registry filename: the registry used to be called `benchmarks.yaml` + * with a top-level `benchmarks:` key. On first load, a one-time migration + * detects the old file, rewrites the top-level key to `projects:`, and + * atomically renames the file. See migrateLegacyBenchmarksFile() below. + * + * To extend: + * - CRUD: loadProjectRegistry() / saveProjectRegistry() + the + * add/remove/touch helpers. + * - discoverProjects() is a one-shot filesystem utility for bulk + * registration; it does not run in the request path. + */ + +import { + existsSync, + mkdirSync, + readFileSync, + readdirSync, + renameSync, + statSync, + unlinkSync, + writeFileSync, +} from 'node:fs'; +import path from 'node:path'; + +import { stringify as stringifyYaml } from 'yaml'; + +import { interpolateEnv } from './evaluation/interpolation.js'; +import { parseYamlValue } from './evaluation/yaml-loader.js'; +import { getAgentvConfigDir } from './paths.js'; + +// ── Types ─────────────────────────────────────────────────────────────── + +export interface ProjectSource { + url: string; + ref: string; +} + +export interface ProjectEntry { + id: string; + name: string; + path: string; + addedAt: string; + lastOpenedAt: string; + source?: ProjectSource; +} + +export interface ProjectRegistry { + projects: ProjectEntry[]; +} + +// ── Registry path ─────────────────────────────────────────────────────── + +export function getProjectsRegistryPath(): string { + return path.join(getAgentvConfigDir(), 'projects.yaml'); +} + +/** Legacy registry path, kept private — only the migration helper reads it. 
*/ +function getLegacyBenchmarksRegistryPath(): string { + return path.join(getAgentvConfigDir(), 'benchmarks.yaml'); +} + +// ── Legacy file migration ─────────────────────────────────────────────── +// One-time, idempotent. Called at the top of loadProjectRegistry() so any +// entry point (CLI, Studio server, tests) picks the new file up transparently. +// +// Rules: +// - projects.yaml exists, benchmarks.yaml missing → no-op (already migrated). +// - benchmarks.yaml exists, projects.yaml missing → migrate: read → rewrite +// top-level key benchmarks: → projects: → atomic rename temp → projects.yaml, +// then unlink benchmarks.yaml. Logs one line to stderr. +// - both exist → projects.yaml wins; benchmarks.yaml is left alone but a +// one-line warning goes to stderr so the operator can investigate. +// - neither exists → no-op (fresh install). +// +// The migration only rewrites the top-level key; entry shapes are unchanged. + +function migrateLegacyBenchmarksFile(): void { + const newPath = getProjectsRegistryPath(); + const oldPath = getLegacyBenchmarksRegistryPath(); + const newExists = existsSync(newPath); + const oldExists = existsSync(oldPath); + + if (!oldExists) return; + + if (newExists) { + console.warn( + `[agentv] Both ${oldPath} and ${newPath} exist. Using ${path.basename(newPath)}; ` + + `delete ${path.basename(oldPath)} when you've confirmed the new file is correct.`, + ); + return; + } + + let parsed: { benchmarks?: unknown } | null = null; + try { + const raw = readFileSync(oldPath, 'utf-8'); + parsed = parseYamlValue(raw) as { benchmarks?: unknown } | null; + } catch (err) { + console.warn( + `[agentv] Failed to read legacy ${path.basename(oldPath)} for migration: ${(err as Error).message}. Leaving the file in place; you may need to migrate it manually.`, + ); + return; + } + + // Rewrite top-level key only; entries themselves stay snake_case on disk. 
+ const entries = + parsed && typeof parsed === 'object' && Array.isArray(parsed.benchmarks) + ? (parsed.benchmarks as unknown[]) + : []; + const newContent = stringifyYaml({ projects: entries }); + + // Atomic temp + rename so a crash mid-write never leaves a corrupted + // projects.yaml. Only after the rename succeeds do we unlink the old file. + const tempPath = `${newPath}.migrating`; + try { + mkdirSync(path.dirname(newPath), { recursive: true }); + writeFileSync(tempPath, newContent, 'utf-8'); + renameSync(tempPath, newPath); + unlinkSync(oldPath); + } catch (err) { + // Clean up the temp if rename failed. + try { + if (existsSync(tempPath)) unlinkSync(tempPath); + } catch { + /* best-effort */ + } + console.warn( + `[agentv] Failed to migrate ${path.basename(oldPath)} → ${path.basename(newPath)}: ` + + `${(err as Error).message}. Legacy file left in place.`, + ); + return; + } + + console.log( + `[agentv] Migrated registry: ${path.basename(oldPath)} → ${path.basename(newPath)} ` + + `(${entries.length} entr${entries.length === 1 ? 'y' : 'ies'})`, + ); +} + +// ── Load / Save ───────────────────────────────────────────────────────── +// YAML uses snake_case per AGENTS.md §"Wire Format Convention"; TypeScript +// internals stay camelCase. fromYaml / toYaml handle the translation; every +// other function in this module works in camelCase only. + +interface ProjectSourceYaml { + url: string; + ref: string; +} + +interface ProjectEntryYaml { + id: string; + name: string; + path: string; + added_at: string; + last_opened_at: string; + source?: ProjectSourceYaml; +} + +function fromYaml(raw: unknown): ProjectEntry | null { + if (!raw || typeof raw !== 'object') return null; + const e = raw as Partial; + if (typeof e.id !== 'string' || typeof e.name !== 'string' || typeof e.path !== 'string') { + return null; + } + const entry: ProjectEntry = { + id: e.id, + name: e.name, + path: e.path, + addedAt: typeof e.added_at === 'string' ? 
e.added_at : '', + lastOpenedAt: typeof e.last_opened_at === 'string' ? e.last_opened_at : '', + }; + if (e.source && typeof e.source === 'object') { + const s = e.source as Partial; + if (typeof s.url === 'string' && typeof s.ref === 'string') { + entry.source = { url: s.url, ref: s.ref }; + } + } + return entry; +} + +function toYaml(entry: ProjectEntry): ProjectEntryYaml { + const yaml: ProjectEntryYaml = { + id: entry.id, + name: entry.name, + path: entry.path, + added_at: entry.addedAt, + last_opened_at: entry.lastOpenedAt, + }; + if (entry.source) { + yaml.source = { url: entry.source.url, ref: entry.source.ref }; + } + return yaml; +} + +export function loadProjectRegistry(): ProjectRegistry { + migrateLegacyBenchmarksFile(); + const registryPath = getProjectsRegistryPath(); + if (!existsSync(registryPath)) { + return { projects: [] }; + } + try { + const raw = readFileSync(registryPath, 'utf-8'); + const parsed = parseYamlValue(raw) as { projects?: unknown } | null | undefined; + if (!parsed || typeof parsed !== 'object') { + return { projects: [] }; + } + const env = process.env as Record; + const projects = Array.isArray(parsed.projects) + ? (parsed.projects as unknown[]) + .map((e) => fromYaml(interpolateEnv(e, env))) + .filter((e): e is ProjectEntry => e !== null) + : []; + return { projects }; + } catch { + return { projects: [] }; + } +} + +export function saveProjectRegistry(registry: ProjectRegistry): void { + const registryPath = getProjectsRegistryPath(); + const dir = path.dirname(registryPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + const payload = { projects: registry.projects.map(toYaml) }; + writeFileSync(registryPath, stringifyYaml(payload), 'utf-8'); +} + +// ── CRUD operations ───────────────────────────────────────────────────── + +/** + * Derive a URL-safe project ID from a directory path. + * Uses the directory basename, lowercased, with non-alphanumeric chars replaced by hyphens. 
/**
 * Derive a URL-safe project ID from a directory path.
 *
 * Uses the directory basename, lowercased, with every character outside
 * `[a-z0-9-]` replaced by a hyphen, runs of hyphens collapsed, and one
 * leading/trailing hyphen trimmed. When nothing usable is left (e.g. the
 * basename is only punctuation) the ID falls back to `project`.
 *
 * @param dirPath - Directory whose basename seeds the ID.
 * @param existingIds - IDs already present in the registry.
 * @returns The base ID if it is free, otherwise the base with the lowest free
 *   numeric suffix starting at `-2` (`alpha`, `alpha-2`, `alpha-3`, …).
 */
export function deriveProjectId(dirPath: string, existingIds: string[]): string {
  const slug = path
    .basename(dirPath)
    .toLowerCase()
    .replace(/[^a-z0-9-]/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '');
  // Apply the fallback before building suffixed candidates; otherwise an
  // empty slug whose fallback is taken would yield IDs like "-2".
  const base = slug || 'project';
  const taken = new Set(existingIds);
  let candidate = base;
  for (let suffix = 2; taken.has(candidate); suffix++) {
    candidate = `${base}-${suffix}`;
  }
  return candidate;
}

/**
 * Register a project by path.
 *
 * @param projectPath - Path to the project directory; resolved to an absolute path.
 * @returns The new entry, or the existing one if that absolute path is already registered.
 * @throws Error if the path does not exist or does not contain `.agentv`.
 */
export function addProject(projectPath: string): ProjectEntry {
  const absPath = path.resolve(projectPath);
  if (!existsSync(absPath)) {
    throw new Error(`Directory not found: ${absPath}`);
  }
  if (!existsSync(path.join(absPath, '.agentv'))) {
    throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
  }

  const registry = loadProjectRegistry();
  const existing = registry.projects.find((p) => p.path === absPath);
  if (existing) {
    return existing;
  }

  // One timestamp for both fields so a fresh entry reads as "opened when added".
  const now = new Date().toISOString();
  const entry: ProjectEntry = {
    id: deriveProjectId(
      absPath,
      registry.projects.map((p) => p.id),
    ),
    name: path.basename(absPath),
    path: absPath,
    addedAt: now,
    lastOpenedAt: now,
  };
  registry.projects.push(entry);
  saveProjectRegistry(registry);
  return entry;
}

/**
 * Remove a project by ID.
 *
 * @returns `true` if an entry was removed and the registry saved, `false` if
 *   no entry has that ID (the registry is left untouched).
 */
export function removeProject(projectId: string): boolean {
  const registry = loadProjectRegistry();
  const idx = registry.projects.findIndex((p) => p.id === projectId);
  if (idx < 0) return false;
  registry.projects.splice(idx, 1);
  saveProjectRegistry(registry);
  return true;
}
/**
 * Look up a project by ID. Returns undefined if not found.
 */
export function getProject(projectId: string): ProjectEntry | undefined {
  const { projects } = loadProjectRegistry();
  return projects.find((candidate) => candidate.id === projectId);
}

/**
 * Update lastOpenedAt for a project.
 * Does nothing (and does not save) when no entry has the given ID.
 */
export function touchProject(projectId: string): void {
  const registry = loadProjectRegistry();
  const match = registry.projects.find((candidate) => candidate.id === projectId);
  if (!match) return;
  match.lastOpenedAt = new Date().toISOString();
  saveProjectRegistry(registry);
}

// ── Discovery utility ───────────────────────────────────────────────────

/**
 * Scan a directory tree (up to maxDepth levels) for directories containing `.agentv/`.
 * Returns absolute paths of discovered project directories, sorted for
 * deterministic iteration. This is a one-shot helper for bulk registration;
 * Studio does not scan at request time.
 *
 * @param rootDir - Directory to start from; a missing path or non-directory yields `[]`.
 * @param maxDepth - Deepest level (root is 0) at which a directory is still checked.
 */
export function discoverProjects(rootDir: string, maxDepth = 2): string[] {
  const absRoot = path.resolve(rootDir);
  const rootUsable = existsSync(absRoot) && statSync(absRoot).isDirectory();
  if (!rootUsable) {
    return [];
  }

  const found: string[] = [];
  // Explicit work list instead of recursion; visit order is irrelevant
  // because the result is sorted before returning.
  const pending: Array<{ dir: string; depth: number }> = [{ dir: absRoot, depth: 0 }];

  while (pending.length > 0) {
    const current = pending.pop();
    if (current === undefined) break;
    const { dir, depth } = current;

    if (depth > maxDepth) continue;

    // A project directory is recorded and never descended into.
    if (existsSync(path.join(dir, '.agentv'))) {
      found.push(dir);
      continue;
    }

    // At the depth limit the directory was checked above but is not expanded.
    if (depth === maxDepth) continue;

    try {
      for (const child of readdirSync(dir, { withFileTypes: true })) {
        const skipped =
          !child.isDirectory() || child.name.startsWith('.') || child.name === 'node_modules';
        if (skipped) continue;
        pending.push({ dir: path.join(dir, child.name), depth: depth + 1 });
      }
    } catch {
      // Permission denied or other FS errors — skip
    }
  }

  return found.sort();
}
afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test'; -import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import { - addBenchmark, - getBenchmark, - getBenchmarksRegistryPath, - loadBenchmarkRegistry, - removeBenchmark, - touchBenchmark, -} from '../src/benchmarks.js'; - -describe('benchmarks registry', () => { - let fakeHome: string; - let reposRoot: string; - // biome-ignore lint/suspicious/noExplicitAny: spy typing from bun:test is intentionally loose. - let homedirSpy: any; - - beforeEach(() => { - fakeHome = mkdtempSync(path.join(os.tmpdir(), 'agentv-benchmarks-')); - reposRoot = mkdtempSync(path.join(os.tmpdir(), 'agentv-repos-')); - homedirSpy = spyOn(os, 'homedir').mockReturnValue(fakeHome); - }); - - afterEach(() => { - homedirSpy?.mockRestore?.(); - rmSync(fakeHome, { recursive: true, force: true }); - rmSync(reposRoot, { recursive: true, force: true }); - }); - - function makeRepo(name: string): string { - const dir = path.join(reposRoot, name); - mkdirSync(path.join(dir, '.agentv'), { recursive: true }); - return dir; - } - - it('starts empty and surfaces new entries after addBenchmark', () => { - expect(loadBenchmarkRegistry().benchmarks).toEqual([]); - - const repoPath = makeRepo('alpha'); - const entry = addBenchmark(repoPath); - expect(entry.name).toBe('alpha'); - expect(entry.path).toBe(path.resolve(repoPath)); - - // Subsequent load reflects the write (per-request reload model). 
- expect(loadBenchmarkRegistry().benchmarks).toHaveLength(1); - expect(getBenchmark(entry.id)?.path).toBe(entry.path); - }); - - it('addBenchmark refuses a path with no .agentv/ directory', () => { - const bare = mkdtempSync(path.join(os.tmpdir(), 'agentv-bare-')); - expect(() => addBenchmark(bare)).toThrow(/No \.agentv\/ directory found/); - rmSync(bare, { recursive: true, force: true }); - }); - - it('addBenchmark is idempotent on the same path', () => { - const repoPath = makeRepo('idempotent'); - const first = addBenchmark(repoPath); - const second = addBenchmark(repoPath); - expect(first.id).toBe(second.id); - expect(loadBenchmarkRegistry().benchmarks).toHaveLength(1); - }); - - it('removeBenchmark drops the entry by id', () => { - const entry = addBenchmark(makeRepo('to-remove')); - expect(removeBenchmark(entry.id)).toBe(true); - expect(loadBenchmarkRegistry().benchmarks).toEqual([]); - expect(removeBenchmark(entry.id)).toBe(false); - }); - - it('touchBenchmark updates lastOpenedAt without affecting other entries', () => { - const a = addBenchmark(makeRepo('a')); - const b = addBenchmark(makeRepo('b')); - const originalB = loadBenchmarkRegistry().benchmarks.find((e) => e.id === b.id); - - touchBenchmark(a.id); - const reloadedA = loadBenchmarkRegistry().benchmarks.find((e) => e.id === a.id); - const reloadedB = loadBenchmarkRegistry().benchmarks.find((e) => e.id === b.id); - expect(reloadedA?.lastOpenedAt).not.toBe(a.lastOpenedAt); - expect(reloadedB?.lastOpenedAt).toBe(originalB?.lastOpenedAt); - }); - - it('serializes benchmark entries with snake_case keys on disk', () => { - const entry = addBenchmark(makeRepo('snake')); - - const yamlOnDisk = readFileSync(getBenchmarksRegistryPath(), 'utf-8'); - expect(yamlOnDisk).toContain('added_at:'); - expect(yamlOnDisk).toContain('last_opened_at:'); - expect(yamlOnDisk).not.toContain('addedAt:'); - expect(yamlOnDisk).not.toContain('lastOpenedAt:'); - - // Round-trips cleanly back into the camelCase TS shape. 
- const reloaded = loadBenchmarkRegistry().benchmarks.find((b) => b.id === entry.id); - expect(reloaded).toMatchObject({ - id: entry.id, - addedAt: entry.addedAt, - lastOpenedAt: entry.lastOpenedAt, - }); - }); - - it('round-trips source field through YAML', () => { - const registryPath = getBenchmarksRegistryPath(); - mkdirSync(path.dirname(registryPath), { recursive: true }); - writeFileSync( - registryPath, - `benchmarks: - - id: remote-bench - name: Remote Bench - path: /srv/agentv/repo - source: - url: https://github.com/example/repo - ref: main - added_at: "2026-01-01T00:00:00Z" - last_opened_at: "2026-01-01T00:00:00Z" -`, - 'utf-8', - ); - - const registry = loadBenchmarkRegistry(); - expect(registry.benchmarks).toHaveLength(1); - const entry = registry.benchmarks[0]; - expect(entry.source).toEqual({ url: 'https://github.com/example/repo', ref: 'main' }); - }); - - it('interpolates env vars in source url', () => { - const registryPath = getBenchmarksRegistryPath(); - mkdirSync(path.dirname(registryPath), { recursive: true }); - // Use concatenation to avoid JS template literal evaluating ${{ ... 
}} - const d = '$'; - writeFileSync( - registryPath, - `benchmarks:\n - id: env-bench\n name: Env Bench\n path: /srv/agentv/repo\n source:\n url: "${d}{{ BENCH_URL }}"\n ref: main\n added_at: "2026-01-01T00:00:00Z"\n last_opened_at: "2026-01-01T00:00:00Z"\n`, - 'utf-8', - ); - - const origUrl = process.env.BENCH_URL; - try { - process.env.BENCH_URL = 'https://github.com/example/bench-repo'; - const registry = loadBenchmarkRegistry(); - expect(registry.benchmarks[0].source?.url).toBe('https://github.com/example/bench-repo'); - } finally { - if (origUrl === undefined) process.env.BENCH_URL = undefined; - else process.env.BENCH_URL = origUrl; - } - }); - - it('entries without source work unchanged', () => { - const repoPath = makeRepo('no-source'); - const entry = addBenchmark(repoPath); - expect(entry.source).toBeUndefined(); - - const reloaded = loadBenchmarkRegistry().benchmarks.find((b) => b.id === entry.id); - expect(reloaded?.source).toBeUndefined(); - }); -}); diff --git a/packages/core/test/benchmark-sync.test.ts b/packages/core/test/project-sync.test.ts similarity index 80% rename from packages/core/test/benchmark-sync.test.ts rename to packages/core/test/project-sync.test.ts index 1170892f..afe988b2 100644 --- a/packages/core/test/benchmark-sync.test.ts +++ b/packages/core/test/project-sync.test.ts @@ -4,21 +4,21 @@ import { mkdirSync, mkdtempSync, rmSync } from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { syncBenchmark, syncBenchmarks } from '../src/benchmark-sync.js'; -import type { BenchmarkEntry } from '../src/benchmarks.js'; +import { syncProject, syncProjects } from '../src/project-sync.js'; +import type { ProjectEntry } from '../src/projects.js'; -function makeEntry(overrides: Partial = {}): BenchmarkEntry { +function makeEntry(overrides: Partial = {}): ProjectEntry { return { - id: 'test-bench', - name: 'Test Bench', - path: '/tmp/fake-bench', + id: 'test-project', + name: 'Test Project', + path: '/tmp/fake-project', 
addedAt: '', lastOpenedAt: '', ...overrides, }; } -describe('syncBenchmark', () => { +describe('syncProject', () => { let tmpDir: string; beforeEach(() => { @@ -32,7 +32,7 @@ describe('syncBenchmark', () => { it('throws when entry has no source', async () => { const entry = makeEntry({ path: tmpDir }); - await expect(syncBenchmark(entry)).rejects.toThrow(/no source defined/); + await expect(syncProject(entry)).rejects.toThrow(/no source defined/); }); it('runs git clone when .git does not exist', async () => { @@ -42,7 +42,7 @@ describe('syncBenchmark', () => { path: dest, source: { url: 'https://github.com/example/repo', ref: 'main' }, }); - await syncBenchmark(entry); + await syncProject(entry); expect(spy).toHaveBeenCalledWith( 'git', [ @@ -66,7 +66,7 @@ describe('syncBenchmark', () => { path: tmpDir, source: { url: 'https://github.com/example/repo', ref: 'main' }, }); - await syncBenchmark(entry); + await syncProject(entry); expect(spy).toHaveBeenCalledWith( 'git', ['-C', tmpDir, 'pull', '--ff-only'], @@ -75,14 +75,14 @@ describe('syncBenchmark', () => { }); }); -describe('syncBenchmarks', () => { +describe('syncProjects', () => { afterEach(() => { mock.restore(); }); it('skips entries with no source', async () => { const spy = spyOn(childProcess, 'execFileSync').mockReturnValue(Buffer.from('')); - await syncBenchmarks([makeEntry()]); + await syncProjects([makeEntry()]); expect(spy).not.toHaveBeenCalled(); }); @@ -91,7 +91,7 @@ describe('syncBenchmarks', () => { const entries = [ makeEntry({ source: { url: 'https://github.com/example/repo', ref: 'main' } }), ]; - await syncBenchmarks(entries); + await syncProjects(entries); expect(spy).toHaveBeenCalled(); }); }); diff --git a/packages/core/test/projects.test.ts b/packages/core/test/projects.test.ts new file mode 100644 index 00000000..ba731c39 --- /dev/null +++ b/packages/core/test/projects.test.ts @@ -0,0 +1,277 @@ +import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test'; +import { 
existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
import os from 'node:os';
import path from 'node:path';

import {
  addProject,
  getProject,
  getProjectsRegistryPath,
  loadProjectRegistry,
  removeProject,
  touchProject,
} from '../src/projects.js';

// Registry CRUD behaviour. Every test runs against a throwaway home directory
// (os.homedir is mocked) so the real registry is never touched.
describe('projects registry', () => {
  let fakeHome: string;
  let reposRoot: string;
  // biome-ignore lint/suspicious/noExplicitAny: spy typing from bun:test is intentionally loose.
  let homedirSpy: any;

  beforeEach(() => {
    fakeHome = mkdtempSync(path.join(os.tmpdir(), 'agentv-projects-'));
    reposRoot = mkdtempSync(path.join(os.tmpdir(), 'agentv-repos-'));
    homedirSpy = spyOn(os, 'homedir').mockReturnValue(fakeHome);
  });

  afterEach(() => {
    homedirSpy?.mockRestore?.();
    rmSync(fakeHome, { recursive: true, force: true });
    rmSync(reposRoot, { recursive: true, force: true });
  });

  /** Create `<reposRoot>/<name>/.agentv/` and return the repo directory. */
  function makeRepo(name: string): string {
    const dir = path.join(reposRoot, name);
    mkdirSync(path.join(dir, '.agentv'), { recursive: true });
    return dir;
  }

  it('starts empty and surfaces new entries after addProject', () => {
    expect(loadProjectRegistry().projects).toEqual([]);

    const repoPath = makeRepo('alpha');
    const entry = addProject(repoPath);
    expect(entry.name).toBe('alpha');
    expect(entry.path).toBe(path.resolve(repoPath));

    // Subsequent load reflects the write (per-request reload model).
    expect(loadProjectRegistry().projects).toHaveLength(1);
    expect(getProject(entry.id)?.path).toBe(entry.path);
  });

  it('addProject refuses a path with no .agentv/ directory', () => {
    const bare = mkdtempSync(path.join(os.tmpdir(), 'agentv-bare-'));
    try {
      expect(() => addProject(bare)).toThrow(/No \.agentv\/ directory found/);
    } finally {
      // Clean up even when the assertion above fails.
      rmSync(bare, { recursive: true, force: true });
    }
  });

  it('addProject is idempotent on the same path', () => {
    const repoPath = makeRepo('idempotent');
    const first = addProject(repoPath);
    const second = addProject(repoPath);
    expect(first.id).toBe(second.id);
    expect(loadProjectRegistry().projects).toHaveLength(1);
  });

  it('removeProject drops the entry by id', () => {
    const entry = addProject(makeRepo('to-remove'));
    expect(removeProject(entry.id)).toBe(true);
    expect(loadProjectRegistry().projects).toEqual([]);
    expect(removeProject(entry.id)).toBe(false);
  });

  it('touchProject updates lastOpenedAt without affecting other entries', () => {
    const a = addProject(makeRepo('a'));
    const b = addProject(makeRepo('b'));
    const originalB = loadProjectRegistry().projects.find((e) => e.id === b.id);

    // ISO timestamps have millisecond resolution. Wait for the clock to tick
    // so touchProject cannot stamp the same instant addProject did, which
    // would make the inequality below fail intermittently.
    const stampedAt = Date.now();
    while (Date.now() === stampedAt) {
      /* spin for < 1ms */
    }

    touchProject(a.id);
    const reloadedA = loadProjectRegistry().projects.find((e) => e.id === a.id);
    const reloadedB = loadProjectRegistry().projects.find((e) => e.id === b.id);
    expect(reloadedA?.lastOpenedAt).not.toBe(a.lastOpenedAt);
    expect(reloadedB?.lastOpenedAt).toBe(originalB?.lastOpenedAt);
  });

  it('serializes project entries with snake_case keys on disk', () => {
    const entry = addProject(makeRepo('snake'));

    const yamlOnDisk = readFileSync(getProjectsRegistryPath(), 'utf-8');
    expect(yamlOnDisk).toContain('added_at:');
    expect(yamlOnDisk).toContain('last_opened_at:');
    expect(yamlOnDisk).not.toContain('addedAt:');
    expect(yamlOnDisk).not.toContain('lastOpenedAt:');

    // Round-trips cleanly back into the camelCase TS shape.
    const reloaded = loadProjectRegistry().projects.find((p) => p.id === entry.id);
    expect(reloaded).toMatchObject({
      id: entry.id,
      addedAt: entry.addedAt,
      lastOpenedAt: entry.lastOpenedAt,
    });
  });

  it('round-trips source field through YAML', () => {
    const registryPath = getProjectsRegistryPath();
    mkdirSync(path.dirname(registryPath), { recursive: true });
    writeFileSync(
      registryPath,
      `projects:
  - id: remote-bench
    name: Remote Bench
    path: /srv/agentv/repo
    source:
      url: https://github.com/example/repo
      ref: main
    added_at: "2026-01-01T00:00:00Z"
    last_opened_at: "2026-01-01T00:00:00Z"
`,
      'utf-8',
    );

    const registry = loadProjectRegistry();
    expect(registry.projects).toHaveLength(1);
    const entry = registry.projects[0];
    expect(entry.source).toEqual({ url: 'https://github.com/example/repo', ref: 'main' });
  });

  it('interpolates env vars in source url', () => {
    const registryPath = getProjectsRegistryPath();
    mkdirSync(path.dirname(registryPath), { recursive: true });
    // Use concatenation to avoid JS template literal evaluating ${{ ... }}
    const d = '$';
    writeFileSync(
      registryPath,
      `projects:\n  - id: env-bench\n    name: Env Bench\n    path: /srv/agentv/repo\n    source:\n      url: "${d}{{ BENCH_URL }}"\n      ref: main\n    added_at: "2026-01-01T00:00:00Z"\n    last_opened_at: "2026-01-01T00:00:00Z"\n`,
      'utf-8',
    );

    const origUrl = process.env.BENCH_URL;
    try {
      process.env.BENCH_URL = 'https://github.com/example/bench-repo';
      const registry = loadProjectRegistry();
      expect(registry.projects[0].source?.url).toBe('https://github.com/example/bench-repo');
    } finally {
      // Assigning `undefined` to a process.env key stores the string
      // "undefined" under Node, so remove the key when it was originally unset.
      if (origUrl === undefined) Reflect.deleteProperty(process.env, 'BENCH_URL');
      else process.env.BENCH_URL = origUrl;
    }
  });

  it('entries without source work unchanged', () => {
    const repoPath = makeRepo('no-source');
    const entry = addProject(repoPath);
    expect(entry.source).toBeUndefined();

    const reloaded = loadProjectRegistry().projects.find((p) => p.id === entry.id);
    expect(reloaded?.source).toBeUndefined();
  });
});

// ── Legacy benchmarks.yaml → projects.yaml migration ─────────────────────
// Migration runs on every loadProjectRegistry() call but only acts when the
// state demands it. These tests cover the four state transitions: legacy
// only, new only, both present, neither.

describe('legacy benchmarks.yaml migration', () => {
  let fakeHome: string;
  // biome-ignore lint/suspicious/noExplicitAny: spy typing from bun:test is intentionally loose.
  let homedirSpy: any;

  beforeEach(() => {
    fakeHome = mkdtempSync(path.join(os.tmpdir(), 'agentv-migration-'));
    homedirSpy = spyOn(os, 'homedir').mockReturnValue(fakeHome);
  });

  afterEach(() => {
    homedirSpy?.mockRestore?.();
    rmSync(fakeHome, { recursive: true, force: true });
  });

  /** Location of the pre-rename registry inside the fake home. */
  function legacyPath(): string {
    return path.join(fakeHome, '.agentv', 'benchmarks.yaml');
  }

  /** Write `content` as the legacy registry, creating `.agentv/` if needed. */
  function writeLegacy(content: string): void {
    mkdirSync(path.dirname(legacyPath()), { recursive: true });
    writeFileSync(legacyPath(), content, 'utf-8');
  }

  it('migrates legacy benchmarks.yaml to projects.yaml on first load', () => {
    writeLegacy(`benchmarks:
  - id: legacy-app
    name: Legacy App
    path: /srv/legacy
    added_at: "2026-01-01T00:00:00Z"
    last_opened_at: "2026-01-02T00:00:00Z"
`);

    const registry = loadProjectRegistry();

    // The migration ran: legacy gone, new file present, content preserved.
    expect(existsSync(legacyPath())).toBe(false);
    expect(existsSync(getProjectsRegistryPath())).toBe(true);
    expect(registry.projects).toHaveLength(1);
    expect(registry.projects[0]).toMatchObject({
      id: 'legacy-app',
      name: 'Legacy App',
      path: '/srv/legacy',
      addedAt: '2026-01-01T00:00:00Z',
      lastOpenedAt: '2026-01-02T00:00:00Z',
    });

    // On-disk YAML has the new top-level key.
    const yamlOnDisk = readFileSync(getProjectsRegistryPath(), 'utf-8');
    expect(yamlOnDisk).toContain('projects:');
    expect(yamlOnDisk).not.toMatch(/^benchmarks:/m);
  });

  it('is idempotent — second load is a no-op once migrated', () => {
    writeLegacy(`benchmarks:
  - id: legacy-app
    name: Legacy App
    path: /srv/legacy
    added_at: "2026-01-01T00:00:00Z"
    last_opened_at: "2026-01-01T00:00:00Z"
`);
    loadProjectRegistry(); // migrate
    // Compare file contents (not mtimes): a second load must not rewrite the file.
    const contentAfterMigration = readFileSync(getProjectsRegistryPath(), 'utf-8');
    loadProjectRegistry(); // should be no-op
    const contentAfterReload = readFileSync(getProjectsRegistryPath(), 'utf-8');
    expect(contentAfterReload).toBe(contentAfterMigration);
    expect(existsSync(legacyPath())).toBe(false);
  });

  it('prefers projects.yaml and warns when both files exist', () => {
    // Both files present, with different content.
    writeLegacy(`benchmarks:
  - id: stale
    name: Stale Legacy
    path: /srv/stale
    added_at: "2026-01-01T00:00:00Z"
    last_opened_at: "2026-01-01T00:00:00Z"
`);
    mkdirSync(path.dirname(getProjectsRegistryPath()), { recursive: true });
    writeFileSync(
      getProjectsRegistryPath(),
      `projects:
  - id: fresh
    name: Fresh
    path: /srv/fresh
    added_at: "2026-02-01T00:00:00Z"
    last_opened_at: "2026-02-01T00:00:00Z"
`,
      'utf-8',
    );

    const warnSpy = spyOn(console, 'warn').mockImplementation(() => {});
    try {
      const registry = loadProjectRegistry();
      // Loaded from projects.yaml, not the legacy file.
      expect(registry.projects).toHaveLength(1);
      expect(registry.projects[0].id).toBe('fresh');
      // Legacy file is left in place for the operator to inspect/delete.
      expect(existsSync(legacyPath())).toBe(true);
      // Warning was emitted.
      expect(warnSpy).toHaveBeenCalled();
    } finally {
      warnSpy.mockRestore?.();
    }
  });

  it('is a no-op when neither file exists (fresh install)', () => {
    const registry = loadProjectRegistry();
    expect(registry.projects).toEqual([]);
    expect(existsSync(legacyPath())).toBe(false);
    // loadProjectRegistry doesn't pre-create the new file; saveProjectRegistry does.
    expect(existsSync(getProjectsRegistryPath())).toBe(false);
  });
});