Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/cli/src/commands/inspect/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ export interface ResultFileMeta {
sizeBytes: number;
}

function buildRunId(relativeRunPath: string): string {
export function buildRunId(relativeRunPath: string): string {
const normalized = relativeRunPath.split(path.sep).join('/');
const segments = normalized.split('/').filter(Boolean);
if (segments.length >= 2) {
Expand Down
2 changes: 2 additions & 0 deletions apps/cli/src/commands/results/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { subcommands } from 'cmd-ts';

import { resultsExportCommand } from './export.js';
import { resultsFailuresCommand } from './failures.js';
import { resultsReindexCommand } from './reindex.js';
import { resultsReportCommand } from './report.js';
import { resultsShowCommand } from './show.js';
import { resultsSummaryCommand } from './summary.js';
Expand All @@ -17,5 +18,6 @@ export const resultsCommand = subcommands({
failures: resultsFailuresCommand,
show: resultsShowCommand,
validate: resultsValidateCommand,
reindex: resultsReindexCommand,
},
});
123 changes: 123 additions & 0 deletions apps/cli/src/commands/results/reindex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/**
* `agentv results reindex` — rebuild index/runs.jsonl from the existing run tree.
*
* Use this once to backfill the index after upgrading an existing results repo.
* After the first push following the upgrade, new runs are appended automatically.
*
* How it works:
* 1. Fetch/pull the latest state of the results repo.
* 2. Walk all run directories via listResultFilesFromRunsDir.
* 3. Read each run's first JSONL result to extract target/experiment.
* 4. Write a complete index/runs.jsonl and commit+push it.
*/

import { readFileSync } from 'node:fs';
import path from 'node:path';

import { command, flag, option, optional, string } from 'cmd-ts';

import {
type ResultsConfig,
type RunIndexEntry,
loadConfig,
normalizeResultsConfig,
reindexResultsRepo,
resolveResultsRepoRunsDir,
} from '@agentv/core';

import { findRepoRoot } from '../eval/shared.js';
import { listResultFilesFromRunsDir } from '../inspect/utils.js';

/**
 * Load the project config that applies to `cwd` and return its normalized
 * `results` section.
 *
 * @param cwd - Directory the command is operating in.
 * @returns The normalized results config, or `undefined` when the loaded
 *   config is missing or has no `results` section.
 */
async function loadNormalizedResultsConfig(
  cwd: string,
): Promise<Required<ResultsConfig> | undefined> {
  // Fall back to cwd itself when no enclosing repo root is found.
  const detectedRoot = await findRepoRoot(cwd);
  const repoRoot = detectedRoot ?? cwd;

  // NOTE(review): '_' is a placeholder file name inside cwd — presumably
  // loadConfig expects a file path and searches from its directory; confirm.
  const placeholderPath = path.join(cwd, '_');
  const loaded = await loadConfig(placeholderPath, repoRoot);

  const resultsSection = loaded?.results;
  return resultsSection ? normalizeResultsConfig(resultsSection) : undefined;
}

/**
 * Best-effort read of the `target` and `experiment` fields from the first
 * non-blank line of a run's JSONL manifest.
 *
 * The parsed JSON is validated instead of trusted: only non-empty string
 * values are returned, so malformed records cannot leak non-string data into
 * the index. Read or parse failures are reported on stderr and yield an empty
 * object so the caller can keep its fallback values.
 *
 * @param manifestPath - Path of the JSONL manifest to inspect.
 * @returns The fields that were present and valid; missing ones are omitted.
 */
function readRunManifestFields(manifestPath: string): { target?: string; experiment?: string } {
  try {
    const firstLine = readFileSync(manifestPath, 'utf8')
      .split('\n')
      .find((line) => line.trim());
    if (!firstLine) return {};

    const parsed: unknown = JSON.parse(firstLine);
    if (typeof parsed !== 'object' || parsed === null) return {};

    const { target, experiment } = parsed as { target?: unknown; experiment?: unknown };
    const fields: { target?: string; experiment?: string } = {};
    if (typeof target === 'string' && target) fields.target = target;
    if (typeof experiment === 'string' && experiment) fields.experiment = experiment;
    return fields;
  } catch (error: unknown) {
    // Deliberately non-fatal: the run is still indexed with fallback values,
    // but the failure is surfaced instead of being swallowed silently.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`Warning: could not read run metadata from ${manifestPath}: ${reason}`);
    return {};
  }
}

/**
 * `results reindex` subcommand: builds one index entry per run found under the
 * results repo's runs directory and hands the full list to `reindexResultsRepo`.
 *
 * Flags:
 * - `--dir` / `-d`: working directory used to locate the config (defaults to
 *   the current directory).
 * - `--dry-run`: print the entries that would be written and return without
 *   calling `reindexResultsRepo`.
 *
 * Exits the process with code 1 when no results repo is configured.
 */
export const resultsReindexCommand = command({
  name: 'reindex',
  description:
    'Backfill index/runs.jsonl in the results repo from the existing run tree (migration helper)',
  args: {
    dir: option({
      type: optional(string),
      long: 'dir',
      short: 'd',
      description: 'Working directory (default: current directory)',
    }),
    dryRun: flag({
      long: 'dry-run',
      description: 'Print the entries that would be written without committing',
    }),
  },
  handler: async ({ dir, dryRun }) => {
    const cwd = dir ?? process.cwd();
    const config = await loadNormalizedResultsConfig(cwd);
    if (!config) {
      console.error(
        'Error: No results repo configured. Add a results section to .agentv/config.yaml',
      );
      process.exit(1);
    }

    // NOTE(review): the scan reads whatever is currently under runsDir; nothing
    // in this handler refreshes that directory first — confirm it is up to date
    // (or that reindexResultsRepo accounts for it).
    const runsDir = resolveResultsRepoRunsDir(config);
    console.log(`Scanning runs from ${runsDir}…`);
    const metas = listResultFilesFromRunsDir(runsDir);

    const entries: RunIndexEntry[] = metas.map((meta) => {
      // The run id is either "<experiment>::<timestamp>" or a bare timestamp,
      // in which case the experiment falls back to "default".
      const sepIdx = meta.filename.indexOf('::');
      const fallbackExperiment = sepIdx === -1 ? 'default' : meta.filename.slice(0, sepIdx);

      // Values recorded in the manifest take precedence over the derived ones.
      const fields = readRunManifestFields(meta.path);

      return {
        run_id: meta.filename,
        timestamp: meta.timestamp,
        experiment: fields.experiment ?? fallbackExperiment,
        target: fields.target ?? '',
        test_count: meta.testCount,
        // Only the rate is available here, so the pass count is recovered from it.
        passed: Math.round(meta.passRate * meta.testCount),
        pass_rate: meta.passRate,
        avg_score: meta.avgScore,
        size_bytes: meta.sizeBytes,
        tags: [],
      };
    });

    if (dryRun) {
      console.log(`Would write ${entries.length} entries to index/runs.jsonl:`);
      for (const e of entries) {
        console.log(` ${e.run_id} (${e.test_count} tests, pass_rate=${e.pass_rate.toFixed(2)})`);
      }
      return;
    }

    const written = await reindexResultsRepo({ config, entries });
    if (written === 0) {
      console.log('Index is already up to date — no changes committed.');
    } else {
      console.log(`Reindexed ${written} runs and pushed index/runs.jsonl to ${config.repo}.`);
    }
  },
});
95 changes: 86 additions & 9 deletions apps/cli/src/commands/results/remote.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
import { existsSync } from 'node:fs';
import path from 'node:path';

import {
DEFAULT_THRESHOLD,
type EvaluationResult,
type ResultsConfig,
type ResultsRepoStatus,
type RunIndexEntry,
directPushResults,
directorySizeBytes,
getResultsRepoCachePaths,
getResultsRepoStatus,
loadConfig,
readRunIndex,
resolveResultsRepoRunsDir,
syncResultsRepo,
} from '@agentv/core';

import { RESULT_INDEX_FILENAME } from '../eval/result-layout.js';
import { findRepoRoot } from '../eval/shared.js';
import {
type ResultFileMeta,
buildRunId,
listResultFiles,
listResultFilesFromRunsDir,
} from '../inspect/utils.js';
Expand Down Expand Up @@ -128,6 +134,49 @@ export function decodeRemoteRunId(filename: string): string {
return filename.replace(REMOTE_RUN_PREFIX, '');
}

/**
 * Map a run id back to the manifest file it refers to inside `runsDir`.
 *
 * - "experiment::timestamp" resolves to runsDir/experiment/timestamp/<manifest>
 * - a bare "timestamp" resolves to runsDir/default/timestamp/<manifest>
 *
 * Only the first "::" is treated as the separator; everything after it is the
 * run directory name.
 *
 * @param runId - Run id, with or without an experiment prefix.
 * @param runsDir - Root directory that holds the run tree.
 * @returns Path of the run's manifest file.
 */
function runIdToManifestPath(runId: string, runsDir: string): string {
  const separator = '::';
  const splitAt = runId.indexOf(separator);

  let experimentDir = 'default';
  let runDir = runId;
  if (splitAt !== -1) {
    experimentDir = runId.slice(0, splitAt);
    runDir = runId.slice(splitAt + separator.length);
  }

  const relativeRunDir = path.join(experimentDir, runDir);
  return path.join(runsDir, relativeRunDir, RESULT_INDEX_FILENAME);
}

/**
 * List remote runs using `index/runs.jsonl` inside the results repo checkout.
 *
 * @param repoDir - Directory of the results repo checkout that holds `index/runs.jsonl`.
 * @param config - Normalized results config, used to resolve the runs directory.
 * @returns One entry per index record, tagged with `source: 'remote'`, or
 *   `null` when the index file does not exist so the caller can fall back to
 *   another listing strategy.
 */
function listRemoteRunsFromIndex(
  repoDir: string,
  config: Required<ResultsConfig>,
): SourcedResultFileMeta[] | null {
  const indexPath = path.join(repoDir, 'index', 'runs.jsonl');
  if (!existsSync(indexPath)) {
    return null;
  }

  const runsDir = resolveResultsRepoRunsDir(config);
  const indexed = readRunIndex(indexPath);

  return indexed.map((entry) => {
    const runId = entry.run_id;
    // Show only the part after the last "::"; an id without a separator is
    // returned whole by split, so no separate branch is needed.
    const displayName = runId.split('::').at(-1) ?? runId;

    return {
      path: runIdToManifestPath(runId, runsDir),
      filename: encodeRemoteRunId(runId),
      raw_filename: runId,
      displayName,
      timestamp: entry.timestamp,
      testCount: entry.test_count,
      passRate: entry.pass_rate,
      avgScore: entry.avg_score,
      sizeBytes: entry.size_bytes,
      source: 'remote' as const,
    };
  });
}

export async function getRemoteResultsStatus(cwd: string): Promise<RemoteResultsStatus> {
const config = await loadNormalizedResultsConfig(cwd);
const status = getResultsRepoStatus(config);
Expand Down Expand Up @@ -185,15 +234,20 @@ export async function listMergedResultFiles(
};
}

const remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
(meta) =>
({
...meta,
filename: encodeRemoteRunId(meta.filename),
raw_filename: meta.filename,
source: 'remote' as const,
}) satisfies SourcedResultFileMeta,
);
const repoDir = getResultsRepoCachePaths(config.repo).repoDir;

// Prefer index for O(1) listing; fall back to directory walk for repos without an index.
const remoteRuns =
listRemoteRunsFromIndex(repoDir, config) ??
listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
(meta) =>
({
...meta,
filename: encodeRemoteRunId(meta.filename),
raw_filename: meta.filename,
source: 'remote' as const,
}) satisfies SourcedResultFileMeta,
);

const merged = [...localRuns, ...remoteRuns].sort((a, b) =>
b.timestamp.localeCompare(a.timestamp),
Expand Down Expand Up @@ -223,12 +277,35 @@ export async function maybeAutoExportRunArtifacts(payload: RemoteExportPayload):

const relativeRunPath = getRelativeRunPath(payload.cwd, payload.run_dir);
const commitTitle = buildCommitTitle(payload);
const runId = buildRunId(relativeRunPath);
const results = payload.results;
const passed = results.filter((r) => r.score >= DEFAULT_THRESHOLD).length;
const testCount = results.length;
const avgScore = testCount > 0 ? results.reduce((sum, r) => sum + r.score, 0) / testCount : 0;
const passRate = testCount > 0 ? passed / testCount : 0;
const experiment = payload.experiment ?? 'default';
const target = results[0]?.target ?? '';
const sizeBytes = await directorySizeBytes(payload.run_dir);

const indexEntry: Omit<RunIndexEntry, 'sha'> = {
run_id: runId,
timestamp: results[0]?.timestamp ?? new Date().toISOString(),
experiment,
target,
test_count: testCount,
passed,
pass_rate: passRate,
avg_score: avgScore,
size_bytes: sizeBytes,
tags: [],
};

const pushed = await directPushResults({
config,
sourceDir: payload.run_dir,
destinationPath: relativeRunPath,
commitMessage: commitTitle,
indexEntry,
});

if (!pushed) {
Expand Down
72 changes: 72 additions & 0 deletions apps/cli/test/commands/results/remote.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import path from 'node:path';

import { type RunIndexEntry, appendToRunIndex, readRunIndex } from '@agentv/core';

// listRemoteRunsFromIndex is not exported from remote.ts, so it is not called directly here.
// These tests cover the exported run-id helpers and the index read/append primitives instead.

import {
decodeRemoteRunId,
encodeRemoteRunId,
isRemoteRunId,
} from '../../../src/commands/results/remote.js';

// Checks the `remote::` prefix helpers against one plain / prefixed id pair.
describe('encodeRemoteRunId / decodeRemoteRunId / isRemoteRunId', () => {
  const plainRunId = '2026-05-21T10-00-00-000Z';
  const remoteRunId = 'remote::2026-05-21T10-00-00-000Z';

  it('encodes a plain run id', () => {
    const encoded = encodeRemoteRunId(plainRunId);
    expect(encoded).toBe(remoteRunId);
  });

  it('decodes a remote-prefixed run id', () => {
    const decoded = decodeRemoteRunId(remoteRunId);
    expect(decoded).toBe(plainRunId);
  });

  it('identifies remote run ids', () => {
    expect(isRemoteRunId(remoteRunId)).toBe(true);
    expect(isRemoteRunId(plainRunId)).toBe(false);
  });
});

// ── Index fallback behaviour ─────────────────────────────────────────────

// Exercises readRunIndex / appendToRunIndex against a throwaway directory:
// a missing index reads as empty, and an appended entry reads back intact.
describe('listRemoteRunsFromIndex fallback (via file system)', () => {
  let tmpDir: string;

  // Resolved lazily because tmpDir is re-created before every test.
  const indexFilePath = (): string => path.join(tmpDir, 'index', 'runs.jsonl');

  beforeEach(() => {
    const prefix = path.join(tmpdir(), 'agentv-remote-');
    tmpDir = mkdtempSync(prefix);
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it('index/runs.jsonl is absent → no crash (confirmed by readRunIndex returning [])', () => {
    const loaded = readRunIndex(indexFilePath());
    expect(loaded).toEqual([]);
  });

  it('index/runs.jsonl present → entries parse correctly', () => {
    const indexFile = indexFilePath();
    const written: RunIndexEntry = {
      run_id: '2026-05-21T10-00-00-000Z',
      timestamp: '2026-05-21T10:00:01.000Z',
      experiment: 'default',
      target: 'gpt-4o',
      test_count: 5,
      passed: 4,
      pass_rate: 0.8,
      avg_score: 0.85,
      size_bytes: 12345,
      tags: [],
    };
    appendToRunIndex(indexFile, written);

    const loaded = readRunIndex(indexFile);
    expect(loaded).toHaveLength(1);

    const first = loaded[0];
    expect(first?.run_id).toBe('2026-05-21T10-00-00-000Z');
    expect(first?.target).toBe('gpt-4o');
    expect(first?.pass_rate).toBe(0.8);
  });
});
Loading
Loading