EntityProcess · christso · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
@@ -20,7 +20,7 @@ export const evalRunCommand = command({
     evalPaths: restPositionals({
       type: string,
       displayName: 'eval-paths',
-      description: 'Path(s) or glob(s) to evaluation .yaml file(s)',
+      description: 'Path(s) or glob(s) to evaluation files (.yaml, .eval.ts)',
     }),
     target: multioption({
       type: array(string),

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
@@ -20,6 +20,7 @@ import {
   loadConfig,
   loadTestSuite,
   loadTsConfig,
+  resolveTargetDefinition,
   shouldEnableCache,
   shouldSkipCacheForTemperature,
   subscribeToCodexLogEntries,
@@ -531,6 +532,9 @@ async function prepareFileMetadata(params: {
   readonly failOnError?: FailOnError;
   readonly threshold?: number;
   readonly tags?: readonly string[];
+  readonly providerFactory?: (
+    target: import('@agentv/core').ResolvedTarget,
+  ) => import('@agentv/core').Provider;
 }> {
   const { testFilePath, repoRoot, cwd, options } = params;
 
@@ -574,6 +578,54 @@ async function prepareFileMetadata(params: {
         inlineTargetLabel: `transcript (${path.basename(options.transcript)})`,
       },
     ];
+  } else if (suite.inlineTarget && options.cliTargets.length === 0) {
+    const targetDefinition = suite.inlineTarget;
+    const resolvedTarget = options.dryRun
+      ? ({
+          kind: 'mock',
+          name: `${targetDefinition.name}-dry-run`,
+          graderTarget: undefined,
+          config: {
+            response: '{"answer":"Mock dry-run response"}',
+            delayMs: options.dryRunDelay,
+            delayMinMs: options.dryRunDelayMin,
+            delayMaxMs: options.dryRunDelayMax,
+          },
+        } satisfies ResolvedTarget)
+      : resolveTargetDefinition(targetDefinition, process.env, testFilePath, {
+          emitDeprecationWarnings: false,
+        });
+    selections = [
+      {
+        selection: {
+          definitions: [targetDefinition],
+          resolvedTarget,
+          targetName: targetDefinition.name,
+          targetSource: 'test-file',
+          targetsFilePath: testFilePath,
+        },
+        inlineTargetLabel: resolveTargetLabel(targetDefinition.name, resolvedTarget.name),
+      },
+    ];
+  } else if (suite.providerFactory && options.cliTargets.length === 0) {
+    const taskTarget: ResolvedTarget = {
+      kind: 'mock',
+      name: 'custom-task',
+      graderTarget: undefined,
+      config: {},
+    };
+    selections = [
+      {
+        selection: {
+          definitions: [],
+          resolvedTarget: taskTarget,
+          targetName: 'custom-task',
+          targetSource: 'test-file',
+          targetsFilePath: testFilePath,
+        },
+        inlineTargetLabel: 'custom-task',
+      },
+    ];
   } else {
     // Determine target names: CLI --target flags override YAML
     const cliTargets = options.cliTargets;
@@ -658,6 +710,7 @@ async function prepareFileMetadata(params: {
     failOnError: suite.failOnError,
     threshold: suite.threshold,
     tags: suite.metadata?.tags,
+    providerFactory: suite.providerFactory,
   };
 }
 
@@ -1170,33 +1223,12 @@ export async function runEvalCommand(
       readonly failOnError?: FailOnError;
       readonly threshold?: number;
       readonly tags?: readonly string[];
+      readonly providerFactory?: (
+        target: import('@agentv/core').ResolvedTarget,
+      ) => import('@agentv/core').Provider;
     }
   >();
-  // Separate TypeScript/JS eval files from YAML files.
-  // TS files are self-contained scripts that call evaluate() directly.
-  const tsFiles: string[] = [];
-  const yamlFiles: string[] = [];
   for (const testFilePath of resolvedTestFiles) {
-    if (/\.(ts|js|mts|mjs)$/.test(testFilePath)) {
-      tsFiles.push(testFilePath);
-    } else {
-      yamlFiles.push(testFilePath);
-    }
-  }
-
-  // Run TypeScript eval files by importing them.
-  // evaluate() runs during import via top-level await and handles its own output.
-  for (const tsFile of tsFiles) {
-    await ensureFileExists(tsFile, 'TypeScript eval file');
-    await import(pathToFileURL(tsFile).href);
-  }
-
-  // If only TS files were provided, we're done — evaluate() handled everything.
-  if (yamlFiles.length === 0 && tsFiles.length > 0) {
-    return;
-  }
-
-  for (const testFilePath of yamlFiles) {
     const meta = await prepareFileMetadata({
       testFilePath,
       repoRoot,
@@ -1355,7 +1387,7 @@ export async function runEvalCommand(
     }
   }
 
-  // Use only files that survived tag filtering (fileMetadata keys)
+  // Use only files that survived tag filtering.
   const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
 
   // --transcript: create a shared TranscriptProvider and validate entry count
@@ -1442,7 +1474,7 @@ export async function runEvalCommand(
               budgetUsd: targetPrep.budgetUsd,
               failOnError: targetPrep.failOnError,
               threshold: resolvedThreshold,
-              providerFactory: transcriptProviderFactory,
+              providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory,
             });
             const evalFile = path.relative(cwd, testFilePath);
             const existingSummary = remoteEvalSummaries.find(

diff --git a/apps/cli/src/commands/eval/shared.ts b/apps/cli/src/commands/eval/shared.ts
@@ -34,13 +34,16 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       : path.resolve(cwd, pattern);
     try {
       const stats = await stat(candidatePath);
-      if (stats.isFile() && /\.(ya?ml|jsonl|json)$/i.test(candidatePath)) {
+      if (stats.isFile() && /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(candidatePath)) {
         results.add(candidatePath);
         continue;
       }
       if (stats.isDirectory()) {
         // Auto-expand directory to recursive eval file glob
-        const dirGlob = path.posix.join(candidatePath.replace(/\\/g, '/'), '**/*.eval.{yaml,yml}');
+        const dirGlob = path.posix.join(
+          candidatePath.replace(/\\/g, '/'),
+          '**/{*.eval.yaml,*.eval.yml,eval.yaml,eval.yml,*.eval.ts,*.eval.mts}',
+        );
         const dirMatches = await fg(dirGlob, {
           absolute: true,
           onlyFiles: true,
@@ -69,7 +72,9 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       ignore: ignorePatterns,
     });
 
-    const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl|json)$/i.test(filePath));
+    const yamlMatches = matches.filter((filePath) =>
+      /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(filePath),
+    );
     for (const filePath of yamlMatches) {
       results.add(path.normalize(filePath));
     }
@@ -94,7 +99,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
     throw new Error(
       `No eval files matched any provided paths or globs: ${includePatterns.join(
         ', ',
-      )}. Provide YAML, JSONL, or JSON paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.jsonl", "evals.json").`,
+      )}. Provide YAML, JSONL, JSON, or TypeScript paths or globs (e.g., "evals/**/eval.yaml", "evals/**/*.eval.ts").`,
     );
   }
 

diff --git a/apps/cli/test/commands/eval/shared.test.ts b/apps/cli/test/commands/eval/shared.test.ts
@@ -64,4 +64,53 @@ describe('resolveEvalPaths', () => {
       resolveEvalPaths(['evals/**/*.eval.yaml', 'evals/**/eval.yaml'], tempDir),
     ).rejects.toThrow('No eval files matched any provided paths or globs');
   });
+
+  it('discovers *.eval.ts files from directory auto-expansion', async () => {
+    // Nest the TypeScript eval one level below the directory handed to the resolver.
+    const nestedDir = path.join(tempDir, 'evals');
+    const evalTsPath = path.join(nestedDir, 'greeting.eval.ts');
+    mkdirSync(nestedDir, { recursive: true });
+    writeFileSync(evalTsPath, 'export default { tests: [] }');
+
+    // The directory argument alone must resolve to exactly that one nested file.
+    const discovered = await resolveEvalPaths([tempDir], tempDir);
+    expect(discovered).toEqual([path.normalize(evalTsPath)]);
+  });
+
+  it('accepts a direct .mts file path', async () => {
+    // Hand the resolver the .mts file itself rather than a directory or glob.
+    const mtsPath = path.join(tempDir, 'custom.eval.mts');
+    writeFileSync(mtsPath, 'export default { tests: [] }');
+
+    const expected = [path.normalize(mtsPath)];
+    expect(await resolveEvalPaths([mtsPath], tempDir)).toEqual(expected);
+  });
+
+  it('accepts a direct .ts file path', async () => {
+    // Same scenario as an explicit file argument, this time with a plain .ts extension.
+    const tsPath = path.join(tempDir, 'custom.eval.ts');
+    writeFileSync(tsPath, 'export default { tests: [] }');
+
+    const outcome = await resolveEvalPaths([tsPath], tempDir);
+    expect(outcome).toEqual([path.normalize(tsPath)]);
+  });
+
+  it('discovers both .yaml and .ts files from directory', async () => {
+    const nestedDir = path.join(tempDir, 'evals');
+    mkdirSync(nestedDir, { recursive: true });
+
+    // File name -> contents for each eval file the directory scan is expected to return.
+    const fixtures: Array<[string, string]> = [
+      ['suite.eval.yaml', 'tests:\n  - id: sample\n    input: test\n'],
+      ['eval.yaml', 'tests:\n  - id: sample2\n    input: test\n'],
+      ['suite.eval.ts', 'export default { tests: [] }'],
+    ];
+    for (const [name, body] of fixtures) writeFileSync(path.join(nestedDir, name), body);
+
+    const discovered = await resolveEvalPaths([tempDir], tempDir);
+    for (const [name] of fixtures) {
+      expect(discovered).toContain(path.normalize(path.join(nestedDir, name)));
+    }
+    expect(discovered).toHaveLength(3);
+  });
 });