From 1513e7965f83779715528db0a7bd238a3021d2fc Mon Sep 17 00:00:00 2001
From: Christopher <christso@gmail.com>
Date: Thu, 16 Apr 2026 00:20:39 +0000
Subject: [PATCH 1/2] feat(cli): add *.eval.ts auto-discovery (#1116)

Add TypeScript eval file support to `agentv run`. TS eval files export
an EvalConfig (default, `config`, or `evalConfig` named export) and are
discovered alongside YAML files via the same glob/path resolution.

Changes:
- shared.ts: Include .ts in file extension regex and directory auto-glob
- config-loader.ts: Add **/evals/**/*.eval.ts to DEFAULT_EVAL_PATTERNS
- jsonl-parser.ts: Detect .ts/.mts files as the 'typescript' format in detectFormat()
- ts-eval-loader.ts: New loader that imports TS modules and extracts EvalConfig
- run-eval.ts: Run TS files through evaluate() with CLI overrides, feeding
  results through the same artifact/reporting pipeline; TS files carry no
  tags, so they are skipped when --tag/--exclude-tag filters are active
- run.ts: Update CLI description to mention .ts files
- index.ts: Export loadTsEvalFile and TsEvalResult from @agentv/core

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 apps/cli/src/commands/eval/commands/run.ts    |  2 +-
 apps/cli/src/commands/eval/run-eval.ts        | 71 ++++++++++++++-----
 apps/cli/src/commands/eval/shared.ts          | 11 +--
 apps/cli/test/commands/eval/shared.test.ts    | 37 ++++++++++
 .../src/evaluation/loaders/config-loader.ts   |  1 +
 .../src/evaluation/loaders/jsonl-parser.ts    |  7 +-
 .../src/evaluation/loaders/ts-eval-loader.ts  | 58 +++++++++++++++
 packages/core/src/index.ts                    |  4 ++
 .../loaders/fixtures/default-export.eval.ts   | 14 ++++
 .../fixtures/eval-config-named.eval.ts        | 12 ++++
 .../loaders/fixtures/named-config.eval.ts     | 12 ++++
 .../loaders/fixtures/no-config.eval.ts        |  2 +
 .../evaluation/loaders/jsonl-parser.test.ts   |  9 +++
 .../evaluation/loaders/ts-eval-loader.test.ts | 39 ++++++++++
 14 files changed, 255 insertions(+), 24 deletions(-)
 create mode 100644 packages/core/src/evaluation/loaders/ts-eval-loader.ts
 create mode 100644 packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts
 create mode 100644 packages/core/test/evaluation/loaders/fixtures/eval-config-named.eval.ts
 create mode 100644 packages/core/test/evaluation/loaders/fixtures/named-config.eval.ts
 create mode 100644 packages/core/test/evaluation/loaders/fixtures/no-config.eval.ts
 create mode 100644 packages/core/test/evaluation/loaders/ts-eval-loader.test.ts

diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
index 18668aa53..3c4631f27 100644
--- a/apps/cli/src/commands/eval/commands/run.ts
+++ b/apps/cli/src/commands/eval/commands/run.ts
@@ -20,7 +20,7 @@ export const evalRunCommand = command({
     evalPaths: restPositionals({
       type: string,
       displayName: 'eval-paths',
-      description: 'Path(s) or glob(s) to evaluation .yaml file(s)',
+      description: 'Path(s) or glob(s) to evaluation files (.yaml, .eval.ts)',
     }),
     target: multioption({
       type: array(string),
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index a69078be0..5e84d40df 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -17,9 +17,11 @@ import {
   runEvaluation as defaultRunEvaluation,
   deriveCategory,
   ensureVSCodeSubagents,
+  evaluate,
   loadConfig,
   loadTestSuite,
   loadTsConfig,
+  loadTsEvalFile,
   shouldEnableCache,
   shouldSkipCacheForTemperature,
   subscribeToCodexLogEntries,
@@ -1172,30 +1174,18 @@ export async function runEvalCommand(
       readonly tags?: readonly string[];
     }
   >();
-  // Separate TypeScript/JS eval files from YAML files.
-  // TS files are self-contained scripts that call evaluate() directly.
+  // Separate TypeScript eval files from YAML/JSONL files.
+  // TS files export an EvalConfig and run through evaluate().
   const tsFiles: string[] = [];
   const yamlFiles: string[] = [];
   for (const testFilePath of resolvedTestFiles) {
-    if (/\.(ts|js|mts|mjs)$/.test(testFilePath)) {
+    if (/\.(ts|mts)$/.test(testFilePath)) {
       tsFiles.push(testFilePath);
     } else {
       yamlFiles.push(testFilePath);
     }
   }
 
-  // Run TypeScript eval files by importing them.
-  // evaluate() runs during import via top-level await and handles its own output.
-  for (const tsFile of tsFiles) {
-    await ensureFileExists(tsFile, 'TypeScript eval file');
-    await import(pathToFileURL(tsFile).href);
-  }
-
-  // If only TS files were provided, we're done — evaluate() handled everything.
-  if (yamlFiles.length === 0 && tsFiles.length > 0) {
-    return;
-  }
-
   for (const testFilePath of yamlFiles) {
     const meta = await prepareFileMetadata({
       testFilePath,
@@ -1287,7 +1277,7 @@ export async function runEvalCommand(
     }
   }
 
-  if (totalEvalCount === 0) {
+  if (totalEvalCount === 0 && tsFiles.length === 0) {
     // When using --retry-errors, all tests being filtered means no errors or missing cases remain
     if (options.retryErrors && retryNonErrorResults && retryNonErrorResults.length > 0) {
       console.log('No execution errors or missing cases in the previous run. Nothing to retry.');
@@ -1355,7 +1345,7 @@ export async function runEvalCommand(
     }
   }
 
-  // Use only files that survived tag filtering (fileMetadata keys)
+  // Use only files that survived tag filtering (fileMetadata keys) — TS files are processed separately above
   const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
 
   // --transcript: create a shared TranscriptProvider and validate entry count
@@ -1387,6 +1377,53 @@ export async function runEvalCommand(
   // This matches industry practice (promptfoo, deepeval, OpenAI Evals) and avoids cross-file
   // workspace races without any grouping complexity.
   try {
+    // Process TypeScript eval files through evaluate() with CLI overrides.
+    // Results flow through the same output/artifact pipeline as YAML evals.
+    // Note: TS eval files don't carry tags; they're skipped when --tag/--exclude-tag is active.
+    const tsFilesToRun = hasTagFilters
+      ? (() => {
+          if (tsFiles.length > 0 && options.verbose) {
+            console.log(
+              `Skipped ${tsFiles.length} TS eval file(s) — tag filters don't apply to *.eval.ts files.`,
+            );
+          }
+          return [] as string[];
+        })()
+      : tsFiles;
+
+    for (const tsFile of tsFilesToRun) {
+      await ensureFileExists(tsFile, 'TypeScript eval file');
+      const { config: tsConfig } = await loadTsEvalFile(tsFile);
+
+      const cliOverrides: Record<string, unknown> = {};
+      if (options.workers !== undefined) cliOverrides.workers = options.workers;
+      if (options.filter) cliOverrides.filter = options.filter;
+      if (resolvedThreshold !== undefined) cliOverrides.threshold = resolvedThreshold;
+      if (options.cache !== undefined) cliOverrides.cache = options.cache;
+      if (options.verbose !== undefined) cliOverrides.verbose = options.verbose;
+      if (options.maxRetries !== 2) cliOverrides.maxRetries = options.maxRetries;
+      if (options.agentTimeoutSeconds !== undefined) {
+        cliOverrides.agentTimeoutMs = options.agentTimeoutSeconds * 1000;
+      }
+
+      console.log(`Running TS eval: ${path.relative(cwd, tsFile)}`);
+
+      const evalResult = await evaluate({
+        ...tsConfig,
+        ...cliOverrides,
+        onResult: (result: EvaluationResult) => {
+          outputWriter.append(result);
+          tsConfig.onResult?.(result);
+        },
+      });
+
+      allResults.push(...evalResult.results);
+      remoteEvalSummaries.push({
+        evalFile: path.relative(cwd, tsFile),
+        results: [...evalResult.results],
+      });
+    }
+
     for (const testFilePath of activeTestFiles) {
       const targetPrep = fileMetadata.get(testFilePath);
       if (!targetPrep) {
diff --git a/apps/cli/src/commands/eval/shared.ts b/apps/cli/src/commands/eval/shared.ts
index 55decf920..7570a2e92 100644
--- a/apps/cli/src/commands/eval/shared.ts
+++ b/apps/cli/src/commands/eval/shared.ts
@@ -34,13 +34,16 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       : path.resolve(cwd, pattern);
     try {
       const stats = await stat(candidatePath);
-      if (stats.isFile() && /\.(ya?ml|jsonl|json)$/i.test(candidatePath)) {
+      if (stats.isFile() && /\.(ya?ml|jsonl|json|ts)$/i.test(candidatePath)) {
         results.add(candidatePath);
         continue;
       }
       if (stats.isDirectory()) {
         // Auto-expand directory to recursive eval file glob
-        const dirGlob = path.posix.join(candidatePath.replace(/\\/g, '/'), '**/*.eval.{yaml,yml}');
+        const dirGlob = path.posix.join(
+          candidatePath.replace(/\\/g, '/'),
+          '**/*.eval.{yaml,yml,ts}',
+        );
         const dirMatches = await fg(dirGlob, {
           absolute: true,
           onlyFiles: true,
@@ -69,7 +72,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       ignore: ignorePatterns,
     });
 
-    const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl|json)$/i.test(filePath));
+    const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl|json|ts)$/i.test(filePath));
     for (const filePath of yamlMatches) {
       results.add(path.normalize(filePath));
     }
@@ -94,7 +97,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
     throw new Error(
       `No eval files matched any provided paths or globs: ${includePatterns.join(
         ', ',
-      )}. Provide YAML, JSONL, or JSON paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.jsonl", "evals.json").`,
+      )}. Provide YAML, JSONL, JSON, or TypeScript paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.eval.ts").`,
     );
   }
 
diff --git a/apps/cli/test/commands/eval/shared.test.ts b/apps/cli/test/commands/eval/shared.test.ts
index 4eb8c07ff..ed0e5e494 100644
--- a/apps/cli/test/commands/eval/shared.test.ts
+++ b/apps/cli/test/commands/eval/shared.test.ts
@@ -64,4 +64,41 @@ describe('resolveEvalPaths', () => {
       resolveEvalPaths(['evals/**/*.eval.yaml', 'evals/**/eval.yaml'], tempDir),
     ).rejects.toThrow('No eval files matched any provided paths or globs');
   });
+
+  it('discovers *.eval.ts files from directory auto-expansion', async () => {
+    const evalDir = path.join(tempDir, 'evals');
+    mkdirSync(evalDir, { recursive: true });
+
+    const tsFile = path.join(evalDir, 'greeting.eval.ts');
+    writeFileSync(tsFile, 'export default { tests: [] }');
+
+    const resolved = await resolveEvalPaths([tempDir], tempDir);
+
+    expect(resolved).toEqual([path.normalize(tsFile)]);
+  });
+
+  it('accepts a direct .ts file path', async () => {
+    const tsFile = path.join(tempDir, 'custom.eval.ts');
+    writeFileSync(tsFile, 'export default { tests: [] }');
+
+    const resolved = await resolveEvalPaths([tsFile], tempDir);
+
+    expect(resolved).toEqual([path.normalize(tsFile)]);
+  });
+
+  it('discovers both .yaml and .ts files from directory', async () => {
+    const evalDir = path.join(tempDir, 'evals');
+    mkdirSync(evalDir, { recursive: true });
+
+    const yamlFile = path.join(evalDir, 'suite.eval.yaml');
+    const tsFile = path.join(evalDir, 'suite.eval.ts');
+    writeFileSync(yamlFile, 'tests:\n  - id: sample\n    input: test\n');
+    writeFileSync(tsFile, 'export default { tests: [] }');
+
+    const resolved = await resolveEvalPaths([tempDir], tempDir);
+
+    expect(resolved).toContain(path.normalize(yamlFile));
+    expect(resolved).toContain(path.normalize(tsFile));
+    expect(resolved).toHaveLength(2);
+  });
 });
diff --git a/packages/core/src/evaluation/loaders/config-loader.ts b/packages/core/src/evaluation/loaders/config-loader.ts
index 7aede85f9..377222123 100644
--- a/packages/core/src/evaluation/loaders/config-loader.ts
+++ b/packages/core/src/evaluation/loaders/config-loader.ts
@@ -21,6 +21,7 @@ const ANSI_RESET = '\u001b[0m';
 export const DEFAULT_EVAL_PATTERNS: readonly string[] = [
   '**/evals/**/*.eval.yaml',
   '**/evals/**/eval.yaml',
+  '**/evals/**/*.eval.ts',
 ];
 
 export type ExecutionDefaults = {
diff --git a/packages/core/src/evaluation/loaders/jsonl-parser.ts b/packages/core/src/evaluation/loaders/jsonl-parser.ts
index 0887048fe..288d0bd22 100644
--- a/packages/core/src/evaluation/loaders/jsonl-parser.ts
+++ b/packages/core/src/evaluation/loaders/jsonl-parser.ts
@@ -62,13 +62,16 @@ type RawJsonlEvalCase = JsonObject & {
 /**
  * Detect file format by extension.
  */
-export function detectFormat(filePath: string): 'yaml' | 'jsonl' | 'agent-skills-json' {
+export function detectFormat(
+  filePath: string,
+): 'yaml' | 'jsonl' | 'agent-skills-json' | 'typescript' {
   const ext = path.extname(filePath).toLowerCase();
   if (ext === '.jsonl') return 'jsonl';
   if (ext === '.yaml' || ext === '.yml') return 'yaml';
   if (ext === '.json') return 'agent-skills-json';
+  if (ext === '.ts' || ext === '.mts') return 'typescript';
   throw new Error(
-    `Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl, .json`,
+    `Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl, .json, .ts`,
   );
 }
 
diff --git a/packages/core/src/evaluation/loaders/ts-eval-loader.ts b/packages/core/src/evaluation/loaders/ts-eval-loader.ts
new file mode 100644
index 000000000..eb4946e3b
--- /dev/null
+++ b/packages/core/src/evaluation/loaders/ts-eval-loader.ts
@@ -0,0 +1,58 @@
+/**
+ * Loads an eval suite from a TypeScript *.eval.ts file.
+ *
+ * Each TS eval file must export an EvalConfig as its default export or
+ * as a named export called `config` or `evalConfig`.
+ *
+ * The file is loaded via dynamic import() which works natively in Bun
+ * and requires tsx/jiti for Node.js.
+ *
+ * To add a new export convention: add the name to EXPORT_NAMES below.
+ */
+import path from 'node:path';
+import { pathToFileURL } from 'node:url';
+import type { EvalConfig } from '../evaluate.js';
+
+const EXPORT_NAMES = ['default', 'config', 'evalConfig'] as const;
+
+export interface TsEvalResult {
+  readonly config: EvalConfig;
+  readonly filePath: string;
+}
+
+/**
+ * Import a *.eval.ts file and extract the EvalConfig export.
+ * Tries default, `config`, and `evalConfig` named exports in priority order.
+ */
+export async function loadTsEvalFile(filePath: string): Promise<TsEvalResult> {
+  const absolutePath = path.resolve(filePath);
+  const moduleUrl = pathToFileURL(absolutePath).href;
+  const module = await import(moduleUrl);
+
+  let config: EvalConfig | undefined;
+  for (const name of EXPORT_NAMES) {
+    const candidate = module[name];
+    if (isEvalConfigLike(candidate)) {
+      config = candidate;
+      break;
+    }
+  }
+
+  if (!config) {
+    throw new Error(
+      `${filePath}: no EvalConfig export found. Export an EvalConfig as default, 'config', or 'evalConfig'.`,
+    );
+  }
+
+  return { config, filePath: absolutePath };
+}
+
+/**
+ * Duck-type check for EvalConfig-like objects.
+ * An EvalConfig must have at least one of: tests, specFile, or target.
+ */
+function isEvalConfigLike(value: unknown): value is EvalConfig {
+  if (!value || typeof value !== 'object') return false;
+  const obj = value as Record<string, unknown>;
+  return 'tests' in obj || 'specFile' in obj || 'target' in obj || 'task' in obj;
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 2719b941c..e809da5ea 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -11,6 +11,10 @@ export {
   type AgentVConfig as AgentVYamlConfig,
   type ResultsExportConfig,
 } from './evaluation/loaders/config-loader.js';
+export {
+  loadTsEvalFile,
+  type TsEvalResult,
+} from './evaluation/loaders/ts-eval-loader.js';
 export {
   transpileEvalYaml,
   transpileEvalYamlFile,
diff --git a/packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts b/packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts
new file mode 100644
index 000000000..13c449055
--- /dev/null
+++ b/packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts
@@ -0,0 +1,14 @@
+import type { EvalConfig } from '../../../../src/evaluation/evaluate.js';
+
+const config: EvalConfig = {
+  tests: [
+    {
+      id: 'greeting',
+      input: 'Say hello',
+      assert: [{ type: 'contains', value: 'hello' }],
+    },
+  ],
+  target: { provider: 'mock_agent' },
+};
+
+export default config;
diff --git a/packages/core/test/evaluation/loaders/fixtures/eval-config-named.eval.ts b/packages/core/test/evaluation/loaders/fixtures/eval-config-named.eval.ts
new file mode 100644
index 000000000..2c74e72e0
--- /dev/null
+++ b/packages/core/test/evaluation/loaders/fixtures/eval-config-named.eval.ts
@@ -0,0 +1,12 @@
+import type { EvalConfig } from '../../../../src/evaluation/evaluate.js';
+
+export const evalConfig: EvalConfig = {
+  tests: [
+    {
+      id: 'eval-config-named',
+      input: 'Say hello',
+      assert: [{ type: 'contains', value: 'hello' }],
+    },
+  ],
+  target: { provider: 'mock_agent' },
+};
diff --git a/packages/core/test/evaluation/loaders/fixtures/named-config.eval.ts b/packages/core/test/evaluation/loaders/fixtures/named-config.eval.ts
new file mode 100644
index 000000000..8dfb9f81c
--- /dev/null
+++ b/packages/core/test/evaluation/loaders/fixtures/named-config.eval.ts
@@ -0,0 +1,12 @@
+import type { EvalConfig } from '../../../../src/evaluation/evaluate.js';
+
+export const config: EvalConfig = {
+  tests: [
+    {
+      id: 'named-config',
+      input: 'Say hello',
+      assert: [{ type: 'contains', value: 'hello' }],
+    },
+  ],
+  target: { provider: 'mock_agent' },
+};
diff --git a/packages/core/test/evaluation/loaders/fixtures/no-config.eval.ts b/packages/core/test/evaluation/loaders/fixtures/no-config.eval.ts
new file mode 100644
index 000000000..ee9eb65c0
--- /dev/null
+++ b/packages/core/test/evaluation/loaders/fixtures/no-config.eval.ts
@@ -0,0 +1,2 @@
+// This file has no EvalConfig export — should cause loadTsEvalFile to throw.
+export const greeting = 'hello';
diff --git a/packages/core/test/evaluation/loaders/jsonl-parser.test.ts b/packages/core/test/evaluation/loaders/jsonl-parser.test.ts
index 1285d91b5..deb73ae68 100644
--- a/packages/core/test/evaluation/loaders/jsonl-parser.test.ts
+++ b/packages/core/test/evaluation/loaders/jsonl-parser.test.ts
@@ -27,6 +27,15 @@ describe('detectFormat', () => {
     expect(detectFormat('/path/to/evals.json')).toBe('agent-skills-json');
   });
 
+  it('returns typescript for .ts extension', () => {
+    expect(detectFormat('greeting.eval.ts')).toBe('typescript');
+    expect(detectFormat('/path/to/eval.ts')).toBe('typescript');
+  });
+
+  it('returns typescript for .mts extension', () => {
+    expect(detectFormat('greeting.eval.mts')).toBe('typescript');
+  });
+
   it('throws for unsupported extensions', () => {
     expect(() => detectFormat('test.txt')).toThrow('Unsupported file format');
     expect(() => detectFormat('test')).toThrow('Unsupported file format');
diff --git a/packages/core/test/evaluation/loaders/ts-eval-loader.test.ts b/packages/core/test/evaluation/loaders/ts-eval-loader.test.ts
new file mode 100644
index 000000000..0322cd495
--- /dev/null
+++ b/packages/core/test/evaluation/loaders/ts-eval-loader.test.ts
@@ -0,0 +1,39 @@
+import { describe, expect, it } from 'bun:test';
+import path from 'node:path';
+
+import { loadTsEvalFile } from '../../../src/evaluation/loaders/ts-eval-loader.js';
+
+const fixtureDir = path.join(import.meta.dir, 'fixtures');
+
+describe('loadTsEvalFile', () => {
+  it('loads default export', async () => {
+    const result = await loadTsEvalFile(path.join(fixtureDir, 'default-export.eval.ts'));
+    expect(result.config).toBeDefined();
+    expect(result.config.tests).toHaveLength(1);
+    expect(result.config.tests?.[0].id).toBe('greeting');
+  });
+
+  it('loads named "config" export', async () => {
+    const result = await loadTsEvalFile(path.join(fixtureDir, 'named-config.eval.ts'));
+    expect(result.config).toBeDefined();
+    expect(result.config.tests?.[0].id).toBe('named-config');
+  });
+
+  it('loads named "evalConfig" export', async () => {
+    const result = await loadTsEvalFile(path.join(fixtureDir, 'eval-config-named.eval.ts'));
+    expect(result.config).toBeDefined();
+    expect(result.config.tests?.[0].id).toBe('eval-config-named');
+  });
+
+  it('throws when no EvalConfig export found', async () => {
+    await expect(loadTsEvalFile(path.join(fixtureDir, 'no-config.eval.ts'))).rejects.toThrow(
+      'no EvalConfig export found',
+    );
+  });
+
+  it('returns absolute file path', async () => {
+    const result = await loadTsEvalFile(path.join(fixtureDir, 'default-export.eval.ts'));
+    expect(path.isAbsolute(result.filePath)).toBe(true);
+    expect(result.filePath).toContain('default-export.eval.ts');
+  });
+});

From 7864abe2f94a180e81596bdf66507b9a43caf137 Mon Sep 17 00:00:00 2001
From: Christopher <christso@gmail.com>
Date: Thu, 16 Apr 2026 03:27:27 +0000
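Subject: [PATCH 2/2] refactor(cli): unify TypeScript eval loading with suite
 pipeline (#1116)

Stop running *.eval.ts files through a separate evaluate() loop in
run-eval.ts. All resolved eval files now go through prepareFileMetadata()
and the shared per-file run loop, so the tsFiles/yamlFiles split and the
TS-specific tag-filter skip are removed.

Changes:
- run-eval.ts: When no --target is given, use the suite's inline target
  (mocked under --dry-run) or its providerFactory ('custom-task' target);
  pass the suite providerFactory through unless a transcript provider is set
- shared.ts: Accept .mts/.cts file paths; directory auto-expansion also
  matches eval.yaml, eval.yml, and *.eval.mts
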
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 apps/cli/src/commands/eval/run-eval.ts        | 123 ++++----
 apps/cli/src/commands/eval/shared.ts          |  10 +-
 apps/cli/test/commands/eval/shared.test.ts    |  14 +-
 packages/core/src/evaluation/evaluate.ts      | 284 ++++++++++++------
 .../src/evaluation/loaders/ts-eval-loader.ts  |  48 ++-
 packages/core/src/evaluation/yaml-parser.ts   |  13 +
 .../evaluate-programmatic-api.test.ts         |  12 +
 .../loaders/fixtures/default-export.eval.ts   |  10 +-
 .../evaluation/loaders/ts-eval-loader.test.ts |  25 ++
 9 files changed, 384 insertions(+), 155 deletions(-)

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 5e84d40df..a8d0a73e3 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -17,11 +17,10 @@ import {
   runEvaluation as defaultRunEvaluation,
   deriveCategory,
   ensureVSCodeSubagents,
-  evaluate,
   loadConfig,
   loadTestSuite,
   loadTsConfig,
-  loadTsEvalFile,
+  resolveTargetDefinition,
   shouldEnableCache,
   shouldSkipCacheForTemperature,
   subscribeToCodexLogEntries,
@@ -533,6 +532,9 @@ async function prepareFileMetadata(params: {
   readonly failOnError?: FailOnError;
   readonly threshold?: number;
   readonly tags?: readonly string[];
+  readonly providerFactory?: (
+    target: import('@agentv/core').ResolvedTarget,
+  ) => import('@agentv/core').Provider;
 }> {
   const { testFilePath, repoRoot, cwd, options } = params;
 
@@ -576,6 +578,54 @@ async function prepareFileMetadata(params: {
         inlineTargetLabel: `transcript (${path.basename(options.transcript)})`,
       },
     ];
+  } else if (suite.inlineTarget && options.cliTargets.length === 0) {
+    const targetDefinition = suite.inlineTarget;
+    const resolvedTarget = options.dryRun
+      ? ({
+          kind: 'mock',
+          name: `${targetDefinition.name}-dry-run`,
+          graderTarget: undefined,
+          config: {
+            response: '{"answer":"Mock dry-run response"}',
+            delayMs: options.dryRunDelay,
+            delayMinMs: options.dryRunDelayMin,
+            delayMaxMs: options.dryRunDelayMax,
+          },
+        } satisfies ResolvedTarget)
+      : resolveTargetDefinition(targetDefinition, process.env, testFilePath, {
+          emitDeprecationWarnings: false,
+        });
+    selections = [
+      {
+        selection: {
+          definitions: [targetDefinition],
+          resolvedTarget,
+          targetName: targetDefinition.name,
+          targetSource: 'test-file',
+          targetsFilePath: testFilePath,
+        },
+        inlineTargetLabel: resolveTargetLabel(targetDefinition.name, resolvedTarget.name),
+      },
+    ];
+  } else if (suite.providerFactory && options.cliTargets.length === 0) {
+    const taskTarget: ResolvedTarget = {
+      kind: 'mock',
+      name: 'custom-task',
+      graderTarget: undefined,
+      config: {},
+    };
+    selections = [
+      {
+        selection: {
+          definitions: [],
+          resolvedTarget: taskTarget,
+          targetName: 'custom-task',
+          targetSource: 'test-file',
+          targetsFilePath: testFilePath,
+        },
+        inlineTargetLabel: 'custom-task',
+      },
+    ];
   } else {
     // Determine target names: CLI --target flags override YAML
     const cliTargets = options.cliTargets;
@@ -660,6 +710,7 @@ async function prepareFileMetadata(params: {
     failOnError: suite.failOnError,
     threshold: suite.threshold,
     tags: suite.metadata?.tags,
+    providerFactory: suite.providerFactory,
   };
 }
 
@@ -1172,21 +1223,12 @@ export async function runEvalCommand(
       readonly failOnError?: FailOnError;
       readonly threshold?: number;
       readonly tags?: readonly string[];
+      readonly providerFactory?: (
+        target: import('@agentv/core').ResolvedTarget,
+      ) => import('@agentv/core').Provider;
     }
   >();
-  // Separate TypeScript eval files from YAML/JSONL files.
-  // TS files export an EvalConfig and run through evaluate().
-  const tsFiles: string[] = [];
-  const yamlFiles: string[] = [];
   for (const testFilePath of resolvedTestFiles) {
-    if (/\.(ts|mts)$/.test(testFilePath)) {
-      tsFiles.push(testFilePath);
-    } else {
-      yamlFiles.push(testFilePath);
-    }
-  }
-
-  for (const testFilePath of yamlFiles) {
     const meta = await prepareFileMetadata({
       testFilePath,
       repoRoot,
@@ -1277,7 +1319,7 @@ export async function runEvalCommand(
     }
   }
 
-  if (totalEvalCount === 0 && tsFiles.length === 0) {
+  if (totalEvalCount === 0) {
     // When using --retry-errors, all tests being filtered means no errors or missing cases remain
     if (options.retryErrors && retryNonErrorResults && retryNonErrorResults.length > 0) {
       console.log('No execution errors or missing cases in the previous run. Nothing to retry.');
@@ -1345,7 +1387,7 @@ export async function runEvalCommand(
     }
   }
 
-  // Use only files that survived tag filtering (fileMetadata keys) — TS files are processed separately above
+  // Use only files that survived tag filtering.
   const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
 
   // --transcript: create a shared TranscriptProvider and validate entry count
@@ -1377,53 +1419,6 @@ export async function runEvalCommand(
   // This matches industry practice (promptfoo, deepeval, OpenAI Evals) and avoids cross-file
   // workspace races without any grouping complexity.
   try {
-    // Process TypeScript eval files through evaluate() with CLI overrides.
-    // Results flow through the same output/artifact pipeline as YAML evals.
-    // Note: TS eval files don't carry tags; they're skipped when --tag/--exclude-tag is active.
-    const tsFilesToRun = hasTagFilters
-      ? (() => {
-          if (tsFiles.length > 0 && options.verbose) {
-            console.log(
-              `Skipped ${tsFiles.length} TS eval file(s) — tag filters don't apply to *.eval.ts files.`,
-            );
-          }
-          return [] as string[];
-        })()
-      : tsFiles;
-
-    for (const tsFile of tsFilesToRun) {
-      await ensureFileExists(tsFile, 'TypeScript eval file');
-      const { config: tsConfig } = await loadTsEvalFile(tsFile);
-
-      const cliOverrides: Record<string, unknown> = {};
-      if (options.workers !== undefined) cliOverrides.workers = options.workers;
-      if (options.filter) cliOverrides.filter = options.filter;
-      if (resolvedThreshold !== undefined) cliOverrides.threshold = resolvedThreshold;
-      if (options.cache !== undefined) cliOverrides.cache = options.cache;
-      if (options.verbose !== undefined) cliOverrides.verbose = options.verbose;
-      if (options.maxRetries !== 2) cliOverrides.maxRetries = options.maxRetries;
-      if (options.agentTimeoutSeconds !== undefined) {
-        cliOverrides.agentTimeoutMs = options.agentTimeoutSeconds * 1000;
-      }
-
-      console.log(`Running TS eval: ${path.relative(cwd, tsFile)}`);
-
-      const evalResult = await evaluate({
-        ...tsConfig,
-        ...cliOverrides,
-        onResult: (result: EvaluationResult) => {
-          outputWriter.append(result);
-          tsConfig.onResult?.(result);
-        },
-      });
-
-      allResults.push(...evalResult.results);
-      remoteEvalSummaries.push({
-        evalFile: path.relative(cwd, tsFile),
-        results: [...evalResult.results],
-      });
-    }
-
     for (const testFilePath of activeTestFiles) {
       const targetPrep = fileMetadata.get(testFilePath);
       if (!targetPrep) {
@@ -1479,7 +1474,7 @@ export async function runEvalCommand(
               budgetUsd: targetPrep.budgetUsd,
               failOnError: targetPrep.failOnError,
               threshold: resolvedThreshold,
-              providerFactory: transcriptProviderFactory,
+              providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory,
             });
             const evalFile = path.relative(cwd, testFilePath);
             const existingSummary = remoteEvalSummaries.find(
diff --git a/apps/cli/src/commands/eval/shared.ts b/apps/cli/src/commands/eval/shared.ts
index 7570a2e92..3e1c7fc3d 100644
--- a/apps/cli/src/commands/eval/shared.ts
+++ b/apps/cli/src/commands/eval/shared.ts
@@ -34,7 +34,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       : path.resolve(cwd, pattern);
     try {
       const stats = await stat(candidatePath);
-      if (stats.isFile() && /\.(ya?ml|jsonl|json|ts)$/i.test(candidatePath)) {
+      if (stats.isFile() && /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(candidatePath)) {
         results.add(candidatePath);
         continue;
       }
@@ -42,7 +42,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
         // Auto-expand directory to recursive eval file glob
         const dirGlob = path.posix.join(
           candidatePath.replace(/\\/g, '/'),
-          '**/*.eval.{yaml,yml,ts}',
+          '**/{*.eval.yaml,*.eval.yml,eval.yaml,eval.yml,*.eval.ts,*.eval.mts}',
         );
         const dirMatches = await fg(dirGlob, {
           absolute: true,
@@ -72,7 +72,9 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
       ignore: ignorePatterns,
     });
 
-    const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl|json|ts)$/i.test(filePath));
+    const yamlMatches = matches.filter((filePath) =>
+      /\.(ya?ml|jsonl|json|[cm]?ts)$/i.test(filePath),
+    );
     for (const filePath of yamlMatches) {
       results.add(path.normalize(filePath));
     }
@@ -97,7 +99,7 @@ export async function resolveEvalPaths(evalPaths: string[], cwd: string): Promis
     throw new Error(
       `No eval files matched any provided paths or globs: ${includePatterns.join(
         ', ',
-      )}. Provide YAML, JSONL, JSON, or TypeScript paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.eval.ts").`,
+      )}. Provide YAML, JSONL, JSON, or TypeScript paths or globs (e.g., "evals/**/eval.yaml", "evals/**/*.eval.ts").`,
     );
   }
 
diff --git a/apps/cli/test/commands/eval/shared.test.ts b/apps/cli/test/commands/eval/shared.test.ts
index ed0e5e494..52a20ce4a 100644
--- a/apps/cli/test/commands/eval/shared.test.ts
+++ b/apps/cli/test/commands/eval/shared.test.ts
@@ -77,6 +77,15 @@ describe('resolveEvalPaths', () => {
     expect(resolved).toEqual([path.normalize(tsFile)]);
   });
 
+  it('accepts a direct .mts file path', async () => {
+    // ESM-flavoured TypeScript modules go through the same extension check as .ts.
+    const mtsEvalPath = path.join(tempDir, 'custom.eval.mts');
+    writeFileSync(mtsEvalPath, 'export default { tests: [] }');
+
+    const matchedPaths = await resolveEvalPaths([mtsEvalPath], tempDir);
+    expect(matchedPaths).toEqual([path.normalize(mtsEvalPath)]);
+  });
+
   it('accepts a direct .ts file path', async () => {
     const tsFile = path.join(tempDir, 'custom.eval.ts');
     writeFileSync(tsFile, 'export default { tests: [] }');
@@ -91,14 +100,17 @@ describe('resolveEvalPaths', () => {
     mkdirSync(evalDir, { recursive: true });
 
     const yamlFile = path.join(evalDir, 'suite.eval.yaml');
+    const evalYamlFile = path.join(evalDir, 'eval.yaml');
     const tsFile = path.join(evalDir, 'suite.eval.ts');
     writeFileSync(yamlFile, 'tests:\n  - id: sample\n    input: test\n');
+    writeFileSync(evalYamlFile, 'tests:\n  - id: sample2\n    input: test\n');
     writeFileSync(tsFile, 'export default { tests: [] }');
 
     const resolved = await resolveEvalPaths([tempDir], tempDir);
 
     expect(resolved).toContain(path.normalize(yamlFile));
+    expect(resolved).toContain(path.normalize(evalYamlFile));
     expect(resolved).toContain(path.normalize(tsFile));
-    expect(resolved).toHaveLength(2);
+    expect(resolved).toHaveLength(3);
   });
 });
diff --git a/packages/core/src/evaluation/evaluate.ts b/packages/core/src/evaluation/evaluate.ts
index ff59670d3..328930b77 100644
--- a/packages/core/src/evaluation/evaluate.ts
+++ b/packages/core/src/evaluation/evaluate.ts
@@ -58,12 +58,15 @@
 
 import { existsSync } from 'node:fs';
 import path from 'node:path';
+import micromatch from 'micromatch';
 import { buildDirectoryChain, findGitRoot } from './file-utils.js';
 
 import type { AssertFn } from './assertions.js';
 import { DEFAULT_THRESHOLD } from './graders/scoring.js';
+import type { EvalMetadata } from './metadata.js';
 import { runEvaluation } from './orchestrator.js';
 import { createFunctionProvider } from './providers/function-provider.js';
+import type { ProviderFactoryFn } from './providers/provider-registry.js';
 import { readTargetDefinitions } from './providers/targets-file.js';
 import { type ResolvedTarget, resolveTargetDefinition } from './providers/targets.js';
 import type { TargetDefinition } from './providers/types.js';
@@ -77,7 +80,7 @@ import type {
   InlineAssertEvaluatorConfig,
   WorkspaceHookConfig,
 } from './types.js';
-import { loadTests } from './yaml-parser.js';
+import { loadTestSuite } from './yaml-parser.js';
 
 /**
  * Inline test definition for the programmatic API.
@@ -170,6 +173,8 @@ export interface EvalConfig {
   readonly task?: (input: string) => string | Promise<string>;
   /** Suite-level assertions applied to all tests */
   readonly assert?: readonly AssertEntry[];
+  /** Optional suite metadata used by CLI discovery, tagging, and reporting. */
+  readonly metadata?: EvalMetadata;
   /** Filter tests by ID pattern(s) (glob supported). Arrays use OR logic. */
   readonly filter?: string | readonly string[];
   /** Maximum concurrent workers (default: 3) */
@@ -192,6 +197,33 @@ export interface EvalConfig {
   readonly budgetUsd?: number;
 }
 
+/**
+ * Result of `materializeEvalConfig()`: concrete tests plus the effective
+ * suite-level settings for one eval config.
+ */
+export interface MaterializedEvalConfig {
+  /** Resolved spec file path, or a synthetic `__programmatic__.yaml` path for inline configs. */
+  readonly testFilePath: string;
+  /** Tests to run, after filtering and programmatic suite-level overrides. */
+  readonly tests: readonly EvalTest[];
+  /** Worker count; an explicit config value takes precedence over the spec file's. */
+  readonly workers?: number;
+  /** Cache toggle, from the config or else the spec file's cache settings. */
+  readonly cache?: boolean;
+  /** Budget in USD, from the config or else the spec file. */
+  readonly budgetUsd?: number;
+  /** Threshold, from the config or else the spec file. */
+  readonly threshold?: number;
+  /** Suite metadata, from the config or else the spec file. */
+  readonly metadata?: EvalMetadata;
+  /** Target definition: `config.target`, or else the spec file's inline target. */
+  readonly target?: TargetDefinition;
+  /** The config's `task` function, passed through unchanged. */
+  readonly task?: (input: string) => string | Promise<string>;
+  /** Provider factory reported by the loaded spec file; not set for inline configs. */
+  readonly providerFactory?: ProviderFactoryFn;
+}
+
 /**
  * Summary statistics for an evaluation run.
  */
@@ -269,19 +287,22 @@ export async function evaluate(config: EvalConfig): Promise<EvalRunResult> {
   const gitRoot = await findGitRoot(process.cwd());
   const repoRoot = gitRoot ?? process.cwd();
 
-  const testFilePath = config.specFile
-    ? path.resolve(config.specFile)
-    : path.join(process.cwd(), '__programmatic__.yaml');
+  const materialized = await materializeEvalConfig(config, {
+    repoRoot,
+    baseDir: process.cwd(),
+  });
+  const testFilePath = materialized.testFilePath;
 
   // Load .env files from the eval file hierarchy so nested eval-local .env
   // files participate even when the command is launched from a parent folder.
   await loadEnvHierarchy(repoRoot, testFilePath);
 
   let resolvedTarget: ResolvedTarget;
-  let taskProvider: ReturnType<typeof createFunctionProvider> | undefined;
-  if (config.task) {
-    // Wrap task function as a Provider
-    taskProvider = createFunctionProvider(config.task);
+  let providerFactory: ProviderFactoryFn | undefined;
+  if (config.task || materialized.providerFactory) {
+    providerFactory = config.task
+      ? () => createFunctionProvider(config.task as (input: string) => string | Promise<string>)
+      : materialized.providerFactory;
     resolvedTarget = {
       kind: 'mock',
       name: 'custom-task',
@@ -292,100 +313,29 @@ export async function evaluate(config: EvalConfig): Promise<EvalRunResult> {
     let targetDef: TargetDefinition;
     if (config.target) {
       targetDef = config.target;
+    } else if (materialized.target) {
+      targetDef = materialized.target;
     } else {
       targetDef = (await discoverDefaultTarget(repoRoot)) ?? { name: 'default', provider: 'mock' };
     }
     resolvedTarget = resolveTargetDefinition(targetDef);
   }
 
-  let evalCases: readonly EvalTest[] | EvalTest[];
-
-  if (config.specFile) {
-    // File-based mode: load from YAML
-    evalCases = await loadTests(testFilePath, repoRoot, {
-      verbose: config.verbose,
-      filter: config.filter,
-    });
-  } else {
-    // Build workspace config with before_all hook if beforeAll is provided
-    const suiteWorkspace = config.beforeAll
-      ? { hooks: { before_all: toBeforeAllHook(config.beforeAll) } }
-      : undefined;
-
-    // Inline mode: convert EvalTestInput[] to EvalTest[]
-    evalCases = (config.tests ?? []).map((test): EvalTest => {
-      // Conversation mode: use turns[] for input/question derivation
-      const isConversation = test.mode === 'conversation' || (test.turns && test.turns.length > 0);
-
-      if (!isConversation && !test.input) {
-        throw new Error(`Test '${test.id}': input is required for non-conversation tests`);
-      }
-
-      const input = isConversation
-        ? toMessageArray(test.turns?.[0]?.input ?? '')
-        : toMessageArray(test.input ?? '');
-
-      const question = isConversation
-        ? extractQuestion(test.turns?.[0]?.input ?? '')
-        : extractQuestion(test.input ?? '');
-
-      const expectedOutputValue = test.expectedOutput ?? test.expected_output;
-      const expectedOutput = expectedOutputValue
-        ? ([
-            { role: 'assistant' as const, content: expectedOutputValue },
-          ] as EvalTest['expected_output'])
-        : [];
-
-      // Convert inline assertions to evaluator config format
-      const allAssertions = [...(test.assert ?? []), ...(config.assert ?? [])];
-      const assertConfigs = convertAssertions(allAssertions);
-
-      // Convert conversation turns if present — keep input/expected_output as
-      // TestMessageContent (matching YAML parser behavior), not wrapped in message arrays.
-      const turns: ConversationTurn[] | undefined = test.turns?.map((turn) => {
-        const turnExpected = turn.expectedOutput ?? turn.expected_output;
-        return {
-          input: turn.input as ConversationTurn['input'],
-          ...(turnExpected !== undefined && {
-            expected_output: turnExpected as ConversationTurn['expected_output'],
-          }),
-          assertions: turn.assert ? convertAssertions([...turn.assert]) : undefined,
-        };
-      });
-
-      return {
-        id: test.id,
-        criteria: test.criteria ?? '',
-        question: String(question),
-        input,
-        expected_output: expectedOutput,
-        reference_answer: expectedOutputValue,
-        file_paths: [],
-        assertions: assertConfigs.length > 0 ? assertConfigs : undefined,
-        metadata: test.metadata,
-        ...(suiteWorkspace && { workspace: suiteWorkspace }),
-        ...(isConversation && { mode: 'conversation' as const }),
-        ...(turns && { turns }),
-        ...(test.aggregation && { aggregation: test.aggregation }),
-      };
-    });
-  }
-
   const collectedResults: EvaluationResult[] = [];
 
   const results = await runEvaluation({
     testFilePath,
     repoRoot,
     target: resolvedTarget,
-    ...(taskProvider ? { providerFactory: () => taskProvider } : {}),
+    ...(providerFactory ? { providerFactory } : {}),
     maxRetries: config.maxRetries ?? 2,
     agentTimeoutMs: config.agentTimeoutMs,
     verbose: config.verbose,
     maxConcurrency: config.workers ?? 3,
     filter: config.filter,
     threshold: config.threshold,
-    evalCases,
-    ...(config.budgetUsd !== undefined && { budgetUsd: config.budgetUsd }),
+    evalCases: materialized.tests,
+    ...(materialized.budgetUsd !== undefined && { budgetUsd: materialized.budgetUsd }),
     onResult: async (result) => {
       collectedResults.push(result);
       config.onResult?.(result);
@@ -401,6 +351,71 @@ export async function evaluate(config: EvalConfig): Promise<EvalRunResult> {
   };
 }
 
+/**
+ * Resolve an `EvalConfig` into concrete tests plus effective suite settings.
+ *
+ * With `specFile`, the suite is loaded through `loadTestSuite` and explicit
+ * config values take precedence over the file's own. Without it, `config.tests`
+ * is converted directly, using a synthetic `__programmatic__.yaml` path.
+ *
+ * @param config - Programmatic eval configuration.
+ * @param options - `baseDir` anchors relative paths (default: cwd); `repoRoot`
+ *   defaults to `findGitRoot(baseDir)`, falling back to `baseDir`; `filter`
+ *   overrides `config.filter`; `category` is forwarded to the tests.
+ * @returns Materialized tests and suite-level settings.
+ */
+export async function materializeEvalConfig(
+  config: EvalConfig,
+  options?: {
+    readonly repoRoot?: string;
+    readonly baseDir?: string;
+    readonly filter?: string | readonly string[];
+    readonly category?: string;
+  },
+): Promise<MaterializedEvalConfig> {
+  const category = options?.category;
+  const filter = options?.filter ?? config.filter;
+  const baseDir = options?.baseDir ?? process.cwd();
+  const repoRoot = options?.repoRoot ?? (await findGitRoot(baseDir)) ?? baseDir;
+
+  // Inline mode: no spec file, so the tests come straight from the config.
+  if (!config.specFile) {
+    const testFilePath = path.join(baseDir, '__programmatic__.yaml');
+    const { workers, cache, budgetUsd, threshold, metadata, target, task } = config;
+    return {
+      testFilePath,
+      tests: buildInlineEvalTests(config, { filter, category, testFilePath }),
+      workers,
+      cache,
+      budgetUsd,
+      threshold,
+      metadata,
+      target,
+      task,
+    };
+  }
+
+  // File mode: load the suite, then let explicit config values win.
+  const testFilePath = path.resolve(baseDir, config.specFile);
+  const suite = await loadTestSuite(testFilePath, repoRoot, {
+    verbose: config.verbose,
+    filter,
+    category,
+  });
+  return {
+    testFilePath,
+    tests: applyProgrammaticSuiteOverrides(suite.tests, config),
+    workers: config.workers ?? suite.workers,
+    cache: config.cache ?? suite.cacheConfig?.enabled,
+    budgetUsd: config.budgetUsd ?? suite.budgetUsd,
+    threshold: config.threshold ?? suite.threshold,
+    metadata: config.metadata ?? suite.metadata,
+    target: config.target ?? suite.inlineTarget,
+    task: config.task,
+    providerFactory: suite.providerFactory,
+  };
+}
+
 /**
  * Convert a flexible input (string or message array) to the internal TestMessage[] format.
  */
@@ -454,6 +460,116 @@ function convertAssertions(entries: readonly AssertEntry[]): GraderConfig[] {
   });
 }
 
+/** Builds `EvalTest` objects from inline `config.tests`, keeping only IDs the filter matches. */
+function buildInlineEvalTests(
+  config: EvalConfig,
+  options: {
+    readonly filter?: string | readonly string[];
+    readonly category?: string;
+    readonly testFilePath: string;
+  },
+): readonly EvalTest[] {
+  const { filter, category, testFilePath } = options;
+  // A suite-level beforeAll is attached to every test as a before_all hook.
+  const workspace = config.beforeAll
+    ? { hooks: { before_all: toBeforeAllHook(config.beforeAll) } }
+    : undefined;
+  const fileStem = path
+    .basename(testFilePath)
+    .replace(/\.eval\.[cm]?ts$/i, '')
+    .replace(/\.[cm]?ts$/i, '');
+  const suite = config.metadata?.name ?? (fileStem || 'eval');
+
+  const selected = (config.tests ?? []).filter(
+    (test) => !filter || matchesFilter(test.id, filter),
+  );
+
+  return selected.map((test): EvalTest => {
+    const isConversation = test.mode === 'conversation' || (test.turns && test.turns.length > 0);
+    if (!isConversation && !test.input) {
+      throw new Error(`Test '${test.id}': input is required for non-conversation tests`);
+    }
+
+    // Conversation tests derive their input and question from the first turn.
+    const leadInput = (isConversation ? test.turns?.[0]?.input : test.input) ?? '';
+    const input = toMessageArray(leadInput);
+    const question = extractQuestion(leadInput);
+
+    const referenceAnswer = test.expectedOutput ?? test.expected_output;
+    const expectedOutput = referenceAnswer
+      ? ([{ role: 'assistant' as const, content: referenceAnswer }] as EvalTest['expected_output'])
+      : [];
+
+    // Test-level assertions are listed ahead of the suite-level ones.
+    const graders = convertAssertions([...(test.assert ?? []), ...(config.assert ?? [])]);
+    const turns: ConversationTurn[] | undefined = test.turns?.map((turn) => {
+      const turnExpected = turn.expectedOutput ?? turn.expected_output;
+      const turnGraders = turn.assert ? convertAssertions([...turn.assert]) : undefined;
+      return {
+        input: turn.input as ConversationTurn['input'],
+        ...(turnExpected !== undefined && {
+          expected_output: turnExpected as ConversationTurn['expected_output'],
+        }),
+        assertions: turnGraders,
+      };
+    });
+
+    return {
+      id: test.id,
+      suite,
+      category,
+      criteria: test.criteria ?? '',
+      question: String(question),
+      input,
+      expected_output: expectedOutput,
+      reference_answer: referenceAnswer,
+      file_paths: [],
+      assertions: graders.length > 0 ? graders : undefined,
+      metadata: test.metadata,
+      ...(workspace && { workspace }),
+      ...(isConversation && { mode: 'conversation' as const }),
+      ...(turns && { turns }),
+      ...(test.aggregation && { aggregation: test.aggregation }),
+    };
+  });
+}
+
+/** Layers `config.assert` and `config.beforeAll` onto tests loaded from a spec file. */
+function applyProgrammaticSuiteOverrides(
+  tests: readonly EvalTest[],
+  config: EvalConfig,
+): readonly EvalTest[] {
+  const hasSuiteAssertions = Boolean(config.assert?.length);
+  if (!config.beforeAll && !hasSuiteAssertions) {
+    return tests;
+  }
+
+  const suiteHooks = config.beforeAll
+    ? { before_all: toBeforeAllHook(config.beforeAll) }
+    : undefined;
+  const extraGraders = config.assert ? convertAssertions(config.assert) : [];
+
+  return tests.map((test) => {
+    // A before_all hook the test already declares is kept as-is.
+    const inheritedHooks = test.workspace?.hooks?.before_all ? {} : suiteHooks;
+    return {
+      ...test,
+      ...(extraGraders.length > 0 && {
+        assertions: [...(test.assertions ?? []), ...extraGraders],
+      }),
+      ...(suiteHooks && {
+        workspace: { ...test.workspace, hooks: { ...test.workspace?.hooks, ...inheritedHooks } },
+      }),
+    };
+  });
+}
+
+/** True when `id` matches the glob `filter`, or any glob in a filter list (OR logic). */
+function matchesFilter(id: string, filter: string | readonly string[]): boolean {
+  const globs = typeof filter === 'string' ? [filter] : filter;
+  return globs.some((glob) => micromatch.isMatch(id, glob));
+}
+
 /**
  * Map user-facing assertion type names to internal grader type names.
  * Handles snake_case to kebab-case normalization (e.g., 'llm_grader' -> 'llm-grader').
diff --git a/packages/core/src/evaluation/loaders/ts-eval-loader.ts b/packages/core/src/evaluation/loaders/ts-eval-loader.ts
index eb4946e3b..d406b90ef 100644
--- a/packages/core/src/evaluation/loaders/ts-eval-loader.ts
+++ b/packages/core/src/evaluation/loaders/ts-eval-loader.ts
@@ -11,7 +11,11 @@
  */
 import path from 'node:path';
 import { pathToFileURL } from 'node:url';
-import type { EvalConfig } from '../evaluate.js';
+import { type EvalConfig, materializeEvalConfig } from '../evaluate.js';
+import { createFunctionProvider } from '../providers/function-provider.js';
+import type { ProviderFactoryFn } from '../providers/provider-registry.js';
+import type { TargetDefinition } from '../providers/types.js';
+import type { EvalSuiteResult } from '../yaml-parser.js';
 
 const EXPORT_NAMES = ['default', 'config', 'evalConfig'] as const;
 
@@ -20,6 +24,17 @@ export interface TsEvalResult {
   readonly filePath: string;
 }
 
+/**
+ * Suite result for a TypeScript eval file: the shared `EvalSuiteResult` shape
+ * plus the target and provider wiring a TS config can declare.
+ */
+export interface TsEvalSuiteResult extends EvalSuiteResult {
+  /** Target definition taken from the materialized config, when one is set. */
+  readonly inlineTarget?: TargetDefinition;
+  /** Provider factory built for the config's `task` function, when one is set. */
+  readonly providerFactory?: ProviderFactoryFn;
+}
+
 /**
  * Import a *.eval.ts file and extract the EvalConfig export.
  * Tries default, `config`, and `evalConfig` named exports in priority order.
@@ -47,6 +56,63 @@ export async function loadTsEvalFile(filePath: string): Promise<TsEvalResult> {
   return { config, filePath: absolutePath };
 }
 
+/**
+ * Load a TypeScript eval file and materialize it into the suite shape shared
+ * with the other loaders.
+ *
+ * @param filePath - Path to the eval module; passed to `loadTsEvalFile`.
+ * @param repoRoot - Repository root forwarded to `materializeEvalConfig`.
+ * @param options - Optional test-ID filter and category forwarded to materialization.
+ * @returns The materialized tests plus whichever suite-level settings are defined.
+ */
+export async function loadTsEvalSuite(
+  filePath: string,
+  repoRoot: string,
+  options?: {
+    readonly verbose?: boolean;
+    readonly filter?: string | readonly string[];
+    readonly category?: string;
+  },
+): Promise<TsEvalSuiteResult> {
+  const { config, filePath: absolutePath } = await loadTsEvalFile(filePath);
+  const materialized = await materializeEvalConfig(config, {
+    repoRoot,
+    baseDir: path.dirname(absolutePath),
+    filter: options?.filter,
+    category: options?.category,
+  });
+
+  // Inline configs are materialized against a synthetic `__programmatic__.yaml`
+  // path, so without `metadata.name` their tests would carry that placeholder
+  // as the suite name. Name the suite after the eval module instead.
+  const moduleName = path
+    .basename(absolutePath)
+    .replace(/\.eval\.[cm]?ts$/i, '')
+    .replace(/\.[cm]?ts$/i, '');
+  const keepSuiteNames = Boolean(config.specFile) || config.metadata?.name != null;
+  const tests = keepSuiteNames
+    ? materialized.tests
+    : materialized.tests.map((test) => ({ ...test, suite: moduleName || 'eval' }));
+
+  // A task() on this config wins; otherwise keep a provider factory surfaced by
+  // a nested spec file, the same precedence evaluate() applies.
+  const task = materialized.task;
+  const providerFactory: ProviderFactoryFn | undefined = task
+    ? () => createFunctionProvider(task)
+    : materialized.providerFactory;
+
+  return {
+    tests,
+    ...(materialized.workers !== undefined && { workers: materialized.workers }),
+    ...(materialized.cache !== undefined && { cacheConfig: { enabled: materialized.cache } }),
+    ...(materialized.budgetUsd !== undefined && { budgetUsd: materialized.budgetUsd }),
+    ...(materialized.threshold !== undefined && { threshold: materialized.threshold }),
+    ...(materialized.metadata !== undefined && { metadata: materialized.metadata }),
+    ...(materialized.target !== undefined && { inlineTarget: materialized.target }),
+    ...(providerFactory !== undefined && { providerFactory }),
+  };
+}
+
 /**
  * Duck-type check for EvalConfig-like objects.
  * An EvalConfig must have at least one of: tests, specFile, or target.
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index 928e73d5e..4d8cf057d 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -210,6 +210,10 @@ export type EvalSuiteResult = {
   readonly threshold?: number;
   /** Resolved workspace.path from the eval YAML (after env-var expansion), if set */
   readonly workspacePath?: string;
+  /** Inline target definition from a TS eval config. */
+  readonly inlineTarget?: import('./providers/types.js').TargetDefinition;
+  /** Custom provider factory from a TS eval config task(). */
+  readonly providerFactory?: import('./providers/provider-registry.js').ProviderFactoryFn;
 };
 
 /**
@@ -228,6 +232,10 @@ export async function loadTestSuite(
   if (format === 'agent-skills-json') {
     return { tests: await loadTestsFromAgentSkills(evalFilePath) };
   }
+  if (format === 'typescript') {
+    const { loadTsEvalSuite } = await import('./loaders/ts-eval-loader.js');
+    return loadTsEvalSuite(evalFilePath, resolveToAbsolutePath(repoRoot), options);
+  }
   const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
     evalFilePath,
     repoRoot,
@@ -267,6 +275,11 @@ export async function loadTests(
   if (format === 'agent-skills-json') {
     return loadTestsFromAgentSkills(evalFilePath);
   }
+  if (format === 'typescript') {
+    const { loadTsEvalSuite } = await import('./loaders/ts-eval-loader.js');
+    const suite = await loadTsEvalSuite(evalFilePath, resolveToAbsolutePath(repoRoot), options);
+    return suite.tests;
+  }
   const { tests } = await loadTestsFromYaml(evalFilePath, repoRoot, options);
   return tests;
 }
diff --git a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
index 9a91c9e6d..b8d32524d 100644
--- a/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
+++ b/packages/core/test/evaluation/evaluate-programmatic-api.test.ts
@@ -6,6 +6,7 @@
  */
 
 import { describe, expect, it } from 'bun:test';
+import path from 'node:path';
 import { evaluate } from '../../src/evaluation/evaluate.js';
 
 describe('evaluate() — programmatic API extensions', () => {
@@ -225,6 +226,17 @@ describe('evaluate() — programmatic API extensions', () => {
     expect(summary.passed).toBe(1);
   });
 
+  it('uses inline target from a TypeScript specFile', async () => {
+    const fixturesDir = path.join(import.meta.dir, 'loaders', 'fixtures');
+
+    const { summary } = await evaluate({
+      specFile: path.join(fixturesDir, 'default-export.eval.ts'),
+    });
+
+    expect(summary.total).toBe(1);
+    expect(summary.passed).toBe(1);
+  });
+
   // ---------------------------------------------------------------------------
   // Validation
   // ---------------------------------------------------------------------------
diff --git a/packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts b/packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts
index 13c449055..df6bdcafd 100644
--- a/packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts
+++ b/packages/core/test/evaluation/loaders/fixtures/default-export.eval.ts
@@ -1,6 +1,10 @@
 import type { EvalConfig } from '../../../../src/evaluation/evaluate.js';
 
 const config: EvalConfig = {
+  metadata: {
+    name: 'default-export-suite',
+    tags: ['sdk', 'typescript'],
+  },
   tests: [
     {
       id: 'greeting',
@@ -8,7 +12,11 @@ const config: EvalConfig = {
       assert: [{ type: 'contains', value: 'hello' }],
     },
   ],
-  target: { provider: 'mock_agent' },
+  workers: 2,
+  cache: false,
+  budgetUsd: 1.5,
+  threshold: 0.9,
+  target: { name: 'inline-target', provider: 'mock', response: 'hello there' },
 };
 
 export default config;
diff --git a/packages/core/test/evaluation/loaders/ts-eval-loader.test.ts b/packages/core/test/evaluation/loaders/ts-eval-loader.test.ts
index 0322cd495..8abfc74bd 100644
--- a/packages/core/test/evaluation/loaders/ts-eval-loader.test.ts
+++ b/packages/core/test/evaluation/loaders/ts-eval-loader.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from 'bun:test';
 import path from 'node:path';
 
 import { loadTsEvalFile } from '../../../src/evaluation/loaders/ts-eval-loader.js';
+import { loadTestSuite, loadTests } from '../../../src/evaluation/yaml-parser.js';
 
 const fixtureDir = path.join(import.meta.dir, 'fixtures');
 
@@ -36,4 +37,28 @@ describe('loadTsEvalFile', () => {
     expect(path.isAbsolute(result.filePath)).toBe(true);
     expect(result.filePath).toContain('default-export.eval.ts');
   });
+
+  it('materializes a TS eval through loadTestSuite', async () => {
+    const evalFile = path.join(fixtureDir, 'default-export.eval.ts');
+    const loaded = await loadTestSuite(evalFile, fixtureDir, { category: 'sdk' });
+    expect(loaded.tests).toHaveLength(1);
+    expect(loaded.tests[0].suite).toBe('default-export-suite');
+    expect(loaded.tests[0].category).toBe('sdk');
+    // Suite-level settings carried over from the fixture config.
+    expect(loaded.metadata?.tags).toEqual(['sdk', 'typescript']);
+    expect(loaded.workers).toBe(2);
+    expect(loaded.cacheConfig?.enabled).toBe(false);
+    expect(loaded.budgetUsd).toBe(1.5);
+    expect(loaded.threshold).toBe(0.9);
+    expect(loaded.inlineTarget?.name).toBe('inline-target');
+  });
+
+  it('routes TypeScript evals through loadTests', async () => {
+    const evalFile = path.join(fixtureDir, 'default-export.eval.ts');
+    const loadedTests = await loadTests(evalFile, fixtureDir, { category: 'sdk' });
+    // The fixture declares a single test with id "greeting".
+    expect(loadedTests).toHaveLength(1);
+    expect(loadedTests[0].id).toBe('greeting');
+    expect(loadedTests[0].category).toBe('sdk');
+  });
 });