diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts
index 79385fe81..013f29b69 100644
--- a/packages/core/src/evaluation/loaders/evaluator-parser.ts
+++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts
@@ -183,7 +183,7 @@ async function parseEvaluatorList(
   const customTypeName = isCustomType ? typeValue : undefined;

-  // Auto-generate name for assertion types if not provided
+  // Auto-generate name from type if not provided
   const name =
     rawName ??
     (isCustomType ? typeValue : generateAssertionName(typeValue as EvaluatorKind, rawEvaluator));

@@ -1539,32 +1539,12 @@ export async function parsePreprocessors(
   return preprocessors;
 }

-/** Assertion evaluator types that support auto-generated names. */
-const ASSERTION_TYPES = new Set([
-  'skill-trigger',
-  'contains',
-  'contains-any',
-  'contains-all',
-  'icontains',
-  'icontains-any',
-  'icontains-all',
-  'starts-with',
-  'ends-with',
-  'regex',
-  'is-json',
-  'equals',
-  'rubrics',
-]);
-
 /**
- * Generate a descriptive name for assertion-type evaluators when no explicit name is given.
- * Returns undefined for non-assertion types (those still require an explicit name).
+ * Generate a descriptive name for evaluators when no explicit name is given.
+ * Returns the type name as a fallback so evaluators are never skipped just
+ * because the author omitted `name`.
  */
-function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): string | undefined {
-  if (!ASSERTION_TYPES.has(typeValue)) {
-    return undefined;
-  }
-
+function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): string {
   const value = asString(rawEvaluator.value);
   const arrayValue = Array.isArray(rawEvaluator.value) ? rawEvaluator.value : undefined;

@@ -1598,7 +1578,9 @@ function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): str
     case 'rubrics':
       return 'rubrics';
     default:
-      return undefined;
+      // For all other evaluator types (llm-grader, code-grader, latency, etc.),
+      // use the type name itself as the auto-derived name.
+      return typeValue;
   }
 }
diff --git a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
index 2f9248750..00a3002b1 100644
--- a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
+++ b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
@@ -1,4 +1,4 @@
-import { afterAll, beforeAll, describe, expect, it } from 'bun:test';
+import { afterAll, beforeAll, describe, expect, it, spyOn } from 'bun:test';
 import { mkdir, rm, writeFile } from 'node:fs/promises';
 import os from 'node:os';
 import path from 'node:path';
@@ -665,7 +665,7 @@ describe('parseEvaluators - kebab-case type normalization', () => {
 });

 describe('parseEvaluators - score_ranges rubrics', () => {
-  it('parses valid score_ranges with required_min_score', async () => {
+  it('parses valid score_ranges with min_score', async () => {
     const rawEvalCase = {
       evaluators: [
         {
@@ -675,7 +675,7 @@
           {
             id: 'accuracy',
             weight: 2.0,
-            required_min_score: 7,
+            min_score: 0.7,
             score_ranges: [
               { score_range: [0, 3], outcome: 'Incorrect' },
               { score_range: [4, 6], outcome: 'Partially correct' },
@@ -698,6 +698,7 @@
     const rubric = config.rubrics?.[0];
     expect(rubric?.id).toBe('accuracy');
     expect(rubric?.weight).toBe(2.0);
+    expect(rubric?.min_score).toBe(0.7);
     expect(rubric?.required_min_score).toBe(7);
     expect(rubric?.score_ranges).toHaveLength(4);
   }
@@ -752,6 +753,7 @@
   });

   it('skips rubric items that use legacy description field without outcome', async () => {
+    const warnSpy = spyOn(console, 'warn').mockImplementation(() => {});
     const rawEvalCase = {
       evaluators: [
         {
@@ -777,6 +779,8 @@
     // Rubric should be skipped since it has no 'outcome' field
     expect(config.rubrics ?? []).toHaveLength(0);
   }
+    expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('missing outcome'));
+    warnSpy.mockRestore();
   });
 });

@@ -791,7 +795,7 @@
           {
             id: 'accuracy',
             weight: 2.0,
-            required_min_score: 7,
+            min_score: 0.7,
             score_ranges: {
               0: 'Completely wrong',
               3: 'Partially correct',
@@ -813,6 +817,7 @@
     expect(config.rubrics).toHaveLength(1);
     const rubric = config.rubrics?.[0];
     expect(rubric?.id).toBe('accuracy');
+    expect(rubric?.min_score).toBe(0.7);
     expect(rubric?.required_min_score).toBe(7);
     expect(rubric?.score_ranges).toHaveLength(4);
     expect(rubric?.score_ranges?.[0]).toEqual({
@@ -1430,6 +1435,7 @@
   });

   it('skips rubrics with empty criteria array', async () => {
+    const warnSpy = spyOn(console, 'warn').mockImplementation(() => {});
     const evaluators = await parseEvaluators(
       {
         assertions: [
@@ -1444,9 +1450,14 @@
       'test-1',
     );
     expect(evaluators).toBeUndefined();
+    expect(warnSpy).toHaveBeenCalledWith(
+      expect.stringContaining('criteria must be a non-empty array'),
+    );
+    warnSpy.mockRestore();
   });

   it('skips rubrics with missing criteria', async () => {
+    const warnSpy = spyOn(console, 'warn').mockImplementation(() => {});
     const evaluators = await parseEvaluators(
       {
         assertions: [
@@ -1460,6 +1471,10 @@
       'test-1',
     );
     expect(evaluators).toBeUndefined();
+    expect(warnSpy).toHaveBeenCalledWith(
+      expect.stringContaining('criteria must be a non-empty array'),
+    );
+    warnSpy.mockRestore();
   });

   it('supports string shorthand in criteria', async () => {
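
Note for reviewers: below is a minimal sketch of the behavior the parser change enables, using the same parseEvaluators(rawCase, testId) call shape the tests exercise. The import path, the input objects, and the grader configuration are illustrative assumptions, not fixtures from this diff.

    import { parseEvaluators } from './evaluator-parser'; // import path illustrative

    // Previously a non-assertion evaluator with no explicit `name` was skipped,
    // because generateAssertionName returned undefined for its type. With this
    // change, the type string itself becomes the fallback name.
    const evaluators = await parseEvaluators(
      {
        assertions: [
          { type: 'contains', value: 'hello' }, // still auto-named from its value, as before
          { type: 'llm-grader' }, // now falls back to the name 'llm-grader' (grader config omitted)
        ],
      },
      'example-1',
    );
    // Every entry now carries a name, so none are dropped solely for omitting `name`.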
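
The test-side changes all follow one pattern: capture console.warn with a bun:test spy so the warning can be asserted on (and kept out of test output), then restore it. A self-contained sketch of that pattern, with a hypothetical warn-and-skip helper standing in for the parser:

    import { describe, expect, it, spyOn } from 'bun:test';

    // Hypothetical helper that warns and skips invalid input, mirroring how the
    // parser reports rubrics with missing outcomes or empty criteria.
    function parsePositive(raw: unknown): number | undefined {
      if (typeof raw !== 'number' || raw <= 0) {
        console.warn(`parsePositive: expected a positive number, got ${JSON.stringify(raw)}`);
        return undefined;
      }
      return raw;
    }

    describe('warn-and-skip pattern', () => {
      it('warns and returns undefined on invalid input', () => {
        const warnSpy = spyOn(console, 'warn').mockImplementation(() => {}); // silence and capture
        expect(parsePositive(-1)).toBeUndefined();
        expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('positive number'));
        warnSpy.mockRestore(); // restore the real console.warn for later tests
      });
    });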