Skip to content

Commit 4cac035

Browse files
authored
feat(llmobs): add boolean evaluation metric support (#6919)
1 parent 19a7bfc commit 4cac035

File tree

4 files changed

+46
-5
lines changed

4 files changed

+46
-5
lines changed

docs/test.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,12 @@ llmobs.trace({ kind: 'llm', name: 'myLLM' }, (span) => {
662662
tags: {},
663663
timestampMs: Date.now()
664664
})
665+
666+
llmobs.submitEvaluation(llmobsSpanCtx, {
667+
label: 'toxicity',
668+
metricType: 'boolean',
669+
value: true
670+
})
665671
})
666672

667673
// annotate a span

index.d.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3011,15 +3011,15 @@ declare namespace tracer {
30113011
label: string,
30123012

30133013
/**
3014-
* The type of evaluation metric, one of 'categorical' or 'score'
3014+
* The type of evaluation metric, one of 'categorical', 'score', or 'boolean'
30153015
*/
3016-
metricType: 'categorical' | 'score',
3016+
metricType: 'categorical' | 'score' | 'boolean',
30173017

30183018
/**
30193019
* The value of the evaluation metric.
3020-
* Must be string for 'categorical' metrics and number for 'score' metrics.
3020+
* Must be string for 'categorical' metrics, number for 'score' metrics, and boolean for 'boolean' metrics.
30213021
*/
3022-
value: string | number,
3022+
value: string | number | boolean,
30233023

30243024
/**
30253025
* An object of string key-value pairs to tag the evaluation metric with.

packages/dd-trace/src/llmobs/sdk.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ class LLMObs extends NoopLLMObs {
369369
err = 'invalid_metric_label'
370370
throw new Error('label must be the specified name of the evaluation metric')
371371
}
372-
if (!metricType || !['categorical', 'score'].includes(metricType)) {
372+
if (!metricType || !['categorical', 'score', 'boolean'].includes(metricType)) {
373373
err = 'invalid_metric_type'
374374
throw new Error('metricType must be one of "categorical", "score", or "boolean"')
375375
}
@@ -381,6 +381,10 @@ class LLMObs extends NoopLLMObs {
381381
err = 'invalid_metric_value'
382382
throw new Error('value must be a number for a score metric.')
383383
}
384+
if (metricType === 'boolean' && typeof value !== 'boolean') {
385+
err = 'invalid_metric_value'
386+
throw new Error('value must be a boolean for a boolean metric')
387+
}
384388

385389
const evaluationTags = {
386390
'ddtrace.version': tracerVersion,

packages/dd-trace/test/llmobs/sdk/index.spec.js

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ const { expect } = require('chai')
44
const { channel } = require('dc-polyfill')
55
const { describe, it, beforeEach, afterEach, before, after } = require('mocha')
66
const sinon = require('sinon')
7+
const assert = require('node:assert')
78

89
const { getConfigFresh } = require('../../helpers/config')
910

@@ -1210,6 +1211,36 @@ describe('sdk', () => {
12101211
expect(LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]).to.have.property('timestamp_ms', 1234)
12111212
Date.now.restore()
12121213
})
1214+
1215+
it('submits a boolean evaluation metric', () => {
1216+
llmobs.submitEvaluation(spanCtx, {
1217+
label: 'has_toxicity',
1218+
metricType: 'boolean',
1219+
value: true,
1220+
timestampMs: 1234
1221+
})
1222+
1223+
const evalMetric = LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]
1224+
1225+
assert.deepEqual(evalMetric, {
1226+
span_id: '5678',
1227+
trace_id: '1234',
1228+
label: 'has_toxicity',
1229+
metric_type: 'boolean',
1230+
ml_app: 'mlApp',
1231+
boolean_value: true,
1232+
timestamp_ms: 1234,
1233+
tags: [`ddtrace.version:${tracerVersion}`, 'ml_app:mlApp']
1234+
})
1235+
})
1236+
1237+
it('throws an error when submitting a non-boolean boolean evaluation metric', () => {
1238+
assert.throws(() => llmobs.submitEvaluation(spanCtx, {
1239+
label: 'has_toxicity',
1240+
metricType: 'boolean',
1241+
value: 'it is super toxic!'
1242+
}), { message: 'value must be a boolean for a boolean metric' })
1243+
})
12131244
})
12141245

12151246
describe('flush', () => {

0 commit comments

Comments
 (0)