Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
cbe1ee7
v0.96.7
jp-agenta Apr 17, 2026
8589228
[feat] Persist evaluator app selection across sessions and commits
ardaerzin Apr 17, 2026
fc4037a
Merge branch 'main' into feature/age-3732-json-field-evaluator-playgr…
ardaerzin Apr 20, 2026
3aeac88
expand evaluator drawers by default when opened from evaluation contexts
ashrafchowdury Apr 20, 2026
e20ae4f
[feat] Add beautified JSON view mode with structured rendering for tr…
ardaerzin Apr 20, 2026
679be80
Merge branch 'main' into release/v0.96.7
bekossy Apr 20, 2026
9c1000e
Merge branch 'release/v0.96.7' into feature/age-3738-fix-rendered-jso…
bekossy Apr 20, 2026
b4c114d
Add 'Copy ID' in prompt page
jp-agenta Apr 20, 2026
1b12018
Fix random slug in human feedback in both the observability drawer an…
jp-agenta Apr 20, 2026
f5c6f3f
Merge branch 'release/v0.96.7' into fix/mislabeled-annotations
jp-agenta Apr 20, 2026
2e0a850
Fix corrupted simple queries and live evaluations
jp-agenta Apr 20, 2026
69bd8df
Fix Online/Live Evals copy
jp-agenta Apr 20, 2026
80e8493
Merge pull request #4192 from Agenta-AI/fix/mislabeled-annotations
jp-agenta Apr 20, 2026
b4c6a04
Merge pull request #4189 from Agenta-AI/feature/age-3738-fix-rendered…
ardaerzin Apr 20, 2026
3e87895
Merge branch 'release/v0.96.7' into feature/age-3732-json-field-evalu…
bekossy Apr 20, 2026
6c998a0
Fix updated Stripe field
jp-agenta Apr 20, 2026
7c8e1c0
Merge branch 'release/v0.96.7' into fix/issue-with-subscription-upgrade
jp-agenta Apr 20, 2026
eb380d7
Merge pull request #4193 from Agenta-AI/fix/issue-with-subscription-u…
jp-agenta Apr 20, 2026
5172569
persist testset selection in localStorage to restore state across dra…
ashrafchowdury Apr 20, 2026
839c4c9
feat: persist and restore exact testcase data in workflow revision dr…
ashrafchowdury Apr 20, 2026
a3643f6
fix: hide deployment options for evaluators
ashrafchowdury Apr 20, 2026
6746a99
Merge branch 'release/v0.96.7' into feature/age-3732-json-field-evalu…
ardaerzin Apr 20, 2026
6650fe0
Merge pull request #4181 from Agenta-AI/feature/age-3732-json-field-e…
ardaerzin Apr 20, 2026
b580001
fix style
ashrafchowdury Apr 20, 2026
a926dd2
Merge pull request #4195 from Agenta-AI/fix/annotation-style-action
ashrafchowdury Apr 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/ee/src/apis/fastapi/billing/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ async def create_checkout(
},
},
#
ui_mode="hosted",
ui_mode="hosted_page",
success_url=success_url,
)

Expand Down
2 changes: 1 addition & 1 deletion api/oss/src/core/annotations/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ async def create(
project_id=project_id,
user_id=user_id,
#
name=simple_evaluator.name if simple_evaluator else None,
name=simple_evaluator.slug if simple_evaluator else None,
#
flags=annotation_flags,
tags=annotation_create.tags,
Expand Down
127 changes: 127 additions & 0 deletions api/oss/src/core/evaluations/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,32 @@ def _first_reference_id(
return None


def _is_invocation_query(data: Any) -> bool:
    """Report whether a query's filtering targets invocation traces.

    Live evaluations require the query filter to target invocation traces.
    Only the entries found directly in ``data.filtering.conditions`` are
    examined.

    Args:
        data: Query data. Every attribute is read with ``getattr``, so an
            object lacking ``filtering`` (or a filtering lacking
            ``conditions``) is treated as having no matching condition.

    Returns:
        True only when at least one condition has field="trace_type",
        operator="is" and value="invocation"; False otherwise, including
        when filtering or its conditions are missing, None, or empty.
    """
    filtering = getattr(data, "filtering", None)
    # Read conditions as defensively as filtering itself: a filtering object
    # without a ``conditions`` attribute means "no match", not AttributeError.
    conditions = getattr(filtering, "conditions", None) or []

    return any(
        getattr(condition, "field", None) == "trace_type"
        and getattr(condition, "operator", None) == "is"
        and getattr(condition, "value", None) == "invocation"
        for condition in conditions
    )


class EvaluationsService:
def __init__(
self,
Expand Down Expand Up @@ -209,6 +235,22 @@ async def refresh_runs(
user_id = run.created_by_id

try:
if not await self._is_live_run_valid(
project_id=project_id,
run=run,
):
log.warning(
"[LIVE] Closing invalid live run (null data or non-invocation trace_type).",
project_id=project_id,
run_id=run.id,
)
await self._close_live_run(
project_id=project_id,
user_id=user_id,
run=run,
)
continue

log.info(
"[LIVE] Dispatching...",
project_id=project_id,
Expand Down Expand Up @@ -239,6 +281,71 @@ async def refresh_runs(

return True

async def _is_live_run_valid(
    self,
    *,
    project_id: UUID,
    run: EvaluationRun,
) -> bool:
    """Check that every query revision a live run references targets invocation traces.

    Returns False when the run has no data or no steps, when no step carries
    a ``query_revision`` reference with an id, or when any referenced
    revision cannot be fetched, has no data, or fails
    ``_is_invocation_query``. Returns True otherwise.
    """
    run_data = run.data
    if not (run_data and run_data.steps):
        return False

    # Gather the query revision ids in step order; steps without a usable
    # ``query_revision`` reference are simply not collected.
    step_refs = (
        (step.references or {}).get("query_revision") for step in run_data.steps
    )
    revision_ids: List[UUID] = [
        ref.id for ref in step_refs if isinstance(ref, Reference) and ref.id
    ]

    if not revision_ids:
        return False

    for revision_id in revision_ids:
        revision = await self.queries_service.fetch_query_revision(
            project_id=project_id,
            #
            query_revision_ref=Reference(id=revision_id),
        )

        # A missing revision and a revision without data are both invalid.
        revision_data = revision.data if revision else None
        if not revision_data or not _is_invocation_query(revision_data):
            return False

    return True

async def _close_live_run(
    self,
    *,
    project_id: UUID,
    user_id: UUID,
    run: EvaluationRun,
) -> None:
    """Mark a live run as inactive and closed via ``edit_run``.

    The run's existing name, description, tags, meta, status and data are
    passed through unchanged; only ``is_active`` and ``is_closed`` are set
    on a copy of the flags (or on fresh flags when the run has none).
    """
    if run.flags:
        closed_flags = run.flags.model_copy()
    else:
        closed_flags = EvaluationRunFlags()
    closed_flags.is_active = False
    closed_flags.is_closed = True

    run_edit = EvaluationRunEdit(
        id=run.id,
        #
        name=run.name,
        description=run.description,
        #
        flags=closed_flags,
        tags=run.tags,
        meta=run.meta,
        #
        status=run.status,
        #
        data=run.data,
    )

    await self.edit_run(
        project_id=project_id,
        user_id=user_id,
        #
        run=run_edit,
    )

async def fetch_live_runs(
self,
*,
Expand Down Expand Up @@ -1706,6 +1813,8 @@ async def create(
evaluator_steps=evaluation.data.evaluator_steps,
#
repeats=evaluation.data.repeats,
#
is_live=evaluation.flags.is_live,
)

if not run_data:
Expand Down Expand Up @@ -1882,6 +1991,8 @@ async def edit(
evaluator_steps=evaluation.data.evaluator_steps,
#
repeats=_evaluation.data.repeats,
#
is_live=(_evaluation.flags.is_live if _evaluation.flags else None),
)

run_edit = EvaluationRunEdit(
Expand Down Expand Up @@ -2351,6 +2462,8 @@ async def _make_evaluation_run_data(
evaluator_steps: Optional[Target] = None,
#
repeats: Optional[int] = None,
#
is_live: Optional[bool] = None,
) -> Optional[EvaluationRunData]:
# IMPLICIT FLAG: is_multivariate=False
# IMPLICIT FLAG: all_inputs=True
Expand Down Expand Up @@ -2385,6 +2498,20 @@ async def _make_evaluation_run_data(
)
return None

if is_live and not query_revision.data:
log.warning(
"[EVAL] [run] [make] [failure] live evaluation requires query with data",
id=query_revision_ref.id,
)
return None

if is_live and not _is_invocation_query(query_revision.data):
log.warning(
"[EVAL] [run] [make] [failure] live evaluation requires trace_type=invocation",
id=query_revision_ref.id,
)
return None

query_variant_ref = Reference(id=query_revision.variant_id)

query_variant = await self.queries_service.fetch_query_variant(
Expand Down
31 changes: 30 additions & 1 deletion api/oss/src/core/queries/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,36 @@ async def create(
return None

# ----------------------------------------------------------------------
# Query revision
# Query revision (placeholder v0 — first revision has its fields nulled)
# ----------------------------------------------------------------------
placeholder_revision_slug = uuid4().hex[-12:]

_query_revision_create = QueryRevisionCreate(
slug=placeholder_revision_slug,
#
name=simple_query_create.name,
description=simple_query_create.description,
#
flags=simple_query_create.flags,
tags=simple_query_create.tags,
meta=simple_query_create.meta,
#
query_id=query.id,
query_variant_id=query_variant.id,
)

placeholder_revision = await self.queries_service.create_query_revision(
project_id=project_id,
user_id=user_id,
#
query_revision_create=_query_revision_create,
)

if placeholder_revision is None:
return None

# ----------------------------------------------------------------------
# Query revision (v1 — carries the actual data)
# ----------------------------------------------------------------------
query_revision_slug = uuid4().hex[-12:]

Expand Down
17 changes: 12 additions & 5 deletions api/oss/src/core/tracing/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,17 +1007,17 @@ async def _resolve_evaluator_references(

references.evaluator = Reference(
id=evaluator_revision.evaluator_id,
slug=(references.evaluator.slug if references.evaluator else None)
or (evaluator.slug if evaluator else None),
slug=(evaluator.slug if evaluator else None)
or (references.evaluator.slug if references.evaluator else None),
)
references.evaluator_variant = Reference(
id=evaluator_revision.evaluator_variant_id,
slug=(
slug=(evaluator_variant.slug if evaluator_variant else None)
or (
references.evaluator_variant.slug
if references.evaluator_variant
else None
)
or (evaluator_variant.slug if evaluator_variant else None),
),
)
references.evaluator_revision = Reference(
id=evaluator_revision.id,
Expand Down Expand Up @@ -1061,6 +1061,12 @@ async def create(
references=_references,
)

span_name = (
references.evaluator.slug
if references.evaluator and references.evaluator.slug
else "annotation"
)

otel_links = await self.tracing_service.create_trace(
organization_id=organization_id,
project_id=project_id,
Expand All @@ -1070,6 +1076,7 @@ async def create(
trace_id=trace_id,
span_id=span_id,
span_type=SpanType.TASK,
span_name=span_name,
attributes=_attributes,
links=_links,
)
Expand Down
2 changes: 1 addition & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "api"
version = "0.96.6"
version = "0.96.7"
description = "Agenta API"
authors = [
{ name = "Mahmoud Mabrouk", email = "mahmoud@agenta.ai" },
Expand Down
2 changes: 1 addition & 1 deletion sdk/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "agenta"
version = "0.96.6"
version = "0.96.7"
description = "The SDK for agenta is an open-source LLMOps platform."
readme = "README.md"
authors = [
Expand Down
2 changes: 1 addition & 1 deletion services/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "services"
version = "0.96.6"
version = "0.96.7"
description = "Agenta Services (Chat & Completion)"
authors = [
"Mahmoud Mabrouk <mahmoud@agenta.ai>",
Expand Down
2 changes: 1 addition & 1 deletion web/ee/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@agenta/ee",
"version": "0.96.6",
"version": "0.96.7",
"private": true,
"engines": {
"node": ">=18"
Expand Down
2 changes: 1 addition & 1 deletion web/oss/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@agenta/oss",
"version": "0.96.6",
"version": "0.96.7",
"private": true,
"engines": {
"node": ">=18"
Expand Down
Loading
Loading